In [29]:
import pandas as pd
import numpy as np
import spacy 
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, matthews_corrcoef
from transformers import BertForSequenceClassification, AutoTokenizer
from transformers import AdamW, get_linear_schedule_with_warmup
from tqdm import tqdm, trange
import random
import os
import torch.nn.functional as F
import torch
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader
import transformers
from tqdm import tqdm, trange
from utils import normalizeTweet, split_into_sentences, bio_tagging, create_training_data



#data = pd.read_excel("/home/adrian/workspace/causality/Causal-associations-diabetes-twitter/data/Causality + hypoglycemia.xlsx", sheet_name=">5000_samples_")
data = pd.read_excel("Causality + hypoglycemia.xlsx", sheet_name=">5000_samples_")
print("Total count:", data.shape[0])
data = data[data["Causal association"].notnull()]
data = data[["full_text", "Intent", "Cause", "Effect", "Causal association"]]
print("Labeled count:", data.shape[0])

data.head()

  data = pd.read_excel("Causality + hypoglycemia.xlsx", sheet_name=">5000_samples_")


Total count: 5434
Labeled count: 5000


Unnamed: 0,full_text,Intent,Cause,Effect,Causal association
0,"tonight , I learned my older girl will back he...",,,,0.0
1,USER USER I knew diabetes and fibromyalgia wer...,joke,,,0.0
2,⬇ ️ ⬇ ️ ⬇ ️ THIS ⬇ ️ ⬇ ️ ⬇ ️ My wife has type ...,mS,,,0.0
3,USER Cheers ! Have one for this diabetic too !,mS,,,0.0
4,USER Additionally the medicines are being char...,,medicines are being charged at MRP,costing much higher,1.0


## Add BIO tags

In [30]:
data["tokenized"] = data["full_text"].map(lambda tweet: normalizeTweet(tweet).split(" "))
data["bio_tags"] = data.apply(lambda row: bio_tagging(row["full_text"],row["Cause"], row["Effect"]), axis=1)
data.head(n=20)

Unnamed: 0,full_text,Intent,Cause,Effect,Causal association,tokenized,bio_tags
0,"tonight , I learned my older girl will back he...",,,,0.0,"[tonight, ,, I, learned, my, older, girl, will...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
1,USER USER I knew diabetes and fibromyalgia wer...,joke,,,0.0,"[USER, USER, I, knew, diabetes, and, fibromyal...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
2,⬇ ️ ⬇ ️ ⬇ ️ THIS ⬇ ️ ⬇ ️ ⬇ ️ My wife has type ...,mS,,,0.0,"[:down_arrow:, :down_arrow:, :down_arrow:, THI...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
3,USER Cheers ! Have one for this diabetic too !,mS,,,0.0,"[USER, Cheers, !, Have, one, for, this, diabet...","[O, O, O, O, O, O, O, O, O, O]"
4,USER Additionally the medicines are being char...,,medicines are being charged at MRP,costing much higher,1.0,"[USER, Additionally, the, medicines, are, bein...","[O, O, O, B-C, I-C, I-C, I-C, I-C, I-C, O, O, ..."
5,USER USER We have those days Esp . if it inter...,msS,diabetic,hate,1.0,"[USER, USER, We, have, those, days, Esp, ., if...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
6,Why all of a sudden are people hungry and vuln...,q,,,0.0,"[Why, all, of, a, sudden, are, people, hungry,...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
7,"i got lime for my glucose test , wasn't that b...",,glucose test,nauseous,1.0,"[i, got, lime, for, my, glucose, test, ,, was,...","[O, O, O, O, O, B-C, I-C, O, O, O, O, O, O, O,..."
8,This stickur of Unkel Funny iz ware i am shave...,,,,0.0,"[This, stickur, of, Unkel, Funny, iz, ware, i,...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
9,For the second time in my life I gave myself i...,mS,,,0.0,"[For, the, second, time, in, my, life, I, gave...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."


## Split all tweets into sentences => new dataframe with more rows

In [31]:
def get_start_end_index_of_sentence_in_tweet(tweet, sentence):
    """ 
    The sentence tokens are included in the tweet tokens.
    Return the start end end indices of the sentence tokens in the tweet tokens

    """

    sentence_start_word = sentence[0]
    start_indices = [i for i, x in enumerate(tweet) if x == sentence_start_word] # find all indices of the start word of the sentence 
    try:
        for start_index in start_indices:
            isTrueStartIndex = all([tweet[start_index+i] == sentence[i] for i in range(len(sentence))])
            #print("start_index:", start_index, "isTrueStartIndex:", isTrueStartIndex)
            if isTrueStartIndex:
                return start_index, start_index + len(sentence) 
    except:
        print("ERROR: StartIndex should have been found for sentence:")
        print("tweet:")
        print(tweet)
        print("sentence:")
        print(sentence)
    return -1, -2 # should not be returned


def split_tweets_to_sentences(data):
    """ 
        Splits tweets into sentences and associates the appropriate intent, causes, effects and causal association
        to each sentence.
        
        Parameters:
        - min_words_in_sentences: Minimal number of words in a sentence such that the sentence is kept. 
                                  Assumption: A sentence with too few words does not have enough information
                              
                              
                              
        Ex.:
        full_text                              | Intent | Cause | Effect | Causal association | ...
        --------------------------------------------------------------------------------------------
        what? type 1 causes insulin dependence | q;msS  | type 1|insulin dependence | 1       | ...  
        
        New dataframe returned: 
        full_text                              | Intent | Cause | Effect | Causal association | ...
        --------------------------------------------------------------------------------------------
        what?                                  |   q    |       |        |       0            | ...
        type 1 causes insulin dependence       |        | type 1| insulin dependence | 1       | ...  
    """

    newDF = pd.DataFrame(columns=["sentence", "Intent", "Cause", "Effect", "Causal association", "tokenized", "bio_tags"])
    
    for i,row in data.iterrows():
        causes = row["Cause"]
        effects = row["Effect"]
        sentences = split_into_sentences(normalizeTweet(row["full_text"]))
        
        # single sentence in tweet
        if len(sentences) == 1:
            singleSentenceIntent = ""
            if isinstance(row["Intent"], str):
                if len(row["Intent"].split(";")) > 1:
                    singleSentenceIntent = row["Intent"].strip().replace(";msS", "").replace("msS;", "").replace(";mS", "").replace("mS;", "")
                else:
                    if row["Intent"] == "mS" or row["Intent"] == "msS":
                        singleSentenceIntent = ""
                    else:
                        singleSentenceIntent = row["Intent"].strip()
                    
            newDF=newDF.append(pd.Series({"sentence": sentences[0] # only one sentence
                         , "Intent": singleSentenceIntent
                         , "Cause" : row["Cause"]
                         , "Effect": row["Effect"]
                         , "Causal association" : row["Causal association"]
                         , "tokenized": row["tokenized"]
                         , "bio_tags": row["bio_tags"]}), ignore_index=True)
        
        # tweet has several sentences
        else: 
            intents = str(row["Intent"]).strip().split(";")
            for sentence in sentences:
                sent_tokenized = sentence.split(" ")
                causeInSentence = np.nan if not isinstance(causes, str) or not any([cause in sentence for cause in causes.split(";")]) else ";".join([cause for cause in causes.split(";") if cause in sentence])
                effectInSentence = np.nan if not isinstance(effects, str) or not any([effect in sentence for effect in effects.split(";")]) else ";".join([effect for effect in effects.split(";") if effect in sentence])
                causalAssociationInSentence = 1 if isinstance(causeInSentence, str) and isinstance(effectInSentence, str) else 0
                startIndex, endIndex = get_start_end_index_of_sentence_in_tweet(row["tokenized"], sent_tokenized)
                sentence_tokenized = row["tokenized"][startIndex:endIndex]
                sentence_bio_tags = row["bio_tags"][startIndex:endIndex]
                
                if "q" in intents and sentence[-1] == "?": # if current sentence is question
                    newDF=newDF.append(pd.Series({"sentence": sentence, "Intent": "q", "Cause" : causeInSentence
                                                , "Effect": effectInSentence, "Causal association" : causalAssociationInSentence
                                                , "tokenized": sentence_tokenized, "bio_tags": sentence_bio_tags}), ignore_index=True)                    
                elif "joke" in intents: # all sentences with "joke" in tweet keep the intent "joke"
                    newDF=newDF.append(pd.Series({"sentence": sentence, "Intent": "joke", "Cause" : causeInSentence
                                                , "Effect": effectInSentence, "Causal association" : causalAssociationInSentence
                                                , "tokenized": sentence_tokenized, "bio_tags": sentence_bio_tags}), ignore_index=True)   
                elif "neg" in intents: # all sentences with "neg" in tweet keep intent "neg"
                    newDF=newDF.append(pd.Series({"sentence": sentence, "Intent": "neg", "Cause" : causeInSentence
                                                , "Effect": effectInSentence, "Causal association" : causalAssociationInSentence
                                                , "tokenized": sentence_tokenized, "bio_tags": sentence_bio_tags}), ignore_index=True)               
                elif isinstance(causeInSentence, str) and isinstance(effectInSentence, str): # cause effect sentence
                    causalIntent = ""
                    if len(causeInSentence.split(";")) > 1:
                        causalIntent = "mC"
                        if len(effectInSentence.split(";")) > 1:
                            causalIntent = "mC;mE"
                    elif len(effectInSentence.split(";")) > 1:
                        causalIntent = "mE"
                    newDF=newDF.append(pd.Series({"sentence": sentence, "Intent": causalIntent, "Cause" : causeInSentence
                                                , "Effect": effectInSentence, "Causal association" : causalAssociationInSentence
                                                , "tokenized": sentence_tokenized, "bio_tags": sentence_bio_tags}), ignore_index=True)                                  
                else:
                    nonCausalIntent = ""
                    if isinstance(causeInSentence, str): # only cause is given
                        if len(causeInSentence.split(";")) > 1:
                            nonCausalIntent = "mC"
                    elif isinstance(effectInSentence, str): # only effect is given
                        if len(effectInSentence.split(";")) > 1:
                            nonCausalIntent = "mE"
                    newDF=newDF.append(pd.Series({"sentence": sentence, "Intent": nonCausalIntent, "Cause" : causeInSentence
                                                , "Effect": effectInSentence, "Causal association" : causalAssociationInSentence
                                                , "tokenized": sentence_tokenized, "bio_tags": sentence_bio_tags}), ignore_index=True)

    return newDF
       
# sample: has one example for each possible "Intent" value
"""
allIntents = data["Intent"].value_counts().keys().tolist()
sample = data[data["Intent"] == "mS"][0:1]
for intent in allIntents:
    sample = sample.append(data[data["Intent"] == intent][1:2])
print(sample.shape)

i = 1
test = sample#[i:i+1]
dataSentences = split_tweets_to_sentences(test)
dataSentences.head(30)
#test.head()
"""

print("N tweets:", data.shape[0])
dataSentences = split_tweets_to_sentences(data)
print("N sentences:", dataSentences.shape[0])
dataSentences.head()

N tweets: 5000
N sentences: 11784


Unnamed: 0,sentence,Intent,Cause,Effect,Causal association,tokenized,bio_tags
0,"tonight , I learned my older girl will back he...",,,,0,"[tonight, ,, I, learned, my, older, girl, will...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O]"
1,Fiercely .,,,,0,"[Fiercely, .]","[O, O]"
2,#impressive #bigsister #type1 #type1times2,,,,0,"[#impressive, #bigsister, #type1, #type1times2]","[O, O, O, O]"
3,USER USER I knew diabetes and fibromyalgia wer...,joke,,,0,"[USER, USER, I, knew, diabetes, and, fibromyal...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O]"
4,:face_with_rolling_eyes:,joke,,,0,[:face_with_rolling_eyes:],[O]


### Filter out negation, jokes, questions and sentences with a minimal token length of 3

In [32]:
print("N sentences before filtering: ", dataSentences.shape[0])
dataSentFiltered = dataSentences[~dataSentences["Intent"].str.contains("neg|joke|q")] # remove sentences with joke, q, neg
dataSentFiltered = dataSentFiltered[dataSentFiltered["tokenized"].map(len) >= 3] # only keep sentences with at least 3 words
print("N sentences after filtering: ", dataSentFiltered.shape[0])
dataSentFiltered.head()


N sentences before filtering:  11784
N sentences after filtering:  8835


Unnamed: 0,sentence,Intent,Cause,Effect,Causal association,tokenized,bio_tags
0,"tonight , I learned my older girl will back he...",,,,0,"[tonight, ,, I, learned, my, older, girl, will...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O]"
2,#impressive #bigsister #type1 #type1times2,,,,0,"[#impressive, #bigsister, #type1, #type1times2]","[O, O, O, O]"
5,:down_arrow: :down_arrow: :down_arrow: THIS :d...,,,,0,"[:down_arrow:, :down_arrow:, :down_arrow:, THI...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O]"
6,I 'm a trans woman .,,,,0,"[I, 'm, a, trans, woman, .]","[O, O, O, O, O, O]"
7,"Both of us could use a world where "" brave and...",,,,0,"[Both, of, us, could, use, a, world, where, "",...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."


In [33]:
dataSentFiltered["Intent"].value_counts()

         8705
mE         72
mC         47
mC;mE      10
mE;mC       1
Name: Intent, dtype: int64

### Only work on cause-effect tweets

In [34]:
dataSentFiltered["Causal association"].value_counts()

0.0    7799
1.0    1036
Name: Causal association, dtype: int64

#### only take sentences with cause or  effect or both : so we 
#### only take sentence with cause and effect or either cause and effect

In [35]:


## We are doing the same in the code below - jsut less code. 
# trainingData = dataSentFiltered[(dataSentFiltered["Cause"].notnull() & dataSentFiltered["Effect"].notnull())
#               | (dataSentFiltered["Cause"].notnull() & dataSentFiltered["Effect"].isnull())
#               | (dataSentFiltered["Cause"].isnull() & dataSentFiltered["Effect"].notnull())
#             ]
# trainingData.shape

In [36]:
# only take sentences with cause and effect
#trainingData = dataSentFiltered[dataSentFiltered["Causal association"] == 1]

# only take sentence with cause and effect or either cause and effect
trainingData = dataSentFiltered[(dataSentFiltered["Cause"].notnull()) | (dataSentFiltered["Effect"].notnull())]

trainingData.shape

(1420, 7)

### Create training, validation, test sets

In [37]:
trainingDataSample = trainingData#.sample(n=200)   # VIVEK: DELETE TAKING SAMPLE. THIS WAS ONLY FOR TESTING
train = trainingDataSample.sample(frac=0.8, random_state=0)
test = trainingDataSample.drop(train.index)
validate = train.sample(frac=0.2, random_state=0)
train = train.drop(validate.index)
print("Train:", train.shape)
print("Validate:", validate.shape)
print("Test:", test.shape)

Train: (909, 7)
Validate: (227, 7)
Test: (284, 7)


In [38]:
# Transform labels + encodings into Pytorch DataSet object (including __len__, __getitem__)
class TweetDataSet(torch.utils.data.Dataset):
    def __init__(self, text, labels, bio_tags, tokenizer):
        self.text = text
        self.labels = labels
        self.tokenizer = tokenizer
        self.bio_tags = bio_tags
        self.tag2id = {label: idx for idx, label in enumerate(["O", "B-C", "I-C", "B-E", "I-E"])}
        self.tag2id[-100] = -100
        self.id2tag = {id:tag for tag,id in self.tag2id.items()}

    def __getitem__(self, idx):
        inputs = self.tokenizer(self.text, padding=True, truncation=True, return_token_type_ids=True)
        ids = inputs["input_ids"]
        mask = inputs["attention_mask"]
        token_type_ids = inputs["token_type_ids"]
        bio_tags_extended = self.extend_tags(self.text[idx], self.bio_tags[idx], ids[idx])
        assert(len(ids[idx]) == len(bio_tags_extended), "token ids and BIO tags lengths do not match!")
        return {
                "input_ids" : torch.tensor(ids[idx], dtype=torch.long)
              , "attention_mask" : torch.tensor(mask[idx], dtype=torch.long)
              , "token_type_ids" : torch.tensor(token_type_ids[idx], dtype=torch.long)
              , "labels" : torch.tensor(self.labels[idx], dtype=torch.long)
              , "bio_tags" : torch.tensor(list(map(lambda bioTags: self.tag2id[bioTags], bio_tags_extended))
, dtype=torch.long)
        }

    def __len__(self):
        return len(self.labels)

    
    def extend_tags(self, tokens_old, tags_old, ids_tokenized_padded):
        """ 
            Each token has a BIO tag label. 
            However BERT's tokenization splits tokens into subwords. How to label those subwords?
            
            Option 1:
            ---------
            
            add the same label to each subword than the first subword. Only replace "B" by "I"
            Ex. 
            #lowbloodsugar => '#low@@', 'blood@@', 'sugar@@'
               "B-C"       =>   "B-C" ,   "I-C"  ,   "I-C"
            
            Option 2 (implemented):      
            ---------
            
            From : https://huggingface.co/transformers/custom_datasets.html#token-classification-with-w-nut-emerging-entities
            A common obstacle with using pre-trained models for token-level classification: many of the tokens in
            the W-NUT corpus are not in DistilBert’s vocabulary. Bert and many models like it use a method called 
            WordPiece Tokenization, meaning that single words are split into multiple tokens such that each token
            is likely to be in the vocabulary. For example, DistilBert’s tokenizer would split the Twitter 
            handle @huggingface into the tokens ['@', 'hugging', '##face']. This is a problem for us because we 
            have exactly one tag per token. If the tokenizer splits a token into multiple sub-tokens, then we will
            end up with a mismatch between our tokens and our labels.

            One way to handle this is to only train on the tag labels for the first subtoken of a split token. 
            We can do this in 🤗 Transformers by setting the labels we wish to ignore to -100. 
            In the example above, if the label for @HuggingFace is 3 (indexing B-corporation), we would set 
            the labels of ['@', 'hugging', '##face'] to [3, -100, -100].
        """
        tags = [-100] # add for start token <CLS>
        for token_old, tag in zip(tokens_old.split(" "), tags_old):
#            print(F"\ntoken_old: {token_old};    tag: {tag}")
            for i, sub_token in enumerate(self.tokenizer.tokenize(token_old)):
                if (i == 0):
                    tags.append(tag)
                else: 
                    tags.append(-100)
           
        tags.append(-100) # 0 for end of sentence token
    
        # append -100 for all padded elements
        padded_elements = ids_tokenized_padded.count(1) # id 1 is <PAD> ; Alternative: where attention_mask == 0 add -100
        tags.extend([-100]*padded_elements)
        
        return tags
        
        
    
tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base")

train_dataset = TweetDataSet(train["sentence"].values.tolist()
                           , train["Causal association"].values.tolist()
                           , train["bio_tags"].values.tolist()
                           , tokenizer)
val_dataset = TweetDataSet(validate["sentence"].values.tolist()
                           , validate["Causal association"].values.tolist()
                           , validate["bio_tags"].values.tolist()
                           , tokenizer)
test_dataset = TweetDataSet(test["sentence"].values.tolist()
                           , test["Causal association"].values.tolist()
                           , test["bio_tags"].values.tolist()
                           , tokenizer)
print(len(train_dataset))
print(len(val_dataset))
print(len(test_dataset))

# put data to batches
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
validation_loader = DataLoader(val_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=True)


  assert(len(ids[idx]) == len(bio_tags_extended), "token ids and BIO tags lengths do not match!")
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


909
227
284


In [39]:
from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments
from transformers import AutoModelForSequenceClassification
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(pred, labels):
    """
        Dataset is unbalanced -> measure weighted metrics
        Calculate metrics for each label, and find their average wieghted by support (Number of true instances for each label)
        This alters 'macro' to account for label imbalance;
        it can result in an F-Score taht is not between precision and recall
    """
    precision, recall, f1, _ = precision_recall_fscore_support(labels, pred, average='macro') # TODO: check weightin
    acc = accuracy_score(labels, pred)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }



class CausalNER(torch.nn.Module):
    """ Model Bert"""
    def __init__(self):
        super(CausalNER, self).__init__()
        self.num_labels = 5 # B-C, I-C, B-E, I-E, O
        self.bert = transformers.BertModel.from_pretrained("vinai/bertweet-base")
        self.dropout = torch.nn.Dropout(0.3)
        self.linear1 = torch.nn.Linear(768, 256)
        self.linear2 = torch.nn.Linear(256, self.num_labels)
        self.softmax = torch.nn.Softmax(-1)
        
    def forward(self, input_ids, attention_mask, token_type_ids):
#        _, output_1 = self.bert(input_ids, attention_mask = attention_mask, token_type_ids=token_type_ids, return_dict=False) # if output 1 is our cls token
        output_seq, _ = self.bert(input_ids, attention_mask = attention_mask, token_type_ids=token_type_ids, return_dict=False) # if output 1 is our cls token
        output_2 = self.dropout(output_seq)
        output_3 = self.linear1(output_2)
        output_4 = self.dropout(output_3)
        output_5 = self.linear2(output_4)
        return output_5


### Model parameters

In [40]:
batchsize_train = 16
lr = 1e-3
adam_eps = 1e-8
epochs = 35
num_warmup_steps = 0
num_training_steps = len(train_loader)*epochs

In [41]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model = CausalNER()
model.to(device)

# fine-tune only the task-specific parameters -> Vivek? 
for param in model.bert.parameters():
    param.requires_grad = False
    


optim = AdamW(model.parameters(), lr=lr, eps=adam_eps)
# scheduler with a linearly decreasing learning rate from the initial lr set in the optimizer to 0;
# after a warmup period during which it increases linearly from to the initial lr set in the optimizer
scheduler = get_linear_schedule_with_warmup(optim, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps) 

loss_fn = CrossEntropyLoss(ignore_index=-100) # ignore subwords/tokens with label -100 


You are using a model of type roberta to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing BertModel: ['roberta.encoder.layer.11.attention.self.query.weight', 'roberta.encoder.layer.2.output.dense.bias', 'roberta.encoder.layer.1.attention.self.query.weight', 'roberta.encoder.layer.3.intermediate.dense.bias', 'roberta.encoder.layer.1.attention.self.value.bias', 'roberta.encoder.layer.10.output.LayerNorm.weight', 'roberta.encoder.layer.1.attention.self.value.weight', 'roberta.encoder.layer.7.output.LayerNorm.weight', 'roberta.encoder.layer.6.attention.self.query.bias', 'roberta.encoder.layer.9.intermediate.dense.bias', 'roberta.encoder.layer.10.attention.self.value.bias', 'roberta.encoder.layer.5.attention.self.key.weight', 'roberta.encoder.layer.0.attention.output.LayerNorm.weight', 'roberta.encoder.layer.2.intermediate.dense.bias', 'ro

### Training

In [42]:
# Store our loss and learning rate for plotting
learning_rate = []

N_bio_tags = 5 # "O", "B-C", "I-C", "B-E", "I-C"
for epoch in trange(1, epochs+1, desc='Epoch'):
    print("<" + "="*22 + F" Epoch {epoch} "+ "="*22 + ">")

    
    ############ training eval metrics ######################
    train_loss = []
    train_acc = []
    train_prec = []
    train_rec = []
    train_f1 = []
    
    #########################################################
    
    
    for batch in tqdm(train_loader):
        optim.zero_grad() # gradients get accumulated by default -> clear previous accumulated gradients
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        token_type_ids = batch["token_type_ids"].to(device)
        labels = batch['labels'].to(device)
        bio_tags = batch['bio_tags'].to(device)
        
        ################################################
        model.train() # set model to training mode
        logits = model(**{"input_ids":input_ids, "attention_mask":attention_mask, "token_type_ids":token_type_ids}) # forward pass

        ################################################ 
        # similar to the class RobertaForToken classification in transformers: https://github.com/huggingface/transformers/blob/master/src/transformers/models/roberta/modeling_roberta.py
        active_loss = attention_mask.view(-1) == 1  # either based on attention_mask (includes <CLS>, <SEP> token)
        active_logits = logits.view(-1, N_bio_tags)[active_loss] # N_bio_tags=5 
        active_tags = bio_tags.view(-1)[active_loss]
        loss = loss_fn(active_logits, active_tags)             
        print("loss:", loss)       ## TODO VIVEK: check loss function calculation
        loss.backward() # backward pass
        optim.step()    # update parameters and take a steup using the computed gradient
        scheduler.step()# update learning rate scheduler
        train_loss.append(loss.item())
            
            
        ################## Training Performance Measures ##########
        logits = logits.detach().to('cpu').numpy()
        tags_ids = bio_tags.to('cpu').numpy()

        # calculate performance measures only on tokens and not subwords or special tokens
        tags_mask = tags_ids != -100 # only get token labels and not labels from subwords or special tokens
        pred = np.argmax(logits, axis=2)[tags_mask] #.flatten() # convert logits to list of predicted labels
        tags = tags_ids[tags_mask]                      
                
        metrics = compute_metrics(pred, tags)
        train_acc.append(metrics["accuracy"])
        train_prec.append(metrics["precision"])
        train_rec.append(metrics["recall"])
        train_f1.append(metrics["f1"])
                          
           
    print(F'\n\tTraining Loss: {np.mean(train_loss)}')
    print(F'\n\tTraining acc: {np.mean(train_acc)}')
    print(F'\n\tTraining prec: {np.mean(train_prec)}')
    print(F'\n\tTraining rec: {np.mean(train_rec)}')
    print(F'\n\tTraining f1: {np.mean(train_f1)}')
                          
                          
    # store the current learning rate
    for param_group in optim.param_groups:
        print("\n\tCurrent Learning rate: ", param_group['lr'])
        learning_rate.append(param_group['lr'])
    

    ############# Validation ################
    
    nb_eval_steps = 0 # Tracking variables
    val_accuracy = []
    val_loss = []
    val_acc = []
    val_prec = []
    val_rec = []
    val_f1 = []

    # Evaluate data for one epoch
    for batch in tqdm(validation_loader):
        batch = tuple(batch[t].to(device) for t in batch)      # batch to GPU
        v_input_ids, v_input_mask, v_token_type_ids, v_labels, v_bio_tags = batch  # unpack inputs from dataloader
        
        with torch.no_grad(): # tell model not to compute or store gradients -> saves memory + speeds up validation
            model.eval() # put model in evaluation mode for validation set
            logits = model(**{"input_ids":v_input_ids, "attention_mask":v_input_mask, "token_type_ids":v_token_type_ids}) # forward pass, calculates logit predictions

        ######################################################
        
        # similar to the class RobertaForToken classification in transformers: https://github.com/huggingface/transformers/blob/master/src/transformers/models/roberta/modeling_roberta.py
        v_active_loss = v_input_mask.view(-1) == 1  # either based on attention_mask (includes <CLS>, <SEP> token)
        v_active_logits = logits.view(-1, N_bio_tags)[v_active_loss] # 5 
        v_active_tags = v_bio_tags.view(-1)[v_active_loss]
        v_loss = loss_fn(v_active_logits, v_active_tags)             
        val_loss.append(v_loss.item())
              
        #########################################################
        logits = logits.detach().to('cpu').numpy()
        tags_ids = v_bio_tags.to('cpu').numpy()

        # calculate performance measures only on tokens and not subwords or special tokens
        tags_mask = tags_ids != -100 # only get token labels and not labels from subwords or special tokens
        pred = np.argmax(logits, axis=2)[tags_mask] #.flatten() # convert logits to list of predicted labels
        tags = tags_ids[tags_mask]#.flatten()        
        
        metrics = compute_metrics(pred, tags)
        val_acc.append(metrics["accuracy"])
        val_prec.append(metrics["precision"])
        val_rec.append(metrics["recall"])
        val_f1.append(metrics["f1"])
                              
        nb_eval_steps += 1
        
    print(F'\n\tValidation Loss: {np.mean(val_loss)}')
    print(F'\n\tValidation acc: {np.mean(val_acc)}')
    print(F'\n\tValidation prec: {np.mean(val_prec)}')
    print(F'\n\tValidation rec: {np.mean(val_rec)}')
    print(F'\n\tValidation f1: {np.mean(val_f1)}')
    


Epoch:   0%|          | 0/35 [00:00<?, ?it/s]
  , "labels" : torch.tensor(self.labels[idx], dtype=torch.long)





  2%|▏         | 1/57 [00:01<01:10,  1.26s/it][A

loss: tensor(1.7871, device='cuda:0', grad_fn=<NllLossBackward>)


  _warn_prf(average, modifier, msg_start, len(result))

  4%|▎         | 2/57 [00:02<00:58,  1.07s/it][A

loss: tensor(0.8451, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:03<00:57,  1.06s/it][A

loss: tensor(1.5736, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:04<00:55,  1.04s/it][A

loss: tensor(1.1012, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:52,  1.00s/it][A

loss: tensor(0.9606, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:06<00:52,  1.03s/it][A

loss: tensor(0.8849, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:50,  1.00s/it][A

loss: tensor(0.7082, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:50,  1.02s/it][A

loss: tensor(1.0781, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:47,  1.00it/s][A

loss: tensor(0.8634, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:46,  1.02it/s][A

loss: tensor(0.7147, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:47,  1.03s/it][A

loss: tensor(0.7878, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:44,  1.00it/s][A

loss: tensor(0.6831, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:44,  1.01s/it][A

loss: tensor(0.7460, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:42,  1.01it/s][A

loss: tensor(0.7765, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:15<00:42,  1.01s/it][A

loss: tensor(0.8837, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:41,  1.00s/it][A

loss: tensor(0.8540, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:17<00:39,  1.02it/s][A

loss: tensor(0.9257, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:18<00:39,  1.00s/it][A

loss: tensor(0.8718, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:19<00:37,  1.02it/s][A

loss: tensor(0.6909, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:20<00:37,  1.01s/it][A

loss: tensor(0.7068, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:21<00:35,  1.01it/s][A

loss: tensor(0.5994, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:22<00:35,  1.01s/it][A

loss: tensor(0.7469, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:23<00:33,  1.00it/s][A

loss: tensor(0.6782, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:24<00:32,  1.02it/s][A

loss: tensor(0.8153, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:25<00:32,  1.01s/it][A

loss: tensor(0.8865, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:26<00:30,  1.00it/s][A

loss: tensor(0.8791, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:27<00:30,  1.02s/it][A

loss: tensor(0.7718, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:28<00:29,  1.01s/it][A

loss: tensor(0.7973, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:29<00:27,  1.00it/s][A

loss: tensor(0.8506, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:30<00:27,  1.02s/it][A

loss: tensor(0.8008, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:31<00:25,  1.00it/s][A

loss: tensor(0.6397, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:32<00:25,  1.02s/it][A

loss: tensor(0.5860, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:33<00:24,  1.03s/it][A

loss: tensor(0.6924, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:34<00:24,  1.07s/it][A

loss: tensor(0.5837, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:35<00:23,  1.06s/it][A

loss: tensor(0.7384, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:36<00:22,  1.06s/it][A

loss: tensor(0.5840, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:37<00:21,  1.07s/it][A

loss: tensor(0.6230, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:38<00:19,  1.05s/it][A

loss: tensor(0.7568, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:39<00:19,  1.06s/it][A

loss: tensor(0.5863, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:40<00:17,  1.04s/it][A

loss: tensor(0.7512, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:41<00:16,  1.06s/it][A

loss: tensor(0.9003, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:42<00:15,  1.04s/it][A

loss: tensor(0.6571, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:43<00:14,  1.02s/it][A

loss: tensor(0.7055, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:44<00:13,  1.05s/it][A

loss: tensor(0.6047, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:46<00:12,  1.04s/it][A

loss: tensor(0.6467, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:47<00:11,  1.06s/it][A

loss: tensor(0.8735, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:48<00:10,  1.03s/it][A

loss: tensor(0.7089, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:49<00:09,  1.06s/it][A

loss: tensor(0.6934, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:50<00:08,  1.03s/it][A

loss: tensor(0.5713, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:51<00:07,  1.00s/it][A

loss: tensor(0.7848, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:52<00:06,  1.02s/it][A

loss: tensor(0.7830, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:53<00:05,  1.01s/it][A

loss: tensor(0.8114, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:54<00:04,  1.03s/it][A

loss: tensor(0.6882, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:55<00:02,  1.00it/s][A

loss: tensor(0.6123, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:56<00:01,  1.01it/s][A

loss: tensor(0.7924, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:57<00:01,  1.02s/it][A

loss: tensor(0.6368, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:57<00:00,  1.02s/it][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.57it/s][A

loss: tensor(0.7004, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.78914720656579

	Training acc: 0.7974334265483175

	Training prec: 0.22347946379535338

	Training rec: 0.21188165616757623

	Training f1: 0.19722926168077007

	Current Learning rate:  0.0009714285714285714



  7%|▋         | 2/29 [00:00<00:03,  6.81it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.97it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  7.05it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  7.08it/s][A
 21%|██        | 6/29 [00:00<00:03,  7.09it/s][A
 24%|██▍       | 7/29 [00:00<00:03,  7.10it/s][A
 28%|██▊       | 8/29 [00:01<00:02,  7.10it/s][A
 31%|███       | 9/29 [00:01<00:02,  7.14it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  7.15it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  7.04it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.94it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.86it/s][A
 48%|████▊     | 14/29 [00:01<00:02,  6.95it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.98it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.96it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.93it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.80it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.92it/s][A
 69%|██████▉   | 20/29 [00:02<00:01,  6.93it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.7253945582899554

	Validation acc: 0.7962596071571953

	Validation prec: 0.28550978374569297

	Validation rec: 0.2584809617212529

	Validation f1: 0.2552580469558719



  2%|▏         | 1/57 [00:00<00:53,  1.05it/s][A

loss: tensor(0.8855, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:01<00:53,  1.03it/s][A

loss: tensor(0.6015, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:02<00:54,  1.01s/it][A

loss: tensor(0.7563, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:03<00:52,  1.01it/s][A

loss: tensor(0.7153, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:53,  1.04s/it][A

loss: tensor(0.6362, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:06<00:51,  1.01s/it][A

loss: tensor(0.6550, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:52,  1.05s/it][A

loss: tensor(0.6730, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:49,  1.01s/it][A

loss: tensor(0.7134, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:47,  1.02it/s][A

loss: tensor(0.6365, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:47,  1.01s/it][A

loss: tensor(0.6376, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:45,  1.00it/s][A

loss: tensor(0.8611, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:45,  1.02s/it][A

loss: tensor(0.7532, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:44,  1.02s/it][A

loss: tensor(0.6903, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:45,  1.07s/it][A

loss: tensor(0.6628, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:15<00:44,  1.06s/it][A

loss: tensor(0.7381, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:43,  1.06s/it][A

loss: tensor(0.6911, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:17<00:44,  1.11s/it][A

loss: tensor(0.6327, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:18<00:43,  1.12s/it][A

loss: tensor(0.8765, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:20<00:44,  1.16s/it][A

loss: tensor(0.7218, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:21<00:42,  1.15s/it][A

loss: tensor(0.8005, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:22<00:41,  1.15s/it][A

loss: tensor(0.6863, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:23<00:41,  1.18s/it][A

loss: tensor(0.6652, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:24<00:39,  1.16s/it][A

loss: tensor(0.6286, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:25<00:39,  1.19s/it][A

loss: tensor(0.7438, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:27<00:37,  1.17s/it][A

loss: tensor(0.7355, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:28<00:36,  1.19s/it][A

loss: tensor(0.6044, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:29<00:33,  1.13s/it][A

loss: tensor(0.5713, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:30<00:31,  1.08s/it][A

loss: tensor(0.8152, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:31<00:30,  1.09s/it][A

loss: tensor(0.6348, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:32<00:28,  1.05s/it][A

loss: tensor(0.6651, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:33<00:27,  1.08s/it][A

loss: tensor(0.6304, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:34<00:26,  1.04s/it][A

loss: tensor(0.9355, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:35<00:25,  1.06s/it][A

loss: tensor(0.6281, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:36<00:23,  1.03s/it][A

loss: tensor(0.6834, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:37<00:22,  1.02s/it][A

loss: tensor(0.6257, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:38<00:22,  1.05s/it][A

loss: tensor(0.6702, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:39<00:20,  1.02s/it][A

loss: tensor(0.6170, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:40<00:20,  1.06s/it][A

loss: tensor(0.5690, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:41<00:18,  1.03s/it][A

loss: tensor(0.5841, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:42<00:17,  1.02s/it][A

loss: tensor(0.7311, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:43<00:16,  1.04s/it][A

loss: tensor(0.5939, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:44<00:15,  1.03s/it][A

loss: tensor(0.6526, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:45<00:14,  1.06s/it][A

loss: tensor(0.5969, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:46<00:13,  1.04s/it][A

loss: tensor(0.6695, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:48<00:12,  1.06s/it][A

loss: tensor(0.6283, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:48<00:11,  1.03s/it][A

loss: tensor(0.6394, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:49<00:10,  1.02s/it][A

loss: tensor(0.7313, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:51<00:09,  1.04s/it][A

loss: tensor(0.7574, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:52<00:08,  1.02s/it][A

loss: tensor(0.6265, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:53<00:07,  1.05s/it][A

loss: tensor(0.6278, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:54<00:06,  1.06s/it][A

loss: tensor(0.6558, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:55<00:05,  1.07s/it][A

loss: tensor(0.7174, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:56<00:04,  1.04s/it][A

loss: tensor(0.6676, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:57<00:03,  1.02s/it][A

loss: tensor(0.5552, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:58<00:02,  1.05s/it][A

loss: tensor(0.7639, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:59<00:01,  1.07s/it][A

loss: tensor(0.7392, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [01:00<00:00,  1.06s/it][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.48it/s][A

loss: tensor(0.6540, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6849095361274585

	Training acc: 0.8129803847997522

	Training prec: 0.32968346393459785

	Training rec: 0.23921904028978064

	Training f1: 0.2395156019960095

	Current Learning rate:  0.0009428571428571429



  7%|▋         | 2/29 [00:00<00:04,  6.44it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.51it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.56it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.52it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.62it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.72it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.76it/s][A
 31%|███       | 9/29 [00:01<00:02,  6.79it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.73it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.69it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.68it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.65it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.60it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.32it/s][A
 55%|█████▌    | 16/29 [00:02<00:02,  6.38it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.42it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.52it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.54it/s][A
 69%|██████▉   | 20/29 [00:03<00:01,  6.60it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.7068542406476778

	Validation acc: 0.8031314408701856

	Validation prec: 0.34986908676677886

	Validation rec: 0.27760423215759894

	Validation f1: 0.2779570969270542



  2%|▏         | 1/57 [00:00<00:55,  1.00it/s][A

loss: tensor(0.7619, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:02<00:58,  1.07s/it][A

loss: tensor(0.7049, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:03<00:56,  1.04s/it][A

loss: tensor(0.6183, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:04<00:57,  1.08s/it][A

loss: tensor(0.6243, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:54,  1.04s/it][A

loss: tensor(0.7081, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:06<00:52,  1.02s/it][A

loss: tensor(0.6408, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:53,  1.06s/it][A

loss: tensor(0.4763, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:51,  1.04s/it][A

loss: tensor(0.6225, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:50,  1.06s/it][A

loss: tensor(0.6054, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:48,  1.03s/it][A

loss: tensor(0.7489, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:46,  1.01s/it][A

loss: tensor(0.5551, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:47,  1.05s/it][A

loss: tensor(0.5331, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:45,  1.03s/it][A

loss: tensor(0.7384, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:45,  1.06s/it][A

loss: tensor(0.4560, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:15<00:43,  1.05s/it][A

loss: tensor(0.6113, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:43,  1.07s/it][A

loss: tensor(0.7078, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:17<00:43,  1.08s/it][A

loss: tensor(0.7837, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:18<00:40,  1.04s/it][A

loss: tensor(0.6259, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:19<00:40,  1.05s/it][A

loss: tensor(0.6104, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:20<00:38,  1.04s/it][A

loss: tensor(0.6171, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:22<00:38,  1.07s/it][A

loss: tensor(0.7681, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:23<00:36,  1.05s/it][A

loss: tensor(0.7992, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:24<00:36,  1.08s/it][A

loss: tensor(0.6245, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:25<00:35,  1.07s/it][A

loss: tensor(0.6462, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:26<00:33,  1.06s/it][A

loss: tensor(0.5538, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:27<00:33,  1.09s/it][A

loss: tensor(0.6312, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:28<00:31,  1.06s/it][A

loss: tensor(0.5430, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:29<00:31,  1.08s/it][A

loss: tensor(0.8629, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:30<00:29,  1.05s/it][A

loss: tensor(0.7687, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:31<00:28,  1.07s/it][A

loss: tensor(0.6734, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:32<00:27,  1.05s/it][A

loss: tensor(0.7192, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:33<00:25,  1.03s/it][A

loss: tensor(0.6975, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:34<00:26,  1.09s/it][A

loss: tensor(0.8190, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:35<00:24,  1.05s/it][A

loss: tensor(0.8378, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:36<00:23,  1.06s/it][A

loss: tensor(0.8782, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:37<00:21,  1.03s/it][A

loss: tensor(0.6764, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:38<00:20,  1.01s/it][A

loss: tensor(0.6230, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:39<00:19,  1.03s/it][A

loss: tensor(0.8173, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:40<00:18,  1.01s/it][A

loss: tensor(0.5669, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:42<00:17,  1.05s/it][A

loss: tensor(0.7021, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:43<00:16,  1.03s/it][A

loss: tensor(0.7921, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:44<00:15,  1.06s/it][A

loss: tensor(0.7487, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:45<00:14,  1.04s/it][A

loss: tensor(0.7404, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:46<00:13,  1.03s/it][A

loss: tensor(0.6782, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:47<00:12,  1.05s/it][A

loss: tensor(0.6784, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:48<00:11,  1.03s/it][A

loss: tensor(0.7111, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:49<00:10,  1.07s/it][A

loss: tensor(0.7788, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:50<00:09,  1.05s/it][A

loss: tensor(0.6965, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:51<00:08,  1.07s/it][A

loss: tensor(0.7274, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:52<00:07,  1.05s/it][A

loss: tensor(0.6270, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:53<00:06,  1.03s/it][A

loss: tensor(0.7913, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:54<00:05,  1.06s/it][A

loss: tensor(0.7208, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:55<00:04,  1.05s/it][A

loss: tensor(0.6190, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:56<00:03,  1.10s/it][A

loss: tensor(0.6776, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:57<00:02,  1.07s/it][A

loss: tensor(0.6000, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:58<00:01,  1.08s/it][A

loss: tensor(0.5447, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:59<00:00,  1.05s/it][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.28it/s][A

loss: tensor(0.7401, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6812409444859153

	Training acc: 0.8144896218598388

	Training prec: 0.3490684987340536

	Training rec: 0.2527772616682898

	Training f1: 0.2576882272663483

	Current Learning rate:  0.0009142857142857143



  7%|▋         | 2/29 [00:00<00:04,  6.48it/s][A
 10%|█         | 3/29 [00:00<00:04,  6.37it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.46it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.51it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.49it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.51it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.53it/s][A
 31%|███       | 9/29 [00:01<00:03,  6.52it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.52it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.53it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.56it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.55it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.45it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.26it/s][A
 55%|█████▌    | 16/29 [00:02<00:02,  6.05it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.20it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.32it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.36it/s][A
 69%|██████▉   | 20/29 [00:03<00:01,  6.33it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.7222742335549717

	Validation acc: 0.7999441397269245

	Validation prec: 0.2796463023448399

	Validation rec: 0.255284180979835

	Validation f1: 0.2494030447486372



  2%|▏         | 1/57 [00:01<01:01,  1.09s/it][A

loss: tensor(0.5771, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:02<00:56,  1.03s/it][A

loss: tensor(0.8771, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:03<00:54,  1.00s/it][A

loss: tensor(0.7411, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:04<00:55,  1.04s/it][A

loss: tensor(0.6794, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:53,  1.03s/it][A

loss: tensor(0.6922, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:06<00:55,  1.09s/it][A

loss: tensor(0.6517, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:52,  1.05s/it][A

loss: tensor(0.8313, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:51,  1.06s/it][A

loss: tensor(0.6726, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:49,  1.03s/it][A

loss: tensor(0.6466, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:47,  1.02s/it][A

loss: tensor(0.5951, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:50,  1.09s/it][A

loss: tensor(0.7841, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:47,  1.05s/it][A

loss: tensor(0.5679, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:47,  1.08s/it][A

loss: tensor(0.6125, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:46,  1.07s/it][A

loss: tensor(0.6391, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:15<00:44,  1.05s/it][A

loss: tensor(0.6314, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:43,  1.07s/it][A

loss: tensor(0.6734, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:17<00:41,  1.04s/it][A

loss: tensor(0.7484, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:19<00:41,  1.07s/it][A

loss: tensor(0.7589, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:20<00:39,  1.04s/it][A

loss: tensor(0.6912, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:21<00:39,  1.06s/it][A

loss: tensor(0.6421, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:22<00:37,  1.03s/it][A

loss: tensor(0.7083, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:23<00:35,  1.02s/it][A

loss: tensor(0.5646, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:24<00:35,  1.03s/it][A

loss: tensor(0.8579, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:25<00:33,  1.02s/it][A

loss: tensor(0.7766, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:26<00:33,  1.03s/it][A

loss: tensor(0.7367, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:27<00:31,  1.02s/it][A

loss: tensor(0.5708, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:28<00:31,  1.04s/it][A

loss: tensor(0.6768, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:29<00:29,  1.02s/it][A

loss: tensor(0.7766, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:30<00:28,  1.00s/it][A

loss: tensor(0.7695, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:31<00:27,  1.01s/it][A

loss: tensor(0.5632, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:32<00:26,  1.00s/it][A

loss: tensor(0.5921, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:33<00:25,  1.03s/it][A

loss: tensor(0.7396, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:34<00:24,  1.01s/it][A

loss: tensor(0.7658, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:35<00:23,  1.02s/it][A

loss: tensor(0.5051, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:36<00:22,  1.02s/it][A

loss: tensor(0.6393, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:37<00:21,  1.01s/it][A

loss: tensor(0.6689, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:38<00:20,  1.03s/it][A

loss: tensor(0.6886, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:39<00:19,  1.02s/it][A

loss: tensor(0.7715, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:40<00:18,  1.04s/it][A

loss: tensor(0.5582, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:41<00:17,  1.02s/it][A

loss: tensor(0.7422, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:42<00:16,  1.01s/it][A

loss: tensor(0.4839, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:43<00:15,  1.03s/it][A

loss: tensor(0.6477, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:44<00:14,  1.02s/it][A

loss: tensor(0.6378, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:45<00:13,  1.04s/it][A

loss: tensor(0.6014, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:46<00:12,  1.03s/it][A

loss: tensor(0.7206, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:47<00:11,  1.05s/it][A

loss: tensor(0.7491, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:48<00:10,  1.02s/it][A

loss: tensor(0.7003, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:49<00:09,  1.01s/it][A

loss: tensor(0.6904, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:50<00:08,  1.03s/it][A

loss: tensor(0.6374, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:51<00:07,  1.02s/it][A

loss: tensor(0.5864, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:52<00:06,  1.04s/it][A

loss: tensor(0.6977, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:53<00:05,  1.02s/it][A

loss: tensor(0.6746, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:54<00:04,  1.04s/it][A

loss: tensor(0.6951, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:55<00:03,  1.03s/it][A

loss: tensor(0.6303, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:56<00:02,  1.01s/it][A

loss: tensor(0.8599, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:57<00:01,  1.04s/it][A

loss: tensor(0.6245, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:58<00:00,  1.03s/it][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.56it/s][A

loss: tensor(0.4700, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6753110284345192

	Training acc: 0.8143681086779032

	Training prec: 0.3593296117929779

	Training rec: 0.25131039771340796

	Training f1: 0.2558493882569904

	Current Learning rate:  0.0008857142857142857



  7%|▋         | 2/29 [00:00<00:04,  6.44it/s][A
 10%|█         | 3/29 [00:00<00:04,  6.34it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.43it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.48it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.50it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.51it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.58it/s][A
 31%|███       | 9/29 [00:01<00:03,  6.59it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.57it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.55it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.55it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.56it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.61it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.64it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.65it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.65it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.65it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.56it/s][A
 69%|██████▉   | 20/29 [00:03<00:01,  6.48it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.7172224388040346

	Validation acc: 0.7950056821897565

	Validation prec: 0.3191282863169468

	Validation rec: 0.26655166451298845

	Validation f1: 0.2665725473066811



  2%|▏         | 1/57 [00:01<01:01,  1.10s/it][A

loss: tensor(0.6820, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:02<00:57,  1.04s/it][A

loss: tensor(0.7389, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:03<00:58,  1.09s/it][A

loss: tensor(0.5860, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:04<00:55,  1.05s/it][A

loss: tensor(0.6731, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:56,  1.08s/it][A

loss: tensor(0.6428, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:06<00:54,  1.07s/it][A

loss: tensor(0.6280, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:52,  1.04s/it][A

loss: tensor(0.6942, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:52,  1.07s/it][A

loss: tensor(0.8655, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:50,  1.06s/it][A

loss: tensor(0.5223, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:51,  1.10s/it][A

loss: tensor(0.5914, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:49,  1.07s/it][A

loss: tensor(0.6493, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:47,  1.05s/it][A

loss: tensor(0.7972, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:47,  1.07s/it][A

loss: tensor(0.7826, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:45,  1.05s/it][A

loss: tensor(0.5609, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:16<00:45,  1.07s/it][A

loss: tensor(0.8801, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:42,  1.04s/it][A

loss: tensor(0.5158, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:18<00:42,  1.06s/it][A

loss: tensor(0.5650, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:19<00:40,  1.05s/it][A

loss: tensor(0.7145, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:20<00:39,  1.04s/it][A

loss: tensor(0.6702, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:21<00:39,  1.08s/it][A

loss: tensor(0.6137, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:22<00:37,  1.05s/it][A

loss: tensor(0.8851, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:23<00:37,  1.07s/it][A

loss: tensor(0.5579, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:24<00:35,  1.05s/it][A

loss: tensor(0.5320, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:25<00:35,  1.09s/it][A

loss: tensor(0.5709, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:26<00:34,  1.07s/it][A

loss: tensor(0.7612, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:27<00:33,  1.07s/it][A

loss: tensor(0.6015, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:29<00:34,  1.15s/it][A

loss: tensor(0.5753, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:30<00:32,  1.11s/it][A

loss: tensor(0.6403, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:31<00:32,  1.15s/it][A

loss: tensor(0.7119, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:32<00:31,  1.16s/it][A

loss: tensor(0.5846, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:33<00:30,  1.16s/it][A

loss: tensor(0.6655, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:34<00:27,  1.11s/it][A

loss: tensor(0.7262, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:35<00:26,  1.09s/it][A

loss: tensor(0.6768, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:36<00:25,  1.10s/it][A

loss: tensor(0.7268, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:37<00:24,  1.10s/it][A

loss: tensor(0.5380, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:39<00:24,  1.15s/it][A

loss: tensor(0.6414, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:40<00:22,  1.12s/it][A

loss: tensor(0.6481, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:41<00:20,  1.08s/it][A

loss: tensor(0.6079, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:42<00:19,  1.09s/it][A

loss: tensor(0.4879, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:43<00:17,  1.05s/it][A

loss: tensor(0.8681, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:44<00:17,  1.06s/it][A

loss: tensor(0.5977, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:45<00:15,  1.03s/it][A

loss: tensor(0.7781, device='cuda:0', grad_fn=<NllLossBackward>)


  _warn_prf(average, modifier, msg_start, len(result))

 75%|███████▌  | 43/57 [00:46<00:14,  1.06s/it][A

loss: tensor(0.5615, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:47<00:13,  1.03s/it][A

loss: tensor(0.7684, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:48<00:12,  1.01s/it][A

loss: tensor(0.8543, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:49<00:11,  1.03s/it][A

loss: tensor(0.7349, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:50<00:10,  1.01s/it][A

loss: tensor(0.7126, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:51<00:09,  1.03s/it][A

loss: tensor(0.7261, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:52<00:08,  1.01s/it][A

loss: tensor(0.6011, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:53<00:07,  1.03s/it][A

loss: tensor(0.4476, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:54<00:06,  1.00s/it][A

loss: tensor(0.5298, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:55<00:04,  1.01it/s][A

loss: tensor(0.7273, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:56<00:04,  1.01s/it][A

loss: tensor(0.6326, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:57<00:03,  1.00s/it][A

loss: tensor(0.7104, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:58<00:02,  1.03s/it][A

loss: tensor(0.6345, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:59<00:01,  1.02s/it][A

loss: tensor(0.7864, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [01:00<00:00,  1.06s/it][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:03,  7.12it/s][A

loss: tensor(0.8289, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6669005589526996

	Training acc: 0.813014835756027

	Training prec: 0.37424127943877256

	Training rec: 0.25085154114944375

	Training f1: 0.2573032504823149

	Current Learning rate:  0.0008571428571428571



  7%|▋         | 2/29 [00:00<00:03,  7.17it/s][A
 10%|█         | 3/29 [00:00<00:03,  7.18it/s][A
 14%|█▍        | 4/29 [00:00<00:04,  5.44it/s][A
 17%|█▋        | 5/29 [00:00<00:04,  5.96it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.34it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.60it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.68it/s][A
 31%|███       | 9/29 [00:01<00:02,  6.72it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.76it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.70it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.70it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.73it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.77it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.79it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.80it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.71it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.53it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.44it/s][A
 69%|██████▉   | 20/29 [00:03<00:01,  6.56it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.6890622809015471

	Validation acc: 0.8024803286861315

	Validation prec: 0.34354837347594946

	Validation rec: 0.2793674324703808

	Validation f1: 0.2815262594883792



  2%|▏         | 1/57 [00:00<00:53,  1.05it/s][A

loss: tensor(0.7400, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:02<00:56,  1.03s/it][A

loss: tensor(0.6730, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:03<00:54,  1.00s/it][A

loss: tensor(0.7500, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:03<00:52,  1.01it/s][A

loss: tensor(0.7481, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:53,  1.03s/it][A

loss: tensor(0.7298, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:06<00:51,  1.00s/it][A

loss: tensor(0.7497, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:51,  1.02s/it][A

loss: tensor(0.6520, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:48,  1.00it/s][A

loss: tensor(0.5939, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:48,  1.02s/it][A

loss: tensor(0.6326, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:46,  1.01it/s][A

loss: tensor(0.5988, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:10<00:44,  1.02it/s][A

loss: tensor(0.5690, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:45,  1.01s/it][A

loss: tensor(0.5926, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:12<00:43,  1.02it/s][A

loss: tensor(0.5846, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:43,  1.01s/it][A

loss: tensor(0.7547, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:14<00:41,  1.01it/s][A

loss: tensor(0.6834, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:15<00:39,  1.03it/s][A

loss: tensor(0.4843, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:16<00:39,  1.00it/s][A

loss: tensor(0.7445, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:17<00:38,  1.01it/s][A

loss: tensor(0.6354, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:19<00:38,  1.03s/it][A

loss: tensor(0.6053, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:20<00:39,  1.07s/it][A

loss: tensor(0.6673, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:21<00:39,  1.09s/it][A

loss: tensor(0.6936, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:22<00:37,  1.06s/it][A

loss: tensor(0.5903, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:23<00:35,  1.04s/it][A

loss: tensor(0.7331, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:24<00:35,  1.08s/it][A

loss: tensor(0.8031, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:25<00:33,  1.05s/it][A

loss: tensor(0.6531, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:26<00:33,  1.08s/it][A

loss: tensor(0.5486, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:27<00:32,  1.07s/it][A

loss: tensor(0.7893, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:28<00:31,  1.09s/it][A

loss: tensor(0.6110, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:29<00:29,  1.06s/it][A

loss: tensor(0.6377, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:30<00:27,  1.03s/it][A

loss: tensor(0.7824, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:31<00:27,  1.04s/it][A

loss: tensor(0.7641, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:32<00:25,  1.01s/it][A

loss: tensor(0.6734, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:33<00:24,  1.03s/it][A

loss: tensor(0.5337, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:34<00:23,  1.00s/it][A

loss: tensor(0.6095, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:35<00:22,  1.02s/it][A

loss: tensor(0.7456, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:36<00:21,  1.02s/it][A

loss: tensor(0.5067, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:37<00:20,  1.00s/it][A

loss: tensor(0.6176, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:38<00:19,  1.02s/it][A

loss: tensor(0.6050, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:39<00:18,  1.02s/it][A

loss: tensor(0.6296, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:40<00:17,  1.03s/it][A

loss: tensor(0.7778, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:41<00:16,  1.01s/it][A

loss: tensor(0.6620, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:42<00:14,  1.01it/s][A

loss: tensor(0.7091, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:43<00:14,  1.02s/it][A

loss: tensor(0.5045, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:44<00:13,  1.02s/it][A

loss: tensor(0.5147, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:46<00:12,  1.05s/it][A

loss: tensor(0.8647, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:47<00:11,  1.03s/it][A

loss: tensor(0.7482, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:48<00:10,  1.06s/it][A

loss: tensor(0.6227, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:49<00:09,  1.04s/it][A

loss: tensor(0.7472, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:50<00:08,  1.03s/it][A

loss: tensor(0.6835, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:51<00:07,  1.09s/it][A

loss: tensor(0.6335, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:52<00:06,  1.12s/it][A

loss: tensor(0.7924, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:53<00:05,  1.11s/it][A

loss: tensor(0.6506, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:54<00:04,  1.08s/it][A

loss: tensor(0.6051, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:55<00:03,  1.09s/it][A

loss: tensor(0.5716, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:56<00:02,  1.06s/it][A

loss: tensor(0.6362, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:57<00:01,  1.03s/it][A

loss: tensor(0.6929, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:58<00:00,  1.03s/it][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  5.94it/s][A

loss: tensor(0.7223, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6641339424409365

	Training acc: 0.8140523912017466

	Training prec: 0.39618652980049873

	Training rec: 0.25643003187568536

	Training f1: 0.2651539674398589

	Current Learning rate:  0.0008285714285714286



  7%|▋         | 2/29 [00:00<00:04,  5.96it/s][A
 10%|█         | 3/29 [00:00<00:04,  5.93it/s][A
 14%|█▍        | 4/29 [00:00<00:04,  6.07it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.35it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.41it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.10it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  5.78it/s][A
 31%|███       | 9/29 [00:01<00:03,  5.79it/s][A
 34%|███▍      | 10/29 [00:01<00:03,  5.79it/s][A
 38%|███▊      | 11/29 [00:01<00:03,  5.86it/s][A
 41%|████▏     | 12/29 [00:02<00:02,  6.02it/s][A
 45%|████▍     | 13/29 [00:02<00:02,  6.23it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.04it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  5.61it/s][A
 55%|█████▌    | 16/29 [00:02<00:02,  5.48it/s][A
 59%|█████▊    | 17/29 [00:02<00:02,  5.74it/s][A
 62%|██████▏   | 18/29 [00:03<00:01,  5.89it/s][A
 66%|██████▌   | 19/29 [00:03<00:01,  6.03it/s][A
 69%|██████▉   | 20/29 [00:03<00:01,  6.09it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.7342769328890175

	Validation acc: 0.7965035250693275

	Validation prec: 0.25931231736790794

	Validation rec: 0.2342250751633978

	Validation f1: 0.22743732324044988



  2%|▏         | 1/57 [00:01<00:58,  1.05s/it][A

loss: tensor(0.7761, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:02<01:00,  1.10s/it][A

loss: tensor(0.5862, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:03<00:56,  1.04s/it][A

loss: tensor(0.5875, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:04<00:56,  1.07s/it][A

loss: tensor(0.6598, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:54,  1.04s/it][A

loss: tensor(0.7493, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:06<00:56,  1.10s/it][A

loss: tensor(0.7664, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:55,  1.11s/it][A

loss: tensor(0.6513, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:53,  1.09s/it][A

loss: tensor(0.6361, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:51,  1.08s/it][A

loss: tensor(0.5875, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:49,  1.05s/it][A

loss: tensor(0.6997, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:48,  1.06s/it][A

loss: tensor(0.5899, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:46,  1.04s/it][A

loss: tensor(0.4873, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:45,  1.04s/it][A

loss: tensor(0.5605, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:43,  1.02s/it][A

loss: tensor(0.6916, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:15<00:42,  1.01s/it][A

loss: tensor(0.7492, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:42,  1.05s/it][A

loss: tensor(0.6693, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:17<00:41,  1.03s/it][A

loss: tensor(0.5056, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:18<00:41,  1.05s/it][A

loss: tensor(0.7576, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:19<00:39,  1.03s/it][A

loss: tensor(0.7290, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:21<00:38,  1.04s/it][A

loss: tensor(0.6640, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:22<00:37,  1.05s/it][A

loss: tensor(0.6963, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:23<00:35,  1.03s/it][A

loss: tensor(0.6823, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:24<00:35,  1.04s/it][A

loss: tensor(0.7010, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:25<00:33,  1.02s/it][A

loss: tensor(0.6942, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:26<00:33,  1.05s/it][A

loss: tensor(0.6255, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:27<00:31,  1.01s/it][A

loss: tensor(0.5776, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:28<00:29,  1.00it/s][A

loss: tensor(0.6522, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:29<00:29,  1.01s/it][A

loss: tensor(0.6497, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:30<00:27,  1.01it/s][A

loss: tensor(0.6187, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:31<00:27,  1.02s/it][A

loss: tensor(0.7798, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:32<00:26,  1.00s/it][A

loss: tensor(0.7093, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:33<00:25,  1.03s/it][A

loss: tensor(0.6217, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:34<00:24,  1.01s/it][A

loss: tensor(0.7061, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:35<00:22,  1.00it/s][A

loss: tensor(0.5788, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:36<00:22,  1.01s/it][A

loss: tensor(0.5840, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:37<00:20,  1.01it/s][A

loss: tensor(0.5789, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:38<00:20,  1.03s/it][A

loss: tensor(0.8562, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:39<00:19,  1.01s/it][A

loss: tensor(0.5393, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:40<00:18,  1.05s/it][A

loss: tensor(0.7306, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:41<00:17,  1.03s/it][A

loss: tensor(0.5587, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:42<00:16,  1.02s/it][A

loss: tensor(0.7585, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:43<00:15,  1.05s/it][A

loss: tensor(0.6353, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:44<00:14,  1.03s/it][A

loss: tensor(0.8379, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:45<00:13,  1.05s/it][A

loss: tensor(0.4688, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:46<00:12,  1.04s/it][A

loss: tensor(0.6162, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:47<00:11,  1.03s/it][A

loss: tensor(0.6769, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:48<00:10,  1.05s/it][A

loss: tensor(0.6884, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:49<00:09,  1.04s/it][A

loss: tensor(0.7362, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:50<00:08,  1.06s/it][A

loss: tensor(0.5557, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:51<00:07,  1.04s/it][A

loss: tensor(0.6160, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:52<00:06,  1.05s/it][A

loss: tensor(0.9133, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:53<00:05,  1.03s/it][A

loss: tensor(0.5116, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:54<00:04,  1.00s/it][A

loss: tensor(0.7161, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:55<00:03,  1.02s/it][A

loss: tensor(0.7310, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:56<00:02,  1.00s/it][A

loss: tensor(0.7530, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:57<00:01,  1.02s/it][A

loss: tensor(0.7432, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:58<00:00,  1.03s/it][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.68it/s][A

loss: tensor(0.7725, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6662268283074362

	Training acc: 0.8138193746928999

	Training prec: 0.38325793186896445

	Training rec: 0.25348020141168914

	Training f1: 0.25959153205477287

	Current Learning rate:  0.0008



  7%|▋         | 2/29 [00:00<00:04,  6.66it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.65it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.63it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.66it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.65it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.66it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.62it/s][A
 31%|███       | 9/29 [00:01<00:03,  6.60it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.56it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.58it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.59it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.60it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.61it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.64it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.63it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.64it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.64it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.64it/s][A
 69%|██████▉   | 20/29 [00:03<00:01,  6.62it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.7030956149101257

	Validation acc: 0.7967459881370774

	Validation prec: 0.3503620109360978

	Validation rec: 0.26021592790096126

	Validation f1: 0.2647008034669893



  2%|▏         | 1/57 [00:01<00:59,  1.07s/it][A

loss: tensor(0.4883, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:02<00:55,  1.00s/it][A

loss: tensor(0.6143, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:03<00:55,  1.04s/it][A

loss: tensor(0.5715, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:04<00:53,  1.01s/it][A

loss: tensor(0.4753, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:51,  1.01it/s][A

loss: tensor(0.6338, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:06<00:51,  1.02s/it][A

loss: tensor(0.6635, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:49,  1.00it/s][A

loss: tensor(0.6414, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:50,  1.02s/it][A

loss: tensor(0.6351, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:47,  1.00it/s][A

loss: tensor(0.6295, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:47,  1.02s/it][A

loss: tensor(0.6752, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:45,  1.00it/s][A

loss: tensor(0.5679, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:44,  1.02it/s][A

loss: tensor(0.5663, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:44,  1.01s/it][A

loss: tensor(0.5401, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:42,  1.01it/s][A

loss: tensor(0.6654, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:15<00:42,  1.02s/it][A

loss: tensor(0.7809, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:40,  1.00it/s][A

loss: tensor(0.6987, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:17<00:40,  1.01s/it][A

loss: tensor(0.7554, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:18<00:38,  1.01it/s][A

loss: tensor(0.6822, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:19<00:37,  1.02it/s][A

loss: tensor(0.5518, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:20<00:37,  1.01s/it][A

loss: tensor(0.7247, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:21<00:35,  1.01it/s][A

loss: tensor(0.6146, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:22<00:35,  1.02s/it][A

loss: tensor(0.6573, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:23<00:33,  1.00it/s][A

loss: tensor(0.5349, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:24<00:32,  1.02it/s][A

loss: tensor(0.7983, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:25<00:32,  1.01s/it][A

loss: tensor(0.5394, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:26<00:30,  1.01it/s][A

loss: tensor(0.7990, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:27<00:30,  1.02s/it][A

loss: tensor(0.7487, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:28<00:28,  1.00it/s][A

loss: tensor(0.6873, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:29<00:28,  1.02s/it][A

loss: tensor(0.7769, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:30<00:26,  1.00it/s][A

loss: tensor(0.7433, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:31<00:25,  1.02it/s][A

loss: tensor(0.6328, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:32<00:25,  1.01s/it][A

loss: tensor(0.6159, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:33<00:23,  1.01it/s][A

loss: tensor(0.7046, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:34<00:23,  1.02s/it][A

loss: tensor(0.6449, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:35<00:21,  1.00it/s][A

loss: tensor(0.6605, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:36<00:21,  1.02s/it][A

loss: tensor(0.6117, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:37<00:19,  1.00it/s][A

loss: tensor(0.7081, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:38<00:18,  1.02it/s][A

loss: tensor(0.7378, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:39<00:18,  1.01s/it][A

loss: tensor(0.7610, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:40<00:16,  1.01it/s][A

loss: tensor(0.5613, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:41<00:16,  1.01s/it][A

loss: tensor(0.6624, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:42<00:14,  1.01it/s][A

loss: tensor(0.6304, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:43<00:13,  1.02it/s][A

loss: tensor(0.6780, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:44<00:13,  1.01s/it][A

loss: tensor(0.5836, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:45<00:11,  1.01it/s][A

loss: tensor(0.5200, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:46<00:11,  1.01s/it][A

loss: tensor(0.6105, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:47<00:09,  1.01it/s][A

loss: tensor(0.7490, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:48<00:09,  1.01s/it][A

loss: tensor(0.6756, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:49<00:07,  1.01it/s][A

loss: tensor(0.8342, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:50<00:06,  1.02it/s][A

loss: tensor(0.7202, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:51<00:06,  1.01s/it][A

loss: tensor(0.4913, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:52<00:04,  1.01it/s][A

loss: tensor(0.8754, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:53<00:04,  1.01s/it][A

loss: tensor(0.5916, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:54<00:02,  1.01it/s][A

loss: tensor(0.8213, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:55<00:02,  1.02s/it][A

loss: tensor(0.6439, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:56<00:00,  1.01it/s][A

loss: tensor(0.7115, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:56<00:00,  1.00it/s][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.76it/s][A

loss: tensor(0.7400, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6603302986998307

	Training acc: 0.8147645254723005

	Training prec: 0.388719946498818

	Training rec: 0.2528606251860393

	Training f1: 0.25944382255015164

	Current Learning rate:  0.0007714285714285715



  7%|▋         | 2/29 [00:00<00:04,  6.73it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.74it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.77it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.78it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.80it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.80it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.76it/s][A
 31%|███       | 9/29 [00:01<00:02,  6.77it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.77it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.78it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.78it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.79it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.73it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.72it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.67it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.66it/s][A
 62%|██████▏   | 18/29 [00:02<00:02,  5.42it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  5.75it/s][A
 69%|██████▉   | 20/29 [00:03<00:01,  5.99it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.6918191776193422

	Validation acc: 0.8011418227027916

	Validation prec: 0.35015527219114223

	Validation rec: 0.2688230004307011

	Validation f1: 0.27316590992901624



  2%|▏         | 1/57 [00:00<00:53,  1.05it/s][A

loss: tensor(0.8629, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:01<00:55,  1.01s/it][A

loss: tensor(0.6876, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:02<00:53,  1.01it/s][A

loss: tensor(0.8069, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:03<00:51,  1.03it/s][A

loss: tensor(0.5733, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:04<00:52,  1.01s/it][A

loss: tensor(0.5839, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:05<00:50,  1.01it/s][A

loss: tensor(0.6665, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:06<00:50,  1.01s/it][A

loss: tensor(0.7787, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:07<00:48,  1.01it/s][A

loss: tensor(0.6875, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:08<00:47,  1.02it/s][A

loss: tensor(0.8434, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:09<00:47,  1.01s/it][A

loss: tensor(0.8647, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:10<00:45,  1.01it/s][A

loss: tensor(0.6190, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:11<00:45,  1.01s/it][A

loss: tensor(0.6123, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:12<00:43,  1.01it/s][A

loss: tensor(0.6419, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:13<00:43,  1.01s/it][A

loss: tensor(0.6230, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:14<00:41,  1.01it/s][A

loss: tensor(0.5418, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:15<00:40,  1.02it/s][A

loss: tensor(0.5772, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:16<00:40,  1.00s/it][A

loss: tensor(0.6783, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:17<00:38,  1.01it/s][A

loss: tensor(0.6236, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:18<00:38,  1.01s/it][A

loss: tensor(0.6708, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:19<00:36,  1.01it/s][A

loss: tensor(0.7615, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:20<00:36,  1.01s/it][A

loss: tensor(0.6237, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:21<00:34,  1.01it/s][A

loss: tensor(0.6054, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:22<00:33,  1.02it/s][A

loss: tensor(0.6618, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:23<00:33,  1.01s/it][A

loss: tensor(0.7387, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:24<00:31,  1.01it/s][A

loss: tensor(0.5446, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:25<00:31,  1.02s/it][A

loss: tensor(0.6036, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:26<00:29,  1.00it/s][A

loss: tensor(0.6894, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:27<00:28,  1.02it/s][A

loss: tensor(0.6499, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:28<00:28,  1.01s/it][A

loss: tensor(0.7728, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:29<00:26,  1.01it/s][A

loss: tensor(0.7632, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:30<00:26,  1.02s/it][A

loss: tensor(0.8463, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:31<00:24,  1.01it/s][A

loss: tensor(0.7323, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:32<00:24,  1.02s/it][A

loss: tensor(0.6120, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:33<00:22,  1.00it/s][A

loss: tensor(0.7465, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:34<00:21,  1.02it/s][A

loss: tensor(0.5688, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:35<00:21,  1.01s/it][A

loss: tensor(0.6391, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:36<00:19,  1.01it/s][A

loss: tensor(0.4308, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:37<00:19,  1.01s/it][A

loss: tensor(0.6153, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:38<00:17,  1.00it/s][A

loss: tensor(0.8151, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:39<00:17,  1.01s/it][A

loss: tensor(0.6539, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:40<00:15,  1.01it/s][A

loss: tensor(0.6553, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:41<00:14,  1.02it/s][A

loss: tensor(0.5504, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:42<00:14,  1.01s/it][A

loss: tensor(0.7284, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:43<00:12,  1.01it/s][A

loss: tensor(0.8082, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:44<00:12,  1.01s/it][A

loss: tensor(0.5505, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:45<00:10,  1.01it/s][A

loss: tensor(0.7639, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:46<00:10,  1.01s/it][A

loss: tensor(0.5482, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:47<00:08,  1.01it/s][A

loss: tensor(0.7138, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:48<00:07,  1.02it/s][A

loss: tensor(0.5981, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:49<00:07,  1.01s/it][A

loss: tensor(0.6883, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:50<00:05,  1.01it/s][A

loss: tensor(0.6216, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:51<00:05,  1.01s/it][A

loss: tensor(0.5690, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:52<00:03,  1.01it/s][A

loss: tensor(0.5317, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:53<00:02,  1.02it/s][A

loss: tensor(0.6999, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:54<00:02,  1.00s/it][A

loss: tensor(0.5222, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:55<00:00,  1.01it/s][A

loss: tensor(0.6211, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:56<00:00,  1.00it/s][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.64it/s][A

loss: tensor(0.6416, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6636901968403867

	Training acc: 0.8146798106638524

	Training prec: 0.40145063260709435

	Training rec: 0.2524218401857537

	Training f1: 0.2598124884023604

	Current Learning rate:  0.0007428571428571429



  7%|▋         | 2/29 [00:00<00:04,  6.59it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.63it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.61it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.63it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.62it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.59it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.56it/s][A
 31%|███       | 9/29 [00:01<00:03,  6.46it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.50it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.53it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.55it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.55it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.53it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.55it/s][A
 55%|█████▌    | 16/29 [00:02<00:02,  6.50it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.52it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.52it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.55it/s][A
 69%|██████▉   | 20/29 [00:03<00:01,  6.51it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.746670151578969

	Validation acc: 0.793925547327706

	Validation prec: 0.26941241037190483

	Validation rec: 0.23913775340627705

	Validation f1: 0.23122893327416338



  2%|▏         | 1/57 [00:00<00:53,  1.04it/s][A

loss: tensor(0.5875, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:02<00:56,  1.02s/it][A

loss: tensor(0.7477, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:02<00:53,  1.00it/s][A

loss: tensor(0.7491, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:04<00:54,  1.03s/it][A

loss: tensor(0.4590, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:51,  1.00it/s][A

loss: tensor(0.7719, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:05<00:50,  1.02it/s][A

loss: tensor(0.7128, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:50,  1.01s/it][A

loss: tensor(0.7400, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:07<00:48,  1.01it/s][A

loss: tensor(0.5912, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:48,  1.02s/it][A

loss: tensor(0.6615, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:46,  1.00it/s][A

loss: tensor(0.6187, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:46,  1.02s/it][A

loss: tensor(0.5579, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:44,  1.00it/s][A

loss: tensor(0.7091, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:43,  1.01it/s][A

loss: tensor(0.6655, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:43,  1.02s/it][A

loss: tensor(0.6571, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:15<00:42,  1.00s/it][A

loss: tensor(0.7212, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:42,  1.03s/it][A

loss: tensor(0.6116, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:17<00:40,  1.01s/it][A

loss: tensor(0.5134, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:18<00:40,  1.03s/it][A

loss: tensor(0.6174, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:19<00:37,  1.00it/s][A

loss: tensor(0.7190, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:20<00:36,  1.02it/s][A

loss: tensor(0.5813, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:21<00:35,  1.00it/s][A

loss: tensor(0.6229, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:22<00:34,  1.02it/s][A

loss: tensor(0.7759, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:23<00:34,  1.01s/it][A

loss: tensor(0.5258, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:24<00:32,  1.01it/s][A

loss: tensor(0.6226, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:25<00:32,  1.01s/it][A

loss: tensor(0.7093, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:26<00:30,  1.01it/s][A

loss: tensor(0.8044, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:26<00:29,  1.02it/s][A

loss: tensor(0.6616, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:28<00:28,  1.00it/s][A

loss: tensor(0.7075, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:28<00:27,  1.02it/s][A

loss: tensor(0.8520, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:30<00:27,  1.01s/it][A

loss: tensor(0.6803, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:30<00:25,  1.01it/s][A

loss: tensor(0.6317, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:31<00:24,  1.02it/s][A

loss: tensor(0.5845, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:32<00:24,  1.00s/it][A

loss: tensor(0.6583, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:33<00:22,  1.02it/s][A

loss: tensor(0.5763, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:34<00:22,  1.01s/it][A

loss: tensor(0.5557, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:35<00:20,  1.01it/s][A

loss: tensor(0.6346, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:37<00:20,  1.01s/it][A

loss: tensor(0.6844, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:37<00:18,  1.01it/s][A

loss: tensor(0.7646, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:38<00:17,  1.03it/s][A

loss: tensor(0.8605, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:39<00:17,  1.00s/it][A

loss: tensor(0.6966, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:40<00:15,  1.02it/s][A

loss: tensor(0.6811, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:41<00:15,  1.01s/it][A

loss: tensor(0.6290, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:42<00:13,  1.01it/s][A

loss: tensor(0.6664, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:43<00:13,  1.01s/it][A

loss: tensor(0.7596, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:44<00:11,  1.01it/s][A

loss: tensor(0.6068, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:45<00:10,  1.02it/s][A

loss: tensor(0.7353, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:46<00:10,  1.01s/it][A

loss: tensor(0.5886, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:47<00:08,  1.01it/s][A

loss: tensor(0.6180, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:48<00:08,  1.01s/it][A

loss: tensor(0.7005, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:49<00:06,  1.01it/s][A

loss: tensor(0.8201, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:50<00:06,  1.01s/it][A

loss: tensor(0.6667, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:51<00:04,  1.01it/s][A

loss: tensor(0.7543, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:52<00:03,  1.02it/s][A

loss: tensor(0.5719, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:53<00:03,  1.01s/it][A

loss: tensor(0.6677, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:54<00:01,  1.00it/s][A

loss: tensor(0.5500, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:55<00:01,  1.02s/it][A

loss: tensor(0.6638, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:56<00:00,  1.00it/s][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.58it/s][A

loss: tensor(0.6744, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6659106084129267

	Training acc: 0.8150749997545921

	Training prec: 0.37033519799433706

	Training rec: 0.2562483886563159

	Training f1: 0.26231292634961145

	Current Learning rate:  0.0007142857142857143



  7%|▋         | 2/29 [00:00<00:04,  6.41it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.51it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.57it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.61it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.62it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.64it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.60it/s][A
 31%|███       | 9/29 [00:01<00:03,  6.58it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.55it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.58it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.58it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.61it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.60it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.63it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.62it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.62it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.59it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.60it/s][A
 69%|██████▉   | 20/29 [00:03<00:01,  6.59it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.7226354767536295

	Validation acc: 0.797153865507675

	Validation prec: 0.2856903576802411

	Validation rec: 0.25940131602787064

	Validation f1: 0.2552497449945875



  2%|▏         | 1/57 [00:01<01:00,  1.07s/it][A

loss: tensor(0.6602, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:02<00:55,  1.01s/it][A

loss: tensor(0.4900, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:03<00:53,  1.01it/s][A

loss: tensor(0.8051, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:04<00:54,  1.02s/it][A

loss: tensor(0.5342, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:52,  1.00s/it][A

loss: tensor(0.4675, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:06<00:52,  1.03s/it][A

loss: tensor(0.6516, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:50,  1.01s/it][A

loss: tensor(0.6011, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:50,  1.03s/it][A

loss: tensor(0.6589, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:48,  1.01s/it][A

loss: tensor(0.7858, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:46,  1.01it/s][A

loss: tensor(0.7098, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:46,  1.01s/it][A

loss: tensor(0.7443, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:44,  1.01it/s][A

loss: tensor(0.5799, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:44,  1.01s/it][A

loss: tensor(0.6612, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:42,  1.01it/s][A

loss: tensor(0.5634, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:15<00:42,  1.01s/it][A

loss: tensor(0.7868, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:40,  1.01it/s][A

loss: tensor(0.6360, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:17<00:38,  1.03it/s][A

loss: tensor(0.6966, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:18<00:39,  1.00s/it][A

loss: tensor(0.6859, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:19<00:37,  1.02it/s][A

loss: tensor(0.5815, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:20<00:37,  1.01s/it][A

loss: tensor(0.6811, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:21<00:35,  1.01it/s][A

loss: tensor(0.8859, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:22<00:35,  1.01s/it][A

loss: tensor(0.7500, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:23<00:33,  1.01it/s][A

loss: tensor(0.6026, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:24<00:32,  1.02it/s][A

loss: tensor(0.7457, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:25<00:32,  1.01s/it][A

loss: tensor(0.6994, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:26<00:30,  1.01it/s][A

loss: tensor(0.5845, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:27<00:30,  1.01s/it][A

loss: tensor(0.5718, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:28<00:28,  1.01it/s][A

loss: tensor(0.6604, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:28<00:27,  1.03it/s][A

loss: tensor(0.7016, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:30<00:27,  1.00s/it][A

loss: tensor(0.7343, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:30<00:25,  1.01it/s][A

loss: tensor(0.6322, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:32<00:25,  1.01s/it][A

loss: tensor(0.6686, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:32<00:23,  1.01it/s][A

loss: tensor(0.7178, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:34<00:23,  1.01s/it][A

loss: tensor(0.5772, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:35<00:21,  1.01it/s][A

loss: tensor(0.5988, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:35<00:20,  1.02it/s][A

loss: tensor(0.4344, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:37<00:20,  1.00s/it][A

loss: tensor(0.8106, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:37<00:18,  1.02it/s][A

loss: tensor(0.6653, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:39<00:18,  1.01s/it][A

loss: tensor(0.7985, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:39<00:16,  1.01it/s][A

loss: tensor(0.5123, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:41<00:16,  1.01s/it][A

loss: tensor(0.5556, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:41<00:14,  1.01it/s][A

loss: tensor(0.7066, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:42<00:13,  1.03it/s][A

loss: tensor(0.6192, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:43<00:13,  1.01s/it][A

loss: tensor(0.6265, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:44<00:11,  1.01it/s][A

loss: tensor(0.6791, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:46<00:11,  1.02s/it][A

loss: tensor(0.8520, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:46<00:09,  1.01it/s][A

loss: tensor(0.7930, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:47<00:09,  1.01s/it][A

loss: tensor(0.6949, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:48<00:07,  1.01it/s][A

loss: tensor(0.6508, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:49<00:06,  1.02it/s][A

loss: tensor(0.6514, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:50<00:06,  1.00s/it][A

loss: tensor(0.5978, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:51<00:04,  1.02it/s][A

loss: tensor(0.6382, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:52<00:04,  1.01s/it][A

loss: tensor(0.6063, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:53<00:02,  1.01it/s][A

loss: tensor(0.6845, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:54<00:01,  1.03it/s][A

loss: tensor(0.5985, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:55<00:00,  1.00it/s][A

loss: tensor(0.5422, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:56<00:00,  1.01it/s][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.77it/s][A

loss: tensor(0.5773, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6562600742306626

	Training acc: 0.8150740396229592

	Training prec: 0.3952030528545324

	Training rec: 0.25626758777720954

	Training f1: 0.26349100892550836

	Current Learning rate:  0.0006857142857142857



  7%|▋         | 2/29 [00:00<00:04,  6.56it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.56it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.59it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.60it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.62it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.66it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.67it/s][A
 31%|███       | 9/29 [00:01<00:02,  6.67it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.69it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.69it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.67it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.69it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.68it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.70it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.69it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.66it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.66it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.64it/s][A
 69%|██████▉   | 20/29 [00:03<00:01,  6.64it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.744574806813536

	Validation acc: 0.7959570355513297

	Validation prec: 0.2819304977952754

	Validation rec: 0.2483702059070258

	Validation f1: 0.24489217014464865



  2%|▏         | 1/57 [00:00<00:53,  1.05it/s][A

loss: tensor(0.7639, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:01<00:52,  1.06it/s][A

loss: tensor(0.4950, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:02<00:54,  1.00s/it][A

loss: tensor(0.5713, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:03<00:51,  1.02it/s][A

loss: tensor(0.6451, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:04<00:52,  1.01s/it][A

loss: tensor(0.7610, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:05<00:50,  1.01it/s][A

loss: tensor(0.6812, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:06<00:50,  1.01s/it][A

loss: tensor(0.7378, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:07<00:48,  1.01it/s][A

loss: tensor(0.6487, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:08<00:46,  1.02it/s][A

loss: tensor(0.5674, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:09<00:47,  1.00s/it][A

loss: tensor(0.5925, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:10<00:45,  1.02it/s][A

loss: tensor(0.7711, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:11<00:45,  1.01s/it][A

loss: tensor(0.6167, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:12<00:43,  1.01it/s][A

loss: tensor(0.8078, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:13<00:42,  1.02it/s][A

loss: tensor(0.7437, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:14<00:42,  1.01s/it][A

loss: tensor(0.5043, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:15<00:40,  1.00it/s][A

loss: tensor(0.5532, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:16<00:40,  1.02s/it][A

loss: tensor(0.7182, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:17<00:39,  1.00s/it][A

loss: tensor(0.4310, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:18<00:38,  1.02s/it][A

loss: tensor(0.7851, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:19<00:36,  1.00it/s][A

loss: tensor(0.7295, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:20<00:35,  1.02it/s][A

loss: tensor(0.7280, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:21<00:35,  1.01s/it][A

loss: tensor(0.6820, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:22<00:33,  1.01it/s][A

loss: tensor(0.6929, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:23<00:33,  1.01s/it][A

loss: tensor(0.6581, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:24<00:31,  1.01it/s][A

loss: tensor(0.7537, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:25<00:31,  1.01s/it][A

loss: tensor(0.6870, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:26<00:29,  1.01it/s][A

loss: tensor(0.6626, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:27<00:28,  1.03it/s][A

loss: tensor(0.5340, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:28<00:27,  1.00it/s][A

loss: tensor(0.7910, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:29<00:26,  1.02it/s][A

loss: tensor(0.6392, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:30<00:26,  1.00s/it][A

loss: tensor(0.6017, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:31<00:24,  1.02it/s][A

loss: tensor(0.6410, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:32<00:23,  1.00it/s][A

loss: tensor(0.6875, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:33<00:22,  1.01it/s][A

loss: tensor(0.6405, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:34<00:21,  1.03it/s][A

loss: tensor(0.7548, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:35<00:20,  1.00it/s][A

loss: tensor(0.7554, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:36<00:19,  1.02it/s][A

loss: tensor(0.4762, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:37<00:19,  1.00s/it][A

loss: tensor(0.6931, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:38<00:17,  1.02it/s][A

loss: tensor(0.6359, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:39<00:16,  1.03it/s][A

loss: tensor(0.6873, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:40<00:16,  1.01s/it][A

loss: tensor(0.5259, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:41<00:14,  1.01it/s][A

loss: tensor(0.6624, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:42<00:14,  1.01s/it][A

loss: tensor(0.5826, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:43<00:12,  1.01it/s][A

loss: tensor(0.7415, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:44<00:12,  1.01s/it][A

loss: tensor(0.6407, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:45<00:10,  1.01it/s][A

loss: tensor(0.5957, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:46<00:09,  1.02it/s][A

loss: tensor(0.6218, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:47<00:09,  1.01s/it][A

loss: tensor(0.6591, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:48<00:07,  1.01it/s][A

loss: tensor(0.5673, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:49<00:07,  1.01s/it][A

loss: tensor(0.7091, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:50<00:05,  1.01it/s][A

loss: tensor(0.6166, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:51<00:05,  1.01s/it][A

loss: tensor(0.7601, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:52<00:03,  1.01it/s][A

loss: tensor(0.6627, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:53<00:02,  1.03it/s][A

loss: tensor(0.6053, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:54<00:02,  1.01s/it][A

loss: tensor(0.6945, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:55<00:00,  1.01it/s][A

loss: tensor(0.7005, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:56<00:00,  1.01it/s][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.77it/s][A

loss: tensor(0.6426, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6581577888706274

	Training acc: 0.8138711033152373

	Training prec: 0.3998537166622808

	Training rec: 0.26108049772004416

	Training f1: 0.2709666547522796

	Current Learning rate:  0.0006571428571428571



  7%|▋         | 2/29 [00:00<00:04,  6.75it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.73it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.75it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.75it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.74it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.71it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.66it/s][A
 31%|███       | 9/29 [00:01<00:03,  6.64it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.60it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.58it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.54it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.53it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.53it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.50it/s][A
 55%|█████▌    | 16/29 [00:02<00:02,  6.49it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.49it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.55it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.55it/s][A
 69%|██████▉   | 20/29 [00:03<00:01,  6.57it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.7034025346410686

	Validation acc: 0.7996938779456252

	Validation prec: 0.33163251716178915

	Validation rec: 0.25316238419090525

	Validation f1: 0.2541832675264271



  2%|▏         | 1/57 [00:00<00:52,  1.06it/s][A

loss: tensor(0.7174, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:01<00:55,  1.01s/it][A

loss: tensor(0.6251, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:02<00:52,  1.02it/s][A

loss: tensor(0.6048, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:03<00:52,  1.00it/s][A

loss: tensor(0.6571, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:04<00:50,  1.02it/s][A

loss: tensor(0.6143, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:05<00:49,  1.04it/s][A

loss: tensor(0.5795, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:06<00:49,  1.00it/s][A

loss: tensor(0.5772, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:07<00:47,  1.02it/s][A

loss: tensor(0.5914, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:08<00:48,  1.01s/it][A

loss: tensor(0.5047, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:09<00:46,  1.02it/s][A

loss: tensor(0.6200, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:10<00:44,  1.03it/s][A

loss: tensor(0.6673, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:11<00:44,  1.00it/s][A

loss: tensor(0.6986, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:12<00:43,  1.02it/s][A

loss: tensor(0.6606, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:13<00:43,  1.00s/it][A

loss: tensor(0.6581, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:14<00:41,  1.02it/s][A

loss: tensor(0.6356, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:15<00:41,  1.01s/it][A

loss: tensor(0.6978, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:16<00:39,  1.01it/s][A

loss: tensor(0.8294, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:17<00:38,  1.02it/s][A

loss: tensor(0.6447, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:18<00:38,  1.01s/it][A

loss: tensor(0.6658, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:19<00:36,  1.00it/s][A

loss: tensor(0.6158, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:20<00:36,  1.02s/it][A

loss: tensor(0.6147, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:21<00:35,  1.00s/it][A

loss: tensor(0.5473, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:22<00:34,  1.02s/it][A

loss: tensor(0.6946, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:23<00:33,  1.01s/it][A

loss: tensor(0.5904, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:24<00:31,  1.01it/s][A

loss: tensor(0.6773, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:25<00:31,  1.02s/it][A

loss: tensor(0.6548, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:26<00:30,  1.01s/it][A

loss: tensor(0.6267, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:27<00:29,  1.03s/it][A

loss: tensor(0.5701, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:28<00:28,  1.01s/it][A

loss: tensor(0.6697, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:29<00:26,  1.00it/s][A

loss: tensor(0.6078, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:31<00:26,  1.02s/it][A

loss: tensor(0.6113, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:31<00:25,  1.00s/it][A

loss: tensor(0.7327, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:33<00:24,  1.03s/it][A

loss: tensor(0.5753, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:34<00:23,  1.01s/it][A

loss: tensor(0.6089, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:35<00:22,  1.03s/it][A

loss: tensor(0.5697, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:36<00:21,  1.01s/it][A

loss: tensor(0.5560, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:37<00:19,  1.00it/s][A

loss: tensor(0.6569, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:38<00:19,  1.02s/it][A

loss: tensor(0.6754, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:39<00:18,  1.00s/it][A

loss: tensor(0.7092, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:40<00:17,  1.03s/it][A

loss: tensor(0.7336, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:41<00:16,  1.01s/it][A

loss: tensor(0.6171, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:42<00:15,  1.03s/it][A

loss: tensor(0.7588, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:43<00:14,  1.01s/it][A

loss: tensor(0.6687, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:44<00:12,  1.00it/s][A

loss: tensor(0.6895, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:45<00:12,  1.02s/it][A

loss: tensor(0.6770, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:46<00:11,  1.01s/it][A

loss: tensor(0.7297, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:47<00:10,  1.03s/it][A

loss: tensor(0.6635, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:48<00:09,  1.01s/it][A

loss: tensor(0.7028, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:49<00:08,  1.03s/it][A

loss: tensor(0.8088, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:50<00:07,  1.01s/it][A

loss: tensor(0.5358, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:51<00:05,  1.00it/s][A

loss: tensor(0.7095, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:52<00:05,  1.02s/it][A

loss: tensor(0.6600, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:53<00:04,  1.01s/it][A

loss: tensor(0.6178, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:54<00:03,  1.03s/it][A

loss: tensor(0.7208, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:55<00:02,  1.01s/it][A

loss: tensor(0.7012, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:56<00:00,  1.02it/s][A

loss: tensor(0.7890, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:57<00:00,  1.00s/it][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.63it/s][A

loss: tensor(0.7626, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6554408554445234

	Training acc: 0.8173643155838347

	Training prec: 0.4102329395506033

	Training rec: 0.25832533808845515

	Training f1: 0.2676940476901238

	Current Learning rate:  0.0006285714285714285



  7%|▋         | 2/29 [00:00<00:04,  6.58it/s][A
 10%|█         | 3/29 [00:00<00:04,  6.37it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.47it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.50it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.55it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.57it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.59it/s][A
 31%|███       | 9/29 [00:01<00:03,  6.62it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.62it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.62it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.63it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.60it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.59it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.59it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.58it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.57it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.57it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.60it/s][A
 69%|██████▉   | 20/29 [00:03<00:01,  6.61it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.6902989190200279

	Validation acc: 0.80169074614018

	Validation prec: 0.3416177725678616

	Validation rec: 0.2742612376788639

	Validation f1: 0.27602396522161465



  2%|▏         | 1/57 [00:01<01:00,  1.07s/it][A

loss: tensor(0.6858, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:01<00:53,  1.02it/s][A

loss: tensor(0.7032, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:02<00:51,  1.05it/s][A

loss: tensor(0.6257, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:03<00:52,  1.01it/s][A

loss: tensor(0.6635, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:04<00:51,  1.02it/s][A

loss: tensor(0.7825, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:05<00:51,  1.01s/it][A

loss: tensor(0.5694, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:06<00:49,  1.01it/s][A

loss: tensor(0.5034, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:07<00:48,  1.02it/s][A

loss: tensor(0.6241, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:08<00:48,  1.01s/it][A

loss: tensor(0.6572, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:09<00:46,  1.00it/s][A

loss: tensor(0.6462, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:46,  1.02s/it][A

loss: tensor(0.7021, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:11<00:45,  1.00s/it][A

loss: tensor(0.5612, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:44,  1.02s/it][A

loss: tensor(0.7568, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:43,  1.00s/it][A

loss: tensor(0.7796, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:14<00:41,  1.01it/s][A

loss: tensor(0.8165, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:41,  1.02s/it][A

loss: tensor(0.6069, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:16<00:39,  1.01it/s][A

loss: tensor(0.7189, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:18<00:39,  1.01s/it][A

loss: tensor(0.6092, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:18<00:37,  1.01it/s][A

loss: tensor(0.5681, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:20<00:37,  1.01s/it][A

loss: tensor(0.6305, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:20<00:35,  1.02it/s][A

loss: tensor(0.7615, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:21<00:33,  1.03it/s][A

loss: tensor(0.6078, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:22<00:33,  1.01it/s][A

loss: tensor(0.8232, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:23<00:32,  1.02it/s][A

loss: tensor(0.6931, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:24<00:31,  1.00it/s][A

loss: tensor(0.5867, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:25<00:30,  1.02it/s][A

loss: tensor(0.5622, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:26<00:29,  1.00it/s][A

loss: tensor(0.5446, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:27<00:28,  1.02it/s][A

loss: tensor(0.6831, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:28<00:27,  1.04it/s][A

loss: tensor(0.7478, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:29<00:26,  1.01it/s][A

loss: tensor(0.6346, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:30<00:25,  1.03it/s][A

loss: tensor(0.5726, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:31<00:24,  1.00it/s][A

loss: tensor(0.6028, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:32<00:23,  1.02it/s][A

loss: tensor(0.6462, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:33<00:22,  1.03it/s][A

loss: tensor(0.5955, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:34<00:21,  1.01it/s][A

loss: tensor(0.6171, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:35<00:20,  1.03it/s][A

loss: tensor(0.6047, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:36<00:19,  1.00it/s][A

loss: tensor(0.7231, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:37<00:18,  1.02it/s][A

loss: tensor(0.5760, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:38<00:17,  1.00it/s][A

loss: tensor(0.6465, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:39<00:16,  1.02it/s][A

loss: tensor(0.5290, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:40<00:15,  1.04it/s][A

loss: tensor(0.6455, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:41<00:14,  1.01it/s][A

loss: tensor(0.8163, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:42<00:13,  1.03it/s][A

loss: tensor(0.5807, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:43<00:12,  1.00it/s][A

loss: tensor(0.6461, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:44<00:11,  1.02it/s][A

loss: tensor(0.7060, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:45<00:11,  1.00s/it][A

loss: tensor(0.6450, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:46<00:09,  1.02it/s][A

loss: tensor(0.7227, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:47<00:08,  1.04it/s][A

loss: tensor(0.7192, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:48<00:07,  1.01it/s][A

loss: tensor(0.6221, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:49<00:06,  1.02it/s][A

loss: tensor(0.5835, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:50<00:05,  1.00it/s][A

loss: tensor(0.5462, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:51<00:04,  1.02it/s][A

loss: tensor(0.5400, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:52<00:03,  1.01it/s][A

loss: tensor(0.6077, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:53<00:02,  1.02it/s][A

loss: tensor(0.5980, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:54<00:01,  1.04it/s][A

loss: tensor(0.6622, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:55<00:00,  1.01it/s][A

loss: tensor(0.9283, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:56<00:00,  1.02it/s][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.63it/s][A

loss: tensor(0.7959, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6549876457766483

	Training acc: 0.8147359733872259

	Training prec: 0.3787437530258805

	Training rec: 0.2544903570975765

	Training f1: 0.2612351035032541

	Current Learning rate:  0.0006



  7%|▋         | 2/29 [00:00<00:04,  6.42it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.53it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.55it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.56it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.62it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.61it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.59it/s][A
 31%|███       | 9/29 [00:01<00:03,  6.62it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.64it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.63it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.68it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.69it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.62it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.65it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.69it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.69it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.72it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.74it/s][A
 69%|██████▉   | 20/29 [00:03<00:01,  6.69it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.6834793306630234

	Validation acc: 0.8042026717475999

	Validation prec: 0.3431488649248814

	Validation rec: 0.27076369515947585

	Validation f1: 0.27383495418785214



  2%|▏         | 1/57 [00:01<00:59,  1.06s/it][A

loss: tensor(0.7606, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:01<00:54,  1.01it/s][A

loss: tensor(0.6876, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:03<00:54,  1.02s/it][A

loss: tensor(0.7911, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:03<00:52,  1.02it/s][A

loss: tensor(0.6757, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:52,  1.00s/it][A

loss: tensor(0.5420, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:05<00:50,  1.02it/s][A

loss: tensor(0.6580, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:06<00:48,  1.03it/s][A

loss: tensor(0.6127, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:07<00:48,  1.00it/s][A

loss: tensor(0.5049, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:08<00:46,  1.02it/s][A

loss: tensor(0.7205, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:09<00:46,  1.00it/s][A

loss: tensor(0.7223, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:10<00:44,  1.02it/s][A

loss: tensor(0.5942, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:11<00:43,  1.04it/s][A

loss: tensor(0.7461, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:12<00:43,  1.01it/s][A

loss: tensor(0.8657, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:13<00:41,  1.03it/s][A

loss: tensor(0.8258, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:14<00:41,  1.00it/s][A

loss: tensor(0.6320, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:15<00:40,  1.02it/s][A

loss: tensor(0.6506, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:16<00:39,  1.00it/s][A

loss: tensor(0.7343, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:17<00:38,  1.02it/s][A

loss: tensor(0.8391, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:18<00:36,  1.03it/s][A

loss: tensor(0.6534, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:19<00:36,  1.01it/s][A

loss: tensor(0.6565, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:20<00:35,  1.02it/s][A

loss: tensor(0.5874, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:21<00:35,  1.00s/it][A

loss: tensor(0.5858, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:22<00:33,  1.02it/s][A

loss: tensor(0.6672, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:23<00:33,  1.00s/it][A

loss: tensor(0.6038, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:24<00:31,  1.02it/s][A

loss: tensor(0.8054, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:25<00:29,  1.03it/s][A

loss: tensor(0.7629, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:26<00:29,  1.01it/s][A

loss: tensor(0.5671, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:27<00:28,  1.03it/s][A

loss: tensor(0.6090, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:28<00:27,  1.00it/s][A

loss: tensor(0.4733, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:29<00:26,  1.02it/s][A

loss: tensor(0.6943, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:30<00:25,  1.01it/s][A

loss: tensor(0.8605, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:31<00:24,  1.02it/s][A

loss: tensor(0.6446, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:32<00:23,  1.04it/s][A

loss: tensor(0.4966, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:33<00:22,  1.01it/s][A

loss: tensor(0.5038, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:34<00:21,  1.03it/s][A

loss: tensor(0.6916, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:35<00:20,  1.00it/s][A

loss: tensor(0.4904, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:36<00:19,  1.02it/s][A

loss: tensor(0.5707, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:37<00:18,  1.04it/s][A

loss: tensor(0.5760, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:38<00:17,  1.01it/s][A

loss: tensor(0.6293, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:39<00:16,  1.03it/s][A

loss: tensor(0.6691, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:40<00:15,  1.00it/s][A

loss: tensor(0.4983, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:41<00:14,  1.02it/s][A

loss: tensor(0.7680, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:42<00:14,  1.00s/it][A

loss: tensor(0.7075, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:43<00:12,  1.02it/s][A

loss: tensor(0.7062, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:44<00:11,  1.03it/s][A

loss: tensor(0.6715, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:45<00:10,  1.01it/s][A

loss: tensor(0.7226, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:46<00:09,  1.03it/s][A

loss: tensor(0.6625, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:47<00:08,  1.00it/s][A

loss: tensor(0.6053, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:48<00:07,  1.02it/s][A

loss: tensor(0.5449, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:49<00:07,  1.00s/it][A

loss: tensor(0.7570, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:50<00:05,  1.02it/s][A

loss: tensor(0.7137, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:51<00:04,  1.04it/s][A

loss: tensor(0.7438, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:52<00:03,  1.01it/s][A

loss: tensor(0.6563, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:53<00:02,  1.03it/s][A

loss: tensor(0.8160, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:54<00:01,  1.00it/s][A

loss: tensor(0.5928, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:55<00:00,  1.02it/s][A

loss: tensor(0.6367, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:55<00:00,  1.02it/s][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.74it/s][A

loss: tensor(0.6178, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6628523505570596

	Training acc: 0.8133006898728005

	Training prec: 0.40892625514161507

	Training rec: 0.25729105036864436

	Training f1: 0.2659170175011855

	Current Learning rate:  0.0005714285714285714



  7%|▋         | 2/29 [00:00<00:04,  6.69it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.67it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.67it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.72it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.71it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.54it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.52it/s][A
 31%|███       | 9/29 [00:01<00:03,  6.53it/s][A
 34%|███▍      | 10/29 [00:01<00:03,  5.45it/s][A
 38%|███▊      | 11/29 [00:01<00:03,  5.65it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  5.94it/s][A
 45%|████▍     | 13/29 [00:02<00:02,  6.17it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.36it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.48it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.58it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.64it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.66it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.70it/s][A
 69%|██████▉   | 20/29 [00:03<00:01,  6.73it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.6944304129173016

	Validation acc: 0.8017866921101758

	Validation prec: 0.34066749172819316

	Validation rec: 0.2745659487331224

	Validation f1: 0.2771263731817965



  2%|▏         | 1/57 [00:00<00:52,  1.06it/s][A

loss: tensor(0.7523, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:02<00:56,  1.03s/it][A

loss: tensor(0.6510, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:02<00:53,  1.01it/s][A

loss: tensor(0.6183, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:03<00:51,  1.02it/s][A

loss: tensor(0.6646, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:53,  1.03s/it][A

loss: tensor(0.6400, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:05<00:50,  1.01it/s][A

loss: tensor(0.5347, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:51,  1.03s/it][A

loss: tensor(0.7585, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:49,  1.01s/it][A

loss: tensor(0.5986, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:48,  1.02s/it][A

loss: tensor(0.8273, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:47,  1.00s/it][A

loss: tensor(0.6829, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:45,  1.00it/s][A

loss: tensor(0.8549, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:45,  1.02s/it][A

loss: tensor(0.6767, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:45,  1.03s/it][A

loss: tensor(0.7428, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:45,  1.07s/it][A

loss: tensor(0.6573, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:15<00:44,  1.05s/it][A

loss: tensor(0.6729, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:42,  1.04s/it][A

loss: tensor(0.6639, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:17<00:42,  1.06s/it][A

loss: tensor(0.5876, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:18<00:40,  1.03s/it][A

loss: tensor(0.5228, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:19<00:39,  1.04s/it][A

loss: tensor(0.6459, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:20<00:37,  1.02s/it][A

loss: tensor(0.5231, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:21<00:37,  1.04s/it][A

loss: tensor(0.7300, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:22<00:35,  1.01s/it][A

loss: tensor(0.8527, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:23<00:33,  1.00it/s][A

loss: tensor(0.5428, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:24<00:33,  1.02s/it][A

loss: tensor(0.7727, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:25<00:32,  1.01s/it][A

loss: tensor(0.6106, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:26<00:33,  1.07s/it][A

loss: tensor(0.6653, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:27<00:31,  1.04s/it][A

loss: tensor(0.6462, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:28<00:30,  1.04s/it][A

loss: tensor(0.6357, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:29<00:28,  1.01s/it][A

loss: tensor(0.5202, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:30<00:26,  1.00it/s][A

loss: tensor(0.5837, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:31<00:26,  1.02s/it][A

loss: tensor(0.5820, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:32<00:24,  1.01it/s][A

loss: tensor(0.7726, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:33<00:24,  1.01s/it][A

loss: tensor(0.7712, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:34<00:22,  1.01it/s][A

loss: tensor(0.5652, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:35<00:22,  1.01s/it][A

loss: tensor(0.7849, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:36<00:20,  1.01it/s][A

loss: tensor(0.6165, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:37<00:19,  1.00it/s][A

loss: tensor(0.7533, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:38<00:19,  1.04s/it][A

loss: tensor(0.6454, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:39<00:18,  1.03s/it][A

loss: tensor(0.6810, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:40<00:18,  1.07s/it][A

loss: tensor(0.5709, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:42<00:17,  1.07s/it][A

loss: tensor(0.6541, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:43<00:16,  1.08s/it][A

loss: tensor(0.5981, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:44<00:16,  1.15s/it][A

loss: tensor(0.5777, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:45<00:14,  1.12s/it][A

loss: tensor(0.5562, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:46<00:13,  1.12s/it][A

loss: tensor(0.5040, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:47<00:11,  1.08s/it][A

loss: tensor(0.7013, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:48<00:10,  1.08s/it][A

loss: tensor(0.9051, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:49<00:09,  1.06s/it][A

loss: tensor(0.5099, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:50<00:08,  1.05s/it][A

loss: tensor(0.5944, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:51<00:07,  1.06s/it][A

loss: tensor(0.6238, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:52<00:06,  1.03s/it][A

loss: tensor(0.5738, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:53<00:05,  1.05s/it][A

loss: tensor(0.6113, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:54<00:04,  1.02s/it][A

loss: tensor(0.7263, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:55<00:03,  1.04s/it][A

loss: tensor(0.6843, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:56<00:02,  1.01s/it][A

loss: tensor(0.6012, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:57<00:00,  1.01it/s][A

loss: tensor(0.5205, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:58<00:00,  1.03s/it][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:03,  7.00it/s][A

loss: tensor(0.7263, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6534611750067326

	Training acc: 0.8149680546083357

	Training prec: 0.3652539441514742

	Training rec: 0.25490955373090446

	Training f1: 0.2611978422647115

	Current Learning rate:  0.0005428571428571428



  7%|▋         | 2/29 [00:00<00:03,  6.96it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.94it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.94it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.94it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.93it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.89it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.90it/s][A
 31%|███       | 9/29 [00:01<00:02,  6.91it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.93it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.90it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.85it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.85it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.83it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.84it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.82it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.84it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.84it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.84it/s][A
 69%|██████▉   | 20/29 [00:02<00:01,  6.84it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.7058015496566378

	Validation acc: 0.8023319602309641

	Validation prec: 0.3098885748807133

	Validation rec: 0.26306569473969915

	Validation f1: 0.2616402636239501



  2%|▏         | 1/57 [00:00<00:51,  1.08it/s][A

loss: tensor(0.5867, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:01<00:53,  1.02it/s][A

loss: tensor(0.6729, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:02<00:52,  1.02it/s][A

loss: tensor(0.6269, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:04<00:54,  1.02s/it][A

loss: tensor(0.6899, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:04<00:52,  1.00s/it][A

loss: tensor(0.7004, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:06<00:52,  1.04s/it][A

loss: tensor(0.7040, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:50,  1.01s/it][A

loss: tensor(0.7149, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:49,  1.00s/it][A

loss: tensor(0.6755, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:49,  1.04s/it][A

loss: tensor(0.7104, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:48,  1.02s/it][A

loss: tensor(0.6599, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:48,  1.05s/it][A

loss: tensor(0.6185, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:46,  1.03s/it][A

loss: tensor(0.6029, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:44,  1.02s/it][A

loss: tensor(0.7124, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:44,  1.04s/it][A

loss: tensor(0.5338, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:15<00:43,  1.03s/it][A

loss: tensor(0.5771, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:43,  1.07s/it][A

loss: tensor(0.5749, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:17<00:42,  1.05s/it][A

loss: tensor(0.6928, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:18<00:41,  1.07s/it][A

loss: tensor(0.6337, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:19<00:39,  1.05s/it][A

loss: tensor(0.6191, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:20<00:38,  1.04s/it][A

loss: tensor(0.7952, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:21<00:38,  1.07s/it][A

loss: tensor(0.6535, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:22<00:36,  1.05s/it][A

loss: tensor(0.7133, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:23<00:36,  1.07s/it][A

loss: tensor(0.5061, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:24<00:34,  1.05s/it][A

loss: tensor(0.7234, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:25<00:33,  1.05s/it][A

loss: tensor(0.5657, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:26<00:31,  1.03s/it][A

loss: tensor(0.6212, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:27<00:30,  1.02s/it][A

loss: tensor(0.6448, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:29<00:30,  1.04s/it][A

loss: tensor(0.5748, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:30<00:28,  1.03s/it][A

loss: tensor(0.8548, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:31<00:28,  1.05s/it][A

loss: tensor(0.6939, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:32<00:26,  1.03s/it][A

loss: tensor(0.5862, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:33<00:26,  1.06s/it][A

loss: tensor(0.7814, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:34<00:25,  1.05s/it][A

loss: tensor(0.6709, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:35<00:23,  1.03s/it][A

loss: tensor(0.6726, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:36<00:23,  1.06s/it][A

loss: tensor(0.6190, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:37<00:21,  1.04s/it][A

loss: tensor(0.6897, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:38<00:21,  1.06s/it][A

loss: tensor(0.7646, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:39<00:19,  1.04s/it][A

loss: tensor(0.7037, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:40<00:18,  1.04s/it][A

loss: tensor(0.6558, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:41<00:18,  1.07s/it][A

loss: tensor(0.5993, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:42<00:16,  1.05s/it][A

loss: tensor(0.6784, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:43<00:16,  1.07s/it][A

loss: tensor(0.6595, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:44<00:14,  1.06s/it][A

loss: tensor(0.7490, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:45<00:13,  1.06s/it][A

loss: tensor(0.7647, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:46<00:12,  1.03s/it][A

loss: tensor(0.6192, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:47<00:11,  1.02s/it][A

loss: tensor(0.7536, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:48<00:10,  1.02s/it][A

loss: tensor(0.6232, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:49<00:08,  1.00it/s][A

loss: tensor(0.6282, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:50<00:08,  1.02s/it][A

loss: tensor(0.5206, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:51<00:06,  1.00it/s][A

loss: tensor(0.6147, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:52<00:06,  1.02s/it][A

loss: tensor(0.5476, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:53<00:04,  1.01it/s][A

loss: tensor(0.5026, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:54<00:03,  1.02it/s][A

loss: tensor(0.4014, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:55<00:03,  1.01s/it][A

loss: tensor(0.8871, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:56<00:01,  1.01it/s][A

loss: tensor(0.7422, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:57<00:01,  1.01s/it][A

loss: tensor(0.5523, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:58<00:00,  1.03s/it][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.88it/s][A

loss: tensor(0.5869, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6531200769700503

	Training acc: 0.8155740295915741

	Training prec: 0.3990632501632075

	Training rec: 0.2615308794973291

	Training f1: 0.27003460018996206

	Current Learning rate:  0.0005142857142857142



  7%|▋         | 2/29 [00:00<00:03,  6.87it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.87it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.87it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.87it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.82it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.81it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.81it/s][A
 31%|███       | 9/29 [00:01<00:02,  6.81it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.79it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.79it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.82it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.81it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.77it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.77it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.80it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.81it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.80it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.79it/s][A
 69%|██████▉   | 20/29 [00:02<00:01,  6.77it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.720523839366847

	Validation acc: 0.7981709425945719

	Validation prec: 0.3005505585230667

	Validation rec: 0.24614132907701014

	Validation f1: 0.2443252967158202



  2%|▏         | 1/57 [00:01<00:59,  1.06s/it][A

loss: tensor(0.5330, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:02<00:55,  1.01s/it][A

loss: tensor(0.6168, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:03<00:56,  1.04s/it][A

loss: tensor(0.7061, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:04<00:53,  1.01s/it][A

loss: tensor(0.5702, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:51,  1.02it/s][A

loss: tensor(0.7402, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:06<00:51,  1.01s/it][A

loss: tensor(0.5658, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:49,  1.01it/s][A

loss: tensor(0.6140, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:49,  1.02s/it][A

loss: tensor(0.5948, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:47,  1.00it/s][A

loss: tensor(0.6154, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:47,  1.02s/it][A

loss: tensor(0.6534, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:45,  1.00it/s][A

loss: tensor(0.7439, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:44,  1.02it/s][A

loss: tensor(0.6056, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:44,  1.01s/it][A

loss: tensor(0.6661, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:42,  1.00it/s][A

loss: tensor(0.8094, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:15<00:43,  1.03s/it][A

loss: tensor(0.6525, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:41,  1.02s/it][A

loss: tensor(0.5513, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:17<00:40,  1.01s/it][A

loss: tensor(0.6499, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:18<00:40,  1.04s/it][A

loss: tensor(0.6005, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:19<00:38,  1.02s/it][A

loss: tensor(0.6806, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:20<00:38,  1.04s/it][A

loss: tensor(0.6654, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:21<00:37,  1.04s/it][A

loss: tensor(0.5721, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:22<00:37,  1.07s/it][A

loss: tensor(0.7564, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:23<00:36,  1.08s/it][A

loss: tensor(0.6350, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:24<00:34,  1.04s/it][A

loss: tensor(0.6148, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:25<00:33,  1.06s/it][A

loss: tensor(0.7167, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:26<00:31,  1.03s/it][A

loss: tensor(0.5203, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:27<00:31,  1.05s/it][A

loss: tensor(0.7650, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:28<00:29,  1.02s/it][A

loss: tensor(0.5190, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:29<00:29,  1.04s/it][A

loss: tensor(0.5981, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:30<00:27,  1.01s/it][A

loss: tensor(0.6459, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:31<00:25,  1.01it/s][A

loss: tensor(0.6020, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:32<00:25,  1.01s/it][A

loss: tensor(0.6422, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:33<00:23,  1.00it/s][A

loss: tensor(0.7898, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:34<00:23,  1.02s/it][A

loss: tensor(0.7311, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:35<00:22,  1.00s/it][A

loss: tensor(0.5368, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:36<00:21,  1.01s/it][A

loss: tensor(0.7263, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:37<00:19,  1.00it/s][A

loss: tensor(0.7199, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:38<00:18,  1.02it/s][A

loss: tensor(0.6262, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:39<00:19,  1.07s/it][A

loss: tensor(0.5878, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:40<00:18,  1.08s/it][A

loss: tensor(0.6349, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:42<00:18,  1.13s/it][A

loss: tensor(0.6277, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:43<00:16,  1.12s/it][A

loss: tensor(0.6988, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:44<00:16,  1.15s/it][A

loss: tensor(0.6374, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:45<00:15,  1.19s/it][A

loss: tensor(0.5903, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:46<00:13,  1.16s/it][A

loss: tensor(0.6515, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:48<00:12,  1.14s/it][A

loss: tensor(0.6415, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:49<00:11,  1.10s/it][A

loss: tensor(0.7237, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:50<00:09,  1.10s/it][A

loss: tensor(0.6917, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:51<00:08,  1.07s/it][A

loss: tensor(0.6886, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:52<00:07,  1.04s/it][A

loss: tensor(0.6057, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:53<00:06,  1.06s/it][A

loss: tensor(0.6188, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:54<00:05,  1.04s/it][A

loss: tensor(0.5889, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:55<00:04,  1.06s/it][A

loss: tensor(0.6964, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:56<00:03,  1.03s/it][A

loss: tensor(0.5111, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:57<00:02,  1.05s/it][A

loss: tensor(0.7415, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:58<00:01,  1.02s/it][A

loss: tensor(0.6009, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:59<00:00,  1.04s/it][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.59it/s][A

loss: tensor(0.7570, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6464288224253738

	Training acc: 0.8162105616791605

	Training prec: 0.4152137858283523

	Training rec: 0.2619532311807537

	Training f1: 0.2734553643769061

	Current Learning rate:  0.0004857142857142857



  7%|▋         | 2/29 [00:00<00:04,  6.59it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.62it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.62it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.53it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.58it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.61it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.62it/s][A
 31%|███       | 9/29 [00:01<00:03,  6.60it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.60it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.61it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.60it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.53it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.59it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.61it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.64it/s][A
 59%|█████▊    | 17/29 [00:02<00:02,  5.43it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  5.77it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.02it/s][A
 69%|██████▉   | 20/29 [00:03<00:01,  6.22it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.7298030390821654

	Validation acc: 0.7932867913273247

	Validation prec: 0.32621973265521736

	Validation rec: 0.24456222840022684

	Validation f1: 0.24454873472369715



  2%|▏         | 1/57 [00:00<00:55,  1.01it/s][A

loss: tensor(0.7546, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:01<00:53,  1.02it/s][A

loss: tensor(0.6699, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:03<00:55,  1.02s/it][A

loss: tensor(0.6664, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:04<00:53,  1.01s/it][A

loss: tensor(0.7258, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:54,  1.04s/it][A

loss: tensor(0.5271, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:06<00:51,  1.02s/it][A

loss: tensor(0.8216, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:52,  1.05s/it][A

loss: tensor(0.6194, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:50,  1.02s/it][A

loss: tensor(0.6957, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:48,  1.00s/it][A

loss: tensor(0.5740, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:48,  1.02s/it][A

loss: tensor(0.5663, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:46,  1.01s/it][A

loss: tensor(0.6793, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:46,  1.02s/it][A

loss: tensor(0.6177, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:44,  1.00s/it][A

loss: tensor(0.5107, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:43,  1.02s/it][A

loss: tensor(0.8140, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:15<00:42,  1.00s/it][A

loss: tensor(0.5927, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:40,  1.00it/s][A

loss: tensor(0.7853, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:17<00:40,  1.02s/it][A

loss: tensor(0.6971, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:18<00:39,  1.01s/it][A

loss: tensor(0.5731, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:19<00:39,  1.04s/it][A

loss: tensor(0.6655, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:20<00:37,  1.02s/it][A

loss: tensor(0.6425, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:21<00:37,  1.04s/it][A

loss: tensor(0.5554, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:22<00:35,  1.02s/it][A

loss: tensor(0.6452, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:23<00:34,  1.01s/it][A

loss: tensor(0.6579, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:24<00:34,  1.04s/it][A

loss: tensor(0.6629, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:25<00:32,  1.03s/it][A

loss: tensor(0.5422, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:26<00:33,  1.09s/it][A

loss: tensor(0.4963, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:27<00:33,  1.10s/it][A

loss: tensor(0.7104, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:29<00:32,  1.11s/it][A

loss: tensor(0.6669, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:30<00:31,  1.14s/it][A

loss: tensor(0.5031, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:31<00:30,  1.14s/it][A

loss: tensor(0.6272, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:32<00:30,  1.17s/it][A

loss: tensor(0.7733, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:33<00:28,  1.14s/it][A

loss: tensor(0.6029, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:34<00:27,  1.13s/it][A

loss: tensor(0.7559, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:35<00:24,  1.08s/it][A

loss: tensor(0.5815, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:36<00:23,  1.05s/it][A

loss: tensor(0.5127, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:37<00:22,  1.06s/it][A

loss: tensor(0.5517, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:38<00:20,  1.04s/it][A

loss: tensor(0.6663, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:39<00:19,  1.05s/it][A

loss: tensor(0.5915, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:40<00:18,  1.02s/it][A

loss: tensor(0.6438, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:41<00:17,  1.04s/it][A

loss: tensor(0.6113, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:42<00:16,  1.01s/it][A

loss: tensor(0.6857, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:43<00:14,  1.01it/s][A

loss: tensor(0.6941, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:44<00:14,  1.02s/it][A

loss: tensor(0.6398, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:45<00:12,  1.00it/s][A

loss: tensor(0.7158, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:46<00:12,  1.02s/it][A

loss: tensor(0.6978, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:47<00:10,  1.00it/s][A

loss: tensor(0.6876, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:48<00:10,  1.01s/it][A

loss: tensor(0.6721, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:49<00:08,  1.00it/s][A

loss: tensor(0.7222, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:50<00:07,  1.02it/s][A

loss: tensor(0.5833, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:51<00:07,  1.02s/it][A

loss: tensor(0.6878, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:52<00:06,  1.02s/it][A

loss: tensor(0.6243, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:54<00:05,  1.05s/it][A

loss: tensor(0.5070, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:54<00:04,  1.03s/it][A

loss: tensor(0.8822, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:55<00:03,  1.02s/it][A

loss: tensor(0.8243, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:57<00:02,  1.04s/it][A

loss: tensor(0.7361, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:58<00:01,  1.02s/it][A

loss: tensor(0.5804, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:58<00:00,  1.03s/it][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.38it/s][A

loss: tensor(0.7902, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6541746577672791

	Training acc: 0.8144285376718163

	Training prec: 0.41543930612601615

	Training rec: 0.25601901507549535

	Training f1: 0.2645278024870855

	Current Learning rate:  0.00045714285714285713



  7%|▋         | 2/29 [00:00<00:04,  6.57it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.63it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.67it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.69it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.71it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.71it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.73it/s][A
 31%|███       | 9/29 [00:01<00:03,  6.52it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.57it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.61it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.64it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.64it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.63it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.62it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.61it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.65it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.67it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.67it/s][A
 69%|██████▉   | 20/29 [00:03<00:01,  6.66it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.6933211869206922

	Validation acc: 0.8016917845491427

	Validation prec: 0.32790797710913183

	Validation rec: 0.27360663423615594

	Validation f1: 0.2754439991758462



  2%|▏         | 1/57 [00:00<00:53,  1.04it/s][A

loss: tensor(0.6780, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:02<00:56,  1.03s/it][A

loss: tensor(0.7455, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:03<00:53,  1.00it/s][A

loss: tensor(0.6001, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:04<00:54,  1.03s/it][A

loss: tensor(0.6883, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:52,  1.00s/it][A

loss: tensor(0.5382, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:06<00:50,  1.01it/s][A

loss: tensor(0.7567, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:50,  1.02s/it][A

loss: tensor(0.5804, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:49,  1.00s/it][A

loss: tensor(0.6923, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:49,  1.03s/it][A

loss: tensor(0.6062, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:47,  1.02s/it][A

loss: tensor(0.6726, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:48,  1.05s/it][A

loss: tensor(0.9484, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:46,  1.03s/it][A

loss: tensor(0.5965, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:45,  1.03s/it][A

loss: tensor(0.6838, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:46,  1.09s/it][A

loss: tensor(0.5475, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:15<00:45,  1.09s/it][A

loss: tensor(0.7802, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:44,  1.09s/it][A

loss: tensor(0.6161, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:17<00:42,  1.06s/it][A

loss: tensor(0.5754, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:18<00:41,  1.06s/it][A

loss: tensor(0.6919, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:19<00:39,  1.04s/it][A

loss: tensor(0.5713, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:20<00:38,  1.03s/it][A

loss: tensor(0.6087, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:21<00:39,  1.09s/it][A

loss: tensor(0.5130, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:23<00:38,  1.10s/it][A

loss: tensor(0.6117, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:24<00:38,  1.12s/it][A

loss: tensor(0.5507, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:25<00:36,  1.12s/it][A

loss: tensor(0.5935, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:26<00:36,  1.15s/it][A

loss: tensor(0.6997, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:27<00:36,  1.18s/it][A

loss: tensor(0.7070, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:28<00:33,  1.13s/it][A

loss: tensor(0.5328, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:29<00:32,  1.12s/it][A

loss: tensor(0.8474, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:30<00:30,  1.08s/it][A

loss: tensor(0.6018, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:32<00:30,  1.12s/it][A

loss: tensor(0.6519, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:33<00:28,  1.10s/it][A

loss: tensor(0.7543, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:34<00:28,  1.14s/it][A

loss: tensor(0.6039, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:35<00:27,  1.15s/it][A

loss: tensor(0.6542, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:36<00:25,  1.11s/it][A

loss: tensor(0.7460, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:37<00:24,  1.10s/it][A

loss: tensor(0.7927, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:38<00:22,  1.06s/it][A

loss: tensor(0.5978, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:39<00:21,  1.06s/it][A

loss: tensor(0.6862, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:40<00:19,  1.03s/it][A

loss: tensor(0.6716, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:41<00:18,  1.01s/it][A

loss: tensor(0.5589, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:42<00:17,  1.04s/it][A

loss: tensor(0.7241, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:43<00:16,  1.02s/it][A

loss: tensor(0.6349, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:44<00:15,  1.03s/it][A

loss: tensor(0.5482, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:45<00:14,  1.01s/it][A

loss: tensor(0.7257, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:46<00:12,  1.01it/s][A

loss: tensor(0.6209, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:47<00:12,  1.02s/it][A

loss: tensor(0.5820, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:48<00:11,  1.01s/it][A

loss: tensor(0.6907, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:49<00:10,  1.04s/it][A

loss: tensor(0.5377, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:50<00:09,  1.01s/it][A

loss: tensor(0.6343, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:51<00:08,  1.06s/it][A

loss: tensor(0.6307, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:53<00:07,  1.06s/it][A

loss: tensor(0.7778, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:54<00:06,  1.04s/it][A

loss: tensor(0.5135, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:55<00:05,  1.06s/it][A

loss: tensor(0.6310, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:56<00:04,  1.03s/it][A

loss: tensor(0.6924, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:57<00:03,  1.06s/it][A

loss: tensor(0.7926, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:58<00:02,  1.04s/it][A

loss: tensor(0.6209, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:59<00:01,  1.06s/it][A

loss: tensor(0.6587, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [01:00<00:00,  1.06s/it][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.43it/s][A

loss: tensor(0.6784, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6534816737760577

	Training acc: 0.8169335266159926

	Training prec: 0.3855712610660455

	Training rec: 0.26015795904522343

	Training f1: 0.2686832124187364

	Current Learning rate:  0.00042857142857142855



  7%|▋         | 2/29 [00:00<00:04,  6.62it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.58it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.68it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.65it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.69it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.67it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.72it/s][A
 31%|███       | 9/29 [00:01<00:02,  6.67it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.72it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.67it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.69it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.66it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.69it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.65it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.65it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.60it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.59it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.53it/s][A
 69%|██████▉   | 20/29 [00:03<00:01,  6.57it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.6924581383836681

	Validation acc: 0.8027952231860692

	Validation prec: 0.34465171520736104

	Validation rec: 0.27873877082902204

	Validation f1: 0.28084967018440116



  2%|▏         | 1/57 [00:01<01:01,  1.10s/it][A

loss: tensor(0.5911, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:02<00:57,  1.04s/it][A

loss: tensor(0.6136, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:03<00:55,  1.03s/it][A

loss: tensor(0.5554, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:04<00:56,  1.07s/it][A

loss: tensor(0.6190, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:54,  1.05s/it][A

loss: tensor(0.6538, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:06<00:54,  1.06s/it][A

loss: tensor(0.6514, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:51,  1.04s/it][A

loss: tensor(0.6490, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:51,  1.06s/it][A

loss: tensor(0.7924, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:49,  1.03s/it][A

loss: tensor(0.7389, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:47,  1.01s/it][A

loss: tensor(0.6587, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:47,  1.04s/it][A

loss: tensor(0.7984, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:45,  1.02s/it][A

loss: tensor(0.6165, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:45,  1.04s/it][A

loss: tensor(0.7666, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:43,  1.02s/it][A

loss: tensor(0.6575, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:15<00:43,  1.04s/it][A

loss: tensor(0.6001, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:41,  1.02s/it][A

loss: tensor(0.6883, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:17<00:40,  1.02s/it][A

loss: tensor(0.6926, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:18<00:40,  1.04s/it][A

loss: tensor(0.6102, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:19<00:39,  1.03s/it][A

loss: tensor(0.6146, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:20<00:38,  1.05s/it][A

loss: tensor(0.5399, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:21<00:36,  1.03s/it][A

loss: tensor(0.6749, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:22<00:35,  1.01s/it][A

loss: tensor(0.7529, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:23<00:35,  1.03s/it][A

loss: tensor(0.8438, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:24<00:33,  1.02s/it][A

loss: tensor(0.6319, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:25<00:33,  1.04s/it][A

loss: tensor(0.5729, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:26<00:31,  1.02s/it][A

loss: tensor(0.5426, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:27<00:31,  1.04s/it][A

loss: tensor(0.6266, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:28<00:29,  1.03s/it][A

loss: tensor(0.6315, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:29<00:27,  1.00it/s][A

loss: tensor(0.7429, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:30<00:27,  1.02s/it][A

loss: tensor(0.7014, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:31<00:25,  1.00it/s][A

loss: tensor(0.5540, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:33<00:25,  1.03s/it][A

loss: tensor(0.6604, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:33<00:24,  1.01s/it][A

loss: tensor(0.7485, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:35<00:23,  1.03s/it][A

loss: tensor(0.5320, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:36<00:22,  1.00s/it][A

loss: tensor(0.6039, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:36<00:20,  1.01it/s][A

loss: tensor(0.7421, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:38<00:20,  1.02s/it][A

loss: tensor(0.7529, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:38<00:18,  1.00it/s][A

loss: tensor(0.6697, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:40<00:18,  1.02s/it][A

loss: tensor(0.6557, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:41<00:16,  1.00it/s][A

loss: tensor(0.5898, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:42<00:16,  1.02s/it][A

loss: tensor(0.5366, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:43<00:14,  1.00it/s][A

loss: tensor(0.7067, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:43<00:13,  1.02it/s][A

loss: tensor(0.5032, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:45<00:13,  1.01s/it][A

loss: tensor(0.4272, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:46<00:12,  1.00s/it][A

loss: tensor(0.6996, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:47<00:11,  1.03s/it][A

loss: tensor(0.6364, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:48<00:10,  1.01s/it][A

loss: tensor(0.6058, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:49<00:09,  1.01s/it][A

loss: tensor(0.6604, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:50<00:08,  1.03s/it][A

loss: tensor(0.5394, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:51<00:07,  1.02s/it][A

loss: tensor(0.7815, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:52<00:06,  1.04s/it][A

loss: tensor(0.7007, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:53<00:05,  1.02s/it][A

loss: tensor(0.6030, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:54<00:04,  1.05s/it][A

loss: tensor(0.6558, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:55<00:03,  1.03s/it][A

loss: tensor(0.6558, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:56<00:02,  1.02s/it][A

loss: tensor(0.6514, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:57<00:01,  1.04s/it][A

loss: tensor(0.6595, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:58<00:00,  1.02s/it][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.83it/s][A

loss: tensor(0.6183, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6487670530352676

	Training acc: 0.8169077363603404

	Training prec: 0.3844575602225385

	Training rec: 0.2582375834281664

	Training f1: 0.26513996341524665

	Current Learning rate:  0.0004



  7%|▋         | 2/29 [00:00<00:03,  6.79it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.75it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.72it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.61it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.61it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.70it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.75it/s][A
 31%|███       | 9/29 [00:01<00:02,  6.74it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.46it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.58it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.69it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.79it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.88it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.86it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.79it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.75it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.70it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.69it/s][A
 69%|██████▉   | 20/29 [00:02<00:01,  6.74it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.6867505096156021

	Validation acc: 0.80254735021287

	Validation prec: 0.3375921045889551

	Validation rec: 0.2655214301642105

	Validation f1: 0.2681902281288609



  2%|▏         | 1/57 [00:01<01:02,  1.11s/it][A

loss: tensor(0.7639, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:02<00:56,  1.03s/it][A

loss: tensor(0.7265, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:03<00:57,  1.07s/it][A

loss: tensor(0.6640, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:04<00:54,  1.03s/it][A

loss: tensor(0.7822, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:54,  1.05s/it][A

loss: tensor(0.6799, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:06<00:52,  1.03s/it][A

loss: tensor(0.5115, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:50,  1.01s/it][A

loss: tensor(0.5859, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:50,  1.03s/it][A

loss: tensor(0.6129, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:48,  1.01s/it][A

loss: tensor(0.6880, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:48,  1.03s/it][A

loss: tensor(0.5599, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:46,  1.01s/it][A

loss: tensor(0.6401, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:45,  1.02s/it][A

loss: tensor(0.6867, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:44,  1.01s/it][A

loss: tensor(0.6624, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:42,  1.00it/s][A

loss: tensor(0.5466, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:15<00:42,  1.02s/it][A

loss: tensor(0.5935, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:41,  1.01s/it][A

loss: tensor(0.5598, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:17<00:40,  1.02s/it][A

loss: tensor(0.6327, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:18<00:38,  1.00it/s][A

loss: tensor(0.6736, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:19<00:38,  1.01s/it][A

loss: tensor(0.6539, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:20<00:36,  1.01it/s][A

loss: tensor(0.5781, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:21<00:35,  1.03it/s][A

loss: tensor(0.6354, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:22<00:35,  1.00s/it][A

loss: tensor(0.6100, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:23<00:33,  1.01it/s][A

loss: tensor(0.7765, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:24<00:33,  1.01s/it][A

loss: tensor(0.5777, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:25<00:31,  1.01it/s][A

loss: tensor(0.7892, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:26<00:30,  1.02it/s][A

loss: tensor(0.5827, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:27<00:30,  1.00s/it][A

loss: tensor(0.5794, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:28<00:28,  1.01it/s][A

loss: tensor(0.6040, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:29<00:28,  1.01s/it][A

loss: tensor(0.6194, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:30<00:26,  1.01it/s][A

loss: tensor(0.5783, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:31<00:26,  1.03s/it][A

loss: tensor(0.7207, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:32<00:25,  1.01s/it][A

loss: tensor(0.6605, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:33<00:23,  1.00it/s][A

loss: tensor(0.5608, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:34<00:23,  1.03s/it][A

loss: tensor(0.7952, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:35<00:22,  1.03s/it][A

loss: tensor(0.5375, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:36<00:22,  1.06s/it][A

loss: tensor(0.6837, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:37<00:20,  1.03s/it][A

loss: tensor(0.6652, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:38<00:19,  1.05s/it][A

loss: tensor(0.8249, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:39<00:18,  1.04s/it][A

loss: tensor(0.6107, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:40<00:17,  1.03s/it][A

loss: tensor(0.6529, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:41<00:16,  1.06s/it][A

loss: tensor(0.5679, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:42<00:15,  1.05s/it][A

loss: tensor(0.6111, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:43<00:14,  1.07s/it][A

loss: tensor(0.5575, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:44<00:13,  1.05s/it][A

loss: tensor(0.5350, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:46<00:12,  1.06s/it][A

loss: tensor(0.5973, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:47<00:11,  1.04s/it][A

loss: tensor(0.6920, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:48<00:10,  1.03s/it][A

loss: tensor(0.6239, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:49<00:09,  1.06s/it][A

loss: tensor(0.6918, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:50<00:08,  1.06s/it][A

loss: tensor(0.5826, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:51<00:07,  1.08s/it][A

loss: tensor(0.5994, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:52<00:06,  1.06s/it][A

loss: tensor(0.7119, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:53<00:05,  1.05s/it][A

loss: tensor(0.5376, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:54<00:04,  1.07s/it][A

loss: tensor(0.7824, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:55<00:03,  1.05s/it][A

loss: tensor(0.6125, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:56<00:02,  1.07s/it][A

loss: tensor(0.6401, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:57<00:01,  1.05s/it][A

loss: tensor(0.7345, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:58<00:00,  1.02s/it][A

  0%|          | 0/29 [00:00<?, ?it/s][A

loss: tensor(0.9593, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6474236542718452

	Training acc: 0.8157826331915703

	Training prec: 0.407787616517883

	Training rec: 0.2596345635666285

	Training f1: 0.26943179024346664

	Current Learning rate:  0.00037142857142857143



  3%|▎         | 1/29 [00:00<00:07,  3.83it/s][A
  7%|▋         | 2/29 [00:00<00:05,  5.21it/s][A
 10%|█         | 3/29 [00:00<00:04,  5.78it/s][A
 14%|█▍        | 4/29 [00:00<00:04,  6.19it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.43it/s][A
 21%|██        | 6/29 [00:01<00:03,  6.42it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.44it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.47it/s][A
 31%|███       | 9/29 [00:01<00:03,  6.59it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.68it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.70it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.73it/s][A
 45%|████▍     | 13/29 [00:02<00:02,  6.73it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.69it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.64it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.59it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.57it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.57it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.55it/s][A
 69%|██████▉   | 20/29 [00:03<00:01,  6


	Validation Loss: 0.6858725003127394

	Validation acc: 0.8039302115933556

	Validation prec: 0.33734866463270563

	Validation rec: 0.2770179093880899

	Validation f1: 0.277906734123971



  2%|▏         | 1/57 [00:01<00:58,  1.04s/it][A

loss: tensor(0.8242, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:02<01:02,  1.14s/it][A

loss: tensor(0.5828, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:03<00:59,  1.09s/it][A

loss: tensor(0.7602, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:04<00:57,  1.09s/it][A

loss: tensor(0.5769, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:57,  1.11s/it][A

loss: tensor(0.5848, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:06<00:55,  1.09s/it][A

loss: tensor(0.7401, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:54,  1.09s/it][A

loss: tensor(0.6413, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:52,  1.06s/it][A

loss: tensor(0.8649, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:51,  1.07s/it][A

loss: tensor(0.5305, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:48,  1.04s/it][A

loss: tensor(0.5279, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:46,  1.01s/it][A

loss: tensor(0.6352, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:13<00:50,  1.12s/it][A

loss: tensor(0.6130, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:14<00:48,  1.10s/it][A

loss: tensor(0.6154, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:15<00:47,  1.11s/it][A

loss: tensor(0.7010, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:16<00:45,  1.08s/it][A

loss: tensor(0.6336, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:17<00:44,  1.08s/it][A

loss: tensor(0.6099, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:18<00:42,  1.05s/it][A

loss: tensor(0.5803, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:19<00:40,  1.04s/it][A

loss: tensor(0.7706, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:20<00:39,  1.05s/it][A

loss: tensor(0.6770, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:21<00:38,  1.03s/it][A

loss: tensor(0.7711, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:22<00:38,  1.06s/it][A

loss: tensor(0.6736, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:23<00:36,  1.03s/it][A

loss: tensor(0.5043, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:24<00:34,  1.03s/it][A

loss: tensor(0.6004, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:25<00:34,  1.05s/it][A

loss: tensor(0.6457, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:26<00:33,  1.04s/it][A

loss: tensor(0.6035, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:27<00:33,  1.08s/it][A

loss: tensor(0.6559, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:28<00:31,  1.06s/it][A

loss: tensor(0.5841, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:29<00:31,  1.07s/it][A

loss: tensor(0.6831, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:30<00:29,  1.05s/it][A

loss: tensor(0.5189, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:31<00:27,  1.03s/it][A

loss: tensor(0.7505, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:33<00:27,  1.07s/it][A

loss: tensor(0.6650, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:34<00:26,  1.06s/it][A

loss: tensor(0.6922, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:35<00:25,  1.07s/it][A

loss: tensor(0.5446, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:36<00:24,  1.06s/it][A

loss: tensor(0.8373, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:37<00:23,  1.09s/it][A

loss: tensor(0.6221, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:38<00:22,  1.07s/it][A

loss: tensor(0.6257, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:39<00:21,  1.08s/it][A

loss: tensor(0.6678, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:40<00:20,  1.08s/it][A

loss: tensor(0.5370, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:41<00:18,  1.04s/it][A

loss: tensor(0.6566, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:42<00:17,  1.04s/it][A

loss: tensor(0.6371, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:43<00:16,  1.02s/it][A

loss: tensor(0.6221, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:44<00:15,  1.06s/it][A

loss: tensor(0.7116, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:45<00:14,  1.04s/it][A

loss: tensor(0.7245, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:46<00:13,  1.03s/it][A

loss: tensor(0.6068, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:47<00:12,  1.05s/it][A

loss: tensor(0.7364, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:48<00:11,  1.03s/it][A

loss: tensor(0.5721, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:49<00:10,  1.08s/it][A

loss: tensor(0.6849, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:50<00:09,  1.05s/it][A

loss: tensor(0.8179, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:51<00:08,  1.03s/it][A

loss: tensor(0.5589, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:53<00:07,  1.06s/it][A

loss: tensor(0.5449, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:54<00:06,  1.04s/it][A

loss: tensor(0.6978, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:55<00:05,  1.06s/it][A

loss: tensor(0.5281, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:56<00:04,  1.04s/it][A

loss: tensor(0.4627, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:57<00:03,  1.05s/it][A

loss: tensor(0.7320, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:58<00:02,  1.04s/it][A

loss: tensor(0.6374, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:59<00:01,  1.03s/it][A

loss: tensor(0.7546, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [01:00<00:00,  1.06s/it][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.36it/s][A

loss: tensor(0.9807, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6547263729990574

	Training acc: 0.8142496604440822

	Training prec: 0.39137528906890207

	Training rec: 0.25595493703456534

	Training f1: 0.2631174815308196

	Current Learning rate:  0.00034285714285714285



  7%|▋         | 2/29 [00:00<00:04,  6.27it/s][A
 10%|█         | 3/29 [00:00<00:04,  6.20it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.25it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.20it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.20it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.14it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.26it/s][A
 31%|███       | 9/29 [00:01<00:03,  6.10it/s][A
 34%|███▍      | 10/29 [00:01<00:03,  6.16it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.16it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.20it/s][A
 45%|████▍     | 13/29 [00:02<00:02,  6.17it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.35it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.43it/s][A
 55%|█████▌    | 16/29 [00:02<00:02,  6.47it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.57it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.68it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.67it/s][A
 69%|██████▉   | 20/29 [00:03<00:01,  6.75it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.6912859267201917

	Validation acc: 0.8033453165002816

	Validation prec: 0.33268351629526954

	Validation rec: 0.2832816356729503

	Validation f1: 0.2842962293141119



  2%|▏         | 1/57 [00:00<00:54,  1.03it/s][A

loss: tensor(0.4978, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:02<00:57,  1.04s/it][A

loss: tensor(0.5858, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:03<00:54,  1.00s/it][A

loss: tensor(0.6679, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:04<00:55,  1.05s/it][A

loss: tensor(0.7985, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:53,  1.03s/it][A

loss: tensor(0.5083, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:06<00:53,  1.06s/it][A

loss: tensor(0.5444, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:51,  1.02s/it][A

loss: tensor(0.6396, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:49,  1.02s/it][A

loss: tensor(0.6102, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:49,  1.04s/it][A

loss: tensor(0.4876, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:47,  1.02s/it][A

loss: tensor(0.6339, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:47,  1.04s/it][A

loss: tensor(0.6422, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:46,  1.02s/it][A

loss: tensor(0.6392, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:45,  1.04s/it][A

loss: tensor(0.5981, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:43,  1.02s/it][A

loss: tensor(0.5149, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:15<00:42,  1.01s/it][A

loss: tensor(0.6930, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:42,  1.04s/it][A

loss: tensor(0.7559, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:17<00:40,  1.01s/it][A

loss: tensor(0.5655, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:18<00:40,  1.05s/it][A

loss: tensor(0.7101, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:19<00:39,  1.03s/it][A

loss: tensor(0.7607, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:20<00:37,  1.02s/it][A

loss: tensor(0.7767, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:21<00:37,  1.04s/it][A

loss: tensor(0.6960, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:22<00:36,  1.03s/it][A

loss: tensor(0.6151, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:23<00:35,  1.05s/it][A

loss: tensor(0.5975, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:24<00:33,  1.03s/it][A

loss: tensor(0.6366, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:25<00:33,  1.04s/it][A

loss: tensor(0.6056, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:26<00:31,  1.02s/it][A

loss: tensor(0.6680, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:27<00:30,  1.00s/it][A

loss: tensor(0.6958, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:28<00:29,  1.03s/it][A

loss: tensor(0.6924, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:29<00:28,  1.02s/it][A

loss: tensor(0.5529, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:30<00:28,  1.05s/it][A

loss: tensor(0.5727, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:31<00:26,  1.03s/it][A

loss: tensor(0.7397, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:33<00:26,  1.06s/it][A

loss: tensor(0.7476, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:34<00:25,  1.04s/it][A

loss: tensor(0.7802, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:35<00:23,  1.03s/it][A

loss: tensor(0.6405, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:36<00:23,  1.05s/it][A

loss: tensor(0.5243, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:37<00:21,  1.02s/it][A

loss: tensor(0.8662, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:38<00:20,  1.05s/it][A

loss: tensor(0.5390, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:39<00:19,  1.02s/it][A

loss: tensor(0.6252, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:40<00:18,  1.03s/it][A

loss: tensor(0.5257, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:41<00:17,  1.02s/it][A

loss: tensor(0.6749, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:42<00:16,  1.01s/it][A

loss: tensor(0.7783, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:43<00:15,  1.04s/it][A

loss: tensor(0.6553, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:44<00:14,  1.03s/it][A

loss: tensor(0.8118, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:45<00:13,  1.05s/it][A

loss: tensor(0.5083, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:46<00:12,  1.02s/it][A

loss: tensor(0.6248, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:47<00:11,  1.00s/it][A

loss: tensor(0.6169, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:48<00:10,  1.02s/it][A

loss: tensor(0.5105, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:49<00:08,  1.00it/s][A

loss: tensor(0.5672, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:50<00:08,  1.02s/it][A

loss: tensor(0.7822, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:51<00:06,  1.00it/s][A

loss: tensor(0.7268, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:52<00:06,  1.02s/it][A

loss: tensor(0.6203, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:53<00:04,  1.00it/s][A

loss: tensor(0.6218, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:54<00:03,  1.02it/s][A

loss: tensor(0.8897, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:55<00:03,  1.01s/it][A

loss: tensor(0.6295, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:56<00:01,  1.01it/s][A

loss: tensor(0.7315, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:57<00:01,  1.01s/it][A

loss: tensor(0.6255, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:58<00:00,  1.02s/it][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.71it/s][A

loss: tensor(0.5679, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6472693676488441

	Training acc: 0.815795854918878

	Training prec: 0.3775067798556628

	Training rec: 0.25965903267874463

	Training f1: 0.26759635317146135

	Current Learning rate:  0.00031428571428571427



  7%|▋         | 2/29 [00:00<00:04,  6.72it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.75it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.77it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.77it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.75it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.75it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.77it/s][A
 31%|███       | 9/29 [00:01<00:02,  6.78it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.80it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.80it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.80it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.80it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.75it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.75it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.77it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.77it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.77it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.79it/s][A
 69%|██████▉   | 20/29 [00:02<00:01,  6.80it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.695780322469514

	Validation acc: 0.7997439016468395

	Validation prec: 0.31790078352449036

	Validation rec: 0.2722073889356978

	Validation f1: 0.27312138569755395



  2%|▏         | 1/57 [00:01<00:59,  1.06s/it][A

loss: tensor(0.6154, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:02<00:54,  1.00it/s][A

loss: tensor(0.6251, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:03<00:55,  1.03s/it][A

loss: tensor(0.6367, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:04<00:52,  1.00it/s][A

loss: tensor(0.6600, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:04<00:50,  1.02it/s][A

loss: tensor(0.5430, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:06<00:51,  1.01s/it][A

loss: tensor(0.4982, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:06<00:49,  1.02it/s][A

loss: tensor(0.7697, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:49,  1.01s/it][A

loss: tensor(0.6333, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:08<00:47,  1.01it/s][A

loss: tensor(0.6917, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:47,  1.01s/it][A

loss: tensor(0.5888, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:10<00:45,  1.01it/s][A

loss: tensor(0.6112, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:11<00:43,  1.03it/s][A

loss: tensor(0.7874, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:12<00:44,  1.00s/it][A

loss: tensor(0.6254, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:13<00:42,  1.02it/s][A

loss: tensor(0.5584, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:14<00:42,  1.01s/it][A

loss: tensor(0.6046, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:15<00:40,  1.01it/s][A

loss: tensor(0.5443, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:16<00:40,  1.00s/it][A

loss: tensor(0.6631, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:17<00:38,  1.01it/s][A

loss: tensor(0.5392, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:18<00:37,  1.03it/s][A

loss: tensor(0.6301, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:19<00:37,  1.00s/it][A

loss: tensor(0.8206, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:20<00:35,  1.02it/s][A

loss: tensor(0.6586, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:21<00:35,  1.01s/it][A

loss: tensor(0.6015, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:22<00:33,  1.01it/s][A

loss: tensor(0.6613, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:23<00:32,  1.03it/s][A

loss: tensor(0.7485, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:24<00:31,  1.00it/s][A

loss: tensor(0.7221, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:25<00:30,  1.02it/s][A

loss: tensor(0.7660, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:26<00:30,  1.00s/it][A

loss: tensor(0.5444, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:27<00:28,  1.01it/s][A

loss: tensor(0.7717, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:28<00:28,  1.02s/it][A

loss: tensor(0.5188, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:29<00:26,  1.01it/s][A

loss: tensor(0.6502, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:30<00:25,  1.02it/s][A

loss: tensor(0.6167, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:31<00:25,  1.00s/it][A

loss: tensor(0.6393, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:32<00:23,  1.02it/s][A

loss: tensor(0.6284, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:33<00:23,  1.00s/it][A

loss: tensor(0.5744, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:34<00:21,  1.02it/s][A

loss: tensor(0.8417, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:35<00:21,  1.01s/it][A

loss: tensor(0.6954, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:36<00:19,  1.02it/s][A

loss: tensor(0.5982, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:37<00:18,  1.03it/s][A

loss: tensor(0.6004, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:38<00:17,  1.00it/s][A

loss: tensor(0.6640, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:39<00:16,  1.02it/s][A

loss: tensor(0.6748, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:40<00:16,  1.00s/it][A

loss: tensor(0.6178, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:41<00:14,  1.02it/s][A

loss: tensor(0.6349, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:42<00:13,  1.03it/s][A

loss: tensor(0.5383, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:43<00:12,  1.00it/s][A

loss: tensor(0.6275, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:44<00:11,  1.01it/s][A

loss: tensor(0.4850, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:45<00:11,  1.02s/it][A

loss: tensor(0.5622, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:46<00:10,  1.01s/it][A

loss: tensor(0.8122, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:47<00:09,  1.03s/it][A

loss: tensor(0.7223, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:48<00:08,  1.01s/it][A

loss: tensor(0.7005, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:49<00:06,  1.00it/s][A

loss: tensor(0.8083, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:50<00:06,  1.02s/it][A

loss: tensor(0.5838, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:51<00:05,  1.00s/it][A

loss: tensor(0.5680, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:52<00:04,  1.02s/it][A

loss: tensor(0.7096, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:53<00:03,  1.00s/it][A

loss: tensor(0.5932, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:54<00:02,  1.02s/it][A

loss: tensor(0.6510, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:55<00:01,  1.00s/it][A

loss: tensor(0.6999, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:56<00:00,  1.01it/s][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.78it/s][A

loss: tensor(0.7585, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6472912671273214

	Training acc: 0.8159244962948276

	Training prec: 0.39894800605370295

	Training rec: 0.2591669570321306

	Training f1: 0.2681061106694232

	Current Learning rate:  0.0002857142857142857



  7%|▋         | 2/29 [00:00<00:03,  6.79it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.80it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.81it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.80it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.80it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.80it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.81it/s][A
 31%|███       | 9/29 [00:01<00:02,  6.81it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.81it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.82it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.81it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.82it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.80it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.77it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.78it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.76it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  5.53it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  5.84it/s][A
 69%|██████▉   | 20/29 [00:03<00:01,  6.09it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.6878616398778455

	Validation acc: 0.8012253334774926

	Validation prec: 0.3331733160746675

	Validation rec: 0.27518358543221916

	Validation f1: 0.27483519148007685



  2%|▏         | 1/57 [00:00<00:53,  1.05it/s][A

loss: tensor(0.6290, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:01<00:52,  1.05it/s][A

loss: tensor(0.6653, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:02<00:54,  1.02s/it][A

loss: tensor(0.6123, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:03<00:52,  1.01it/s][A

loss: tensor(0.5534, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:53,  1.02s/it][A

loss: tensor(0.5549, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:05<00:51,  1.00s/it][A

loss: tensor(0.5948, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:51,  1.02s/it][A

loss: tensor(0.6265, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:48,  1.00it/s][A

loss: tensor(0.7454, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:08<00:47,  1.01it/s][A

loss: tensor(0.5928, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:47,  1.01s/it][A

loss: tensor(0.6402, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:10<00:45,  1.01it/s][A

loss: tensor(0.4900, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:45,  1.02s/it][A

loss: tensor(0.5577, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:43,  1.00it/s][A

loss: tensor(0.6676, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:43,  1.02s/it][A

loss: tensor(0.8089, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:15<00:42,  1.00s/it][A

loss: tensor(0.8979, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:40,  1.01it/s][A

loss: tensor(0.6152, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:17<00:40,  1.01s/it][A

loss: tensor(0.6722, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:18<00:38,  1.00it/s][A

loss: tensor(0.6139, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:19<00:38,  1.02s/it][A

loss: tensor(0.7960, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:20<00:37,  1.00s/it][A

loss: tensor(0.6332, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:21<00:35,  1.01it/s][A

loss: tensor(0.6281, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:22<00:35,  1.01s/it][A

loss: tensor(0.6639, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:23<00:33,  1.00it/s][A

loss: tensor(0.6738, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:24<00:33,  1.02s/it][A

loss: tensor(0.6477, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:25<00:32,  1.00s/it][A

loss: tensor(0.7259, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:26<00:31,  1.02s/it][A

loss: tensor(0.8198, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:27<00:30,  1.00s/it][A

loss: tensor(0.5410, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:28<00:28,  1.01it/s][A

loss: tensor(0.6944, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:29<00:28,  1.01s/it][A

loss: tensor(0.7848, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:30<00:26,  1.01it/s][A

loss: tensor(0.6429, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:31<00:26,  1.02s/it][A

loss: tensor(0.7635, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:32<00:24,  1.00it/s][A

loss: tensor(0.7686, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:33<00:24,  1.02s/it][A

loss: tensor(0.6205, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:34<00:23,  1.00s/it][A

loss: tensor(0.5807, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:35<00:21,  1.01it/s][A

loss: tensor(0.7351, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:36<00:21,  1.01s/it][A

loss: tensor(0.5335, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:37<00:19,  1.01it/s][A

loss: tensor(0.7117, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:38<00:19,  1.02s/it][A

loss: tensor(0.6489, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:39<00:18,  1.00s/it][A

loss: tensor(0.4727, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:40<00:17,  1.02s/it][A

loss: tensor(0.6116, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:41<00:15,  1.00it/s][A

loss: tensor(0.6929, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:42<00:14,  1.01it/s][A

loss: tensor(0.7437, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:43<00:14,  1.01s/it][A

loss: tensor(0.6439, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:44<00:12,  1.01it/s][A

loss: tensor(0.5733, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:45<00:12,  1.01s/it][A

loss: tensor(0.6287, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:46<00:10,  1.00it/s][A

loss: tensor(0.7199, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:47<00:09,  1.01it/s][A

loss: tensor(0.7127, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:48<00:09,  1.01s/it][A

loss: tensor(0.6553, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:49<00:08,  1.00s/it][A

loss: tensor(0.7264, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:50<00:07,  1.02s/it][A

loss: tensor(0.6278, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:51<00:06,  1.00s/it][A

loss: tensor(0.6384, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:52<00:05,  1.02s/it][A

loss: tensor(0.5865, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:53<00:04,  1.00s/it][A

loss: tensor(0.5440, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:54<00:02,  1.01it/s][A

loss: tensor(0.4868, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:55<00:02,  1.01s/it][A

loss: tensor(0.6649, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:56<00:01,  1.00s/it][A

loss: tensor(0.5893, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:57<00:00,  1.00s/it][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.77it/s][A

loss: tensor(0.7516, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6530236303806305

	Training acc: 0.8146279686788267

	Training prec: 0.3924162787298298

	Training rec: 0.2616105318370127

	Training f1: 0.26937191626244666

	Current Learning rate:  0.0002571428571428571



  7%|▋         | 2/29 [00:00<00:03,  6.75it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.75it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.74it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.75it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.75it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.75it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.76it/s][A
 31%|███       | 9/29 [00:01<00:02,  6.75it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.74it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.73it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.74it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.75it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.75it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.75it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.76it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.75it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.75it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.76it/s][A
 69%|██████▉   | 20/29 [00:02<00:01,  6.76it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.7172491211315681

	Validation acc: 0.7947261534846807

	Validation prec: 0.30089346963384683

	Validation rec: 0.25750549066201395

	Validation f1: 0.2565228743313569



  2%|▏         | 1/57 [00:00<00:53,  1.05it/s][A

loss: tensor(0.5828, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:02<00:56,  1.02s/it][A

loss: tensor(0.5219, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:02<00:53,  1.01it/s][A

loss: tensor(0.4670, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:04<00:54,  1.03s/it][A

loss: tensor(0.6069, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:52,  1.00s/it][A

loss: tensor(0.5787, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:05<00:50,  1.01it/s][A

loss: tensor(0.8475, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:50,  1.02s/it][A

loss: tensor(0.5994, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:48,  1.00it/s][A

loss: tensor(0.6969, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:48,  1.02s/it][A

loss: tensor(0.6314, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:46,  1.00it/s][A

loss: tensor(0.5243, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:46,  1.02s/it][A

loss: tensor(0.6371, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:44,  1.00it/s][A

loss: tensor(0.5832, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:43,  1.01it/s][A

loss: tensor(0.5377, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:43,  1.01s/it][A

loss: tensor(0.6933, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:15<00:41,  1.01it/s][A

loss: tensor(0.7547, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:41,  1.02s/it][A

loss: tensor(0.6645, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:17<00:39,  1.00it/s][A

loss: tensor(0.6343, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:18<00:39,  1.02s/it][A

loss: tensor(0.7708, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:19<00:37,  1.00it/s][A

loss: tensor(0.5789, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:20<00:36,  1.01it/s][A

loss: tensor(0.6019, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:21<00:36,  1.01s/it][A

loss: tensor(0.7461, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:22<00:34,  1.01it/s][A

loss: tensor(0.6908, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:23<00:34,  1.02s/it][A

loss: tensor(0.6583, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:24<00:32,  1.00it/s][A

loss: tensor(0.6969, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:25<00:31,  1.02it/s][A

loss: tensor(0.6024, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:26<00:31,  1.01s/it][A

loss: tensor(0.6772, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:27<00:29,  1.01it/s][A

loss: tensor(0.7038, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:28<00:29,  1.02s/it][A

loss: tensor(0.5476, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:29<00:27,  1.00it/s][A

loss: tensor(0.7714, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:30<00:27,  1.02s/it][A

loss: tensor(0.8369, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:31<00:25,  1.00it/s][A

loss: tensor(0.6325, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:32<00:24,  1.01it/s][A

loss: tensor(0.6365, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:33<00:24,  1.00s/it][A

loss: tensor(0.6570, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:34<00:22,  1.01it/s][A

loss: tensor(0.7980, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:35<00:22,  1.00s/it][A

loss: tensor(0.6271, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:36<00:20,  1.02it/s][A

loss: tensor(0.7160, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:37<00:20,  1.01s/it][A

loss: tensor(0.5678, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:38<00:18,  1.02it/s][A

loss: tensor(0.6744, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:38<00:17,  1.03it/s][A

loss: tensor(0.6158, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:39<00:16,  1.01it/s][A

loss: tensor(0.5055, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:40<00:15,  1.03it/s][A

loss: tensor(0.5329, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:41<00:14,  1.00it/s][A

loss: tensor(0.6802, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:42<00:13,  1.02it/s][A

loss: tensor(0.7153, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:43<00:12,  1.01it/s][A

loss: tensor(0.7393, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:44<00:11,  1.03it/s][A

loss: tensor(0.6953, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:45<00:10,  1.04it/s][A

loss: tensor(0.6777, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:46<00:09,  1.01it/s][A

loss: tensor(0.5103, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:47<00:08,  1.03it/s][A

loss: tensor(0.5183, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:48<00:07,  1.00it/s][A

loss: tensor(0.6994, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:49<00:06,  1.02it/s][A

loss: tensor(0.5564, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:50<00:05,  1.04it/s][A

loss: tensor(0.7270, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:51<00:04,  1.01it/s][A

loss: tensor(0.7426, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:52<00:03,  1.03it/s][A

loss: tensor(0.5744, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:53<00:03,  1.00s/it][A

loss: tensor(0.7190, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:54<00:01,  1.02it/s][A

loss: tensor(0.7838, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:55<00:01,  1.00s/it][A

loss: tensor(0.6225, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:56<00:00,  1.01it/s][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.99it/s][A

loss: tensor(0.5850, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6483303141175655

	Training acc: 0.81577102285846

	Training prec: 0.3936409524242283

	Training rec: 0.2593064627574843

	Training f1: 0.2677392524099798

	Current Learning rate:  0.00022857142857142857



  7%|▋         | 2/29 [00:00<00:03,  6.95it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.95it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.85it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.85it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.84it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.86it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.86it/s][A
 31%|███       | 9/29 [00:01<00:02,  6.88it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.90it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.89it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.87it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.88it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.89it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.90it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.89it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.90it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.90it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.88it/s][A
 69%|██████▉   | 20/29 [00:02<00:01,  6.87it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.6983088594058464

	Validation acc: 0.8007463737763609

	Validation prec: 0.3205065765680133

	Validation rec: 0.264704516221347

	Validation f1: 0.26515382851779906



  2%|▏         | 1/57 [00:01<00:58,  1.05s/it][A

loss: tensor(0.5938, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:01<00:53,  1.02it/s][A

loss: tensor(0.6646, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:02<00:51,  1.04it/s][A

loss: tensor(0.5596, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:03<00:53,  1.00s/it][A

loss: tensor(0.5852, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:04<00:50,  1.02it/s][A

loss: tensor(0.6496, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:05<00:51,  1.00s/it][A

loss: tensor(0.6832, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:06<00:48,  1.02it/s][A

loss: tensor(0.6614, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:07<00:49,  1.00s/it][A

loss: tensor(0.5897, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:08<00:46,  1.02it/s][A

loss: tensor(0.6069, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:09<00:45,  1.04it/s][A

loss: tensor(0.6819, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:10<00:45,  1.01it/s][A

loss: tensor(0.8230, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:11<00:44,  1.02it/s][A

loss: tensor(0.7274, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:12<00:44,  1.00s/it][A

loss: tensor(0.5180, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:13<00:42,  1.02it/s][A

loss: tensor(0.7404, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:14<00:42,  1.00s/it][A

loss: tensor(0.7101, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:15<00:40,  1.02it/s][A

loss: tensor(0.6462, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:16<00:38,  1.04it/s][A

loss: tensor(0.6386, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:17<00:38,  1.01it/s][A

loss: tensor(0.7679, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:18<00:36,  1.03it/s][A

loss: tensor(0.5394, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:19<00:36,  1.00it/s][A

loss: tensor(0.7029, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:20<00:35,  1.02it/s][A

loss: tensor(0.6192, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:21<00:34,  1.01it/s][A

loss: tensor(0.5475, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:22<00:33,  1.03it/s][A

loss: tensor(0.6950, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:23<00:31,  1.04it/s][A

loss: tensor(0.7038, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:24<00:31,  1.01it/s][A

loss: tensor(0.7916, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:25<00:30,  1.03it/s][A

loss: tensor(0.5861, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:26<00:29,  1.01it/s][A

loss: tensor(0.6473, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:27<00:28,  1.03it/s][A

loss: tensor(0.5699, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:28<00:26,  1.04it/s][A

loss: tensor(0.4699, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:29<00:26,  1.01it/s][A

loss: tensor(0.7297, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:30<00:25,  1.03it/s][A

loss: tensor(0.6780, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:31<00:24,  1.01it/s][A

loss: tensor(0.6600, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:32<00:23,  1.02it/s][A

loss: tensor(0.5250, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:33<00:22,  1.00it/s][A

loss: tensor(0.5106, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:34<00:21,  1.02it/s][A

loss: tensor(0.7048, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:35<00:20,  1.04it/s][A

loss: tensor(0.5362, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:36<00:19,  1.01it/s][A

loss: tensor(0.5958, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:37<00:18,  1.03it/s][A

loss: tensor(0.6907, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:38<00:17,  1.01it/s][A

loss: tensor(0.6872, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:39<00:16,  1.02it/s][A

loss: tensor(0.6165, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:40<00:15,  1.00it/s][A

loss: tensor(0.5899, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:41<00:14,  1.02it/s][A

loss: tensor(0.6195, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:42<00:13,  1.04it/s][A

loss: tensor(0.5602, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:43<00:12,  1.01it/s][A

loss: tensor(0.6508, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:44<00:11,  1.03it/s][A

loss: tensor(0.7108, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:45<00:10,  1.00it/s][A

loss: tensor(0.6075, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:46<00:09,  1.02it/s][A

loss: tensor(0.6242, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:47<00:08,  1.01it/s][A

loss: tensor(0.5472, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:48<00:07,  1.03it/s][A

loss: tensor(0.8829, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:49<00:06,  1.04it/s][A

loss: tensor(0.7743, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:50<00:05,  1.01it/s][A

loss: tensor(0.6083, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:51<00:04,  1.03it/s][A

loss: tensor(0.6766, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:52<00:03,  1.00it/s][A

loss: tensor(0.6159, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:53<00:02,  1.02it/s][A

loss: tensor(0.7568, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:53<00:01,  1.04it/s][A

loss: tensor(0.5917, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:55<00:00,  1.01it/s][A

loss: tensor(0.5411, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:55<00:00,  1.02it/s][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.85it/s][A

loss: tensor(0.7980, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6457959298501935

	Training acc: 0.8178708394519774

	Training prec: 0.42744925708269293

	Training rec: 0.2657711850568541

	Training f1: 0.2784136002600764

	Current Learning rate:  0.0002



  7%|▋         | 2/29 [00:00<00:03,  6.84it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.87it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.89it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.89it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.86it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.87it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.77it/s][A
 31%|███       | 9/29 [00:01<00:02,  6.81it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.84it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.86it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.87it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.86it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.88it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.87it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.88it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.90it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.89it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.91it/s][A
 69%|██████▉   | 20/29 [00:02<00:01,  6.89it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.6958981978482214

	Validation acc: 0.7980507762406556

	Validation prec: 0.30656888008022676

	Validation rec: 0.2679998482444745

	Validation f1: 0.2657384024138447



  2%|▏         | 1/57 [00:00<00:52,  1.07it/s][A

loss: tensor(0.6078, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:01<00:51,  1.07it/s][A

loss: tensor(0.5551, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:02<00:53,  1.01it/s][A

loss: tensor(0.6838, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:03<00:51,  1.03it/s][A

loss: tensor(0.6147, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:04<00:51,  1.00it/s][A

loss: tensor(0.5975, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:05<00:49,  1.02it/s][A

loss: tensor(0.6750, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:06<00:49,  1.01it/s][A

loss: tensor(0.6436, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:07<00:47,  1.03it/s][A

loss: tensor(0.6079, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:08<00:46,  1.04it/s][A

loss: tensor(0.6794, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:09<00:46,  1.01it/s][A

loss: tensor(0.5824, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:10<00:44,  1.03it/s][A

loss: tensor(0.5195, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:11<00:44,  1.00it/s][A

loss: tensor(0.4855, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:12<00:43,  1.02it/s][A

loss: tensor(0.8335, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:13<00:41,  1.04it/s][A

loss: tensor(0.6867, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:14<00:41,  1.00it/s][A

loss: tensor(0.6918, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:15<00:40,  1.02it/s][A

loss: tensor(0.6963, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:16<00:40,  1.00s/it][A

loss: tensor(0.6274, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:17<00:38,  1.02it/s][A

loss: tensor(0.7617, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:18<00:38,  1.00s/it][A

loss: tensor(0.6523, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:19<00:36,  1.02it/s][A

loss: tensor(0.6320, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:20<00:34,  1.03it/s][A

loss: tensor(0.5403, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:21<00:34,  1.01it/s][A

loss: tensor(0.6651, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:22<00:33,  1.03it/s][A

loss: tensor(0.6446, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:23<00:32,  1.00it/s][A

loss: tensor(0.6649, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:24<00:31,  1.02it/s][A

loss: tensor(0.6086, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:25<00:30,  1.00it/s][A

loss: tensor(0.8122, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:26<00:29,  1.02it/s][A

loss: tensor(0.5969, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:27<00:27,  1.04it/s][A

loss: tensor(0.5591, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:28<00:27,  1.01it/s][A

loss: tensor(0.6755, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:29<00:26,  1.03it/s][A

loss: tensor(0.5870, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:30<00:25,  1.00it/s][A

loss: tensor(0.5125, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:31<00:24,  1.02it/s][A

loss: tensor(0.7713, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:32<00:23,  1.01it/s][A

loss: tensor(0.6104, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:33<00:22,  1.03it/s][A

loss: tensor(0.5970, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:34<00:21,  1.04it/s][A

loss: tensor(0.7780, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:35<00:20,  1.01it/s][A

loss: tensor(0.6489, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:36<00:19,  1.03it/s][A

loss: tensor(0.5043, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:37<00:18,  1.00it/s][A

loss: tensor(0.9346, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:38<00:17,  1.02it/s][A

loss: tensor(0.4750, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:39<00:16,  1.04it/s][A

loss: tensor(0.6767, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:40<00:15,  1.01it/s][A

loss: tensor(0.8080, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:41<00:14,  1.03it/s][A

loss: tensor(0.5788, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:42<00:13,  1.00it/s][A

loss: tensor(0.5679, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:43<00:12,  1.02it/s][A

loss: tensor(0.6487, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:44<00:11,  1.00it/s][A

loss: tensor(0.5433, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:45<00:10,  1.02it/s][A

loss: tensor(0.7341, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:46<00:09,  1.04it/s][A

loss: tensor(0.7203, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:47<00:08,  1.01it/s][A

loss: tensor(0.6049, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:48<00:07,  1.03it/s][A

loss: tensor(0.5896, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:49<00:06,  1.01it/s][A

loss: tensor(0.5689, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:50<00:05,  1.02it/s][A

loss: tensor(0.6670, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:51<00:04,  1.00it/s][A

loss: tensor(0.6622, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:52<00:03,  1.02it/s][A

loss: tensor(0.6212, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:52<00:02,  1.03it/s][A

loss: tensor(0.6928, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:54<00:01,  1.01it/s][A

loss: tensor(0.7509, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:54<00:00,  1.02it/s][A

loss: tensor(0.6807, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:55<00:00,  1.02it/s][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.86it/s][A

loss: tensor(0.8967, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6497007394046114

	Training acc: 0.8154254373873441

	Training prec: 0.4073385664997946

	Training rec: 0.2558380794817716

	Training f1: 0.26381981388256925

	Current Learning rate:  0.00017142857142857143



  7%|▋         | 2/29 [00:00<00:03,  6.90it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.91it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.91it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.92it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.90it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.89it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.88it/s][A
 31%|███       | 9/29 [00:01<00:02,  6.89it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.89it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.90it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.90it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.88it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.88it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.88it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.89it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.89it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.90it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.90it/s][A
 69%|██████▉   | 20/29 [00:02<00:01,  6.87it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.6965809454177988

	Validation acc: 0.8000840658954521

	Validation prec: 0.30473642218489805

	Validation rec: 0.2590104553312563

	Validation f1: 0.255629933441316



  2%|▏         | 1/57 [00:00<00:52,  1.07it/s][A

loss: tensor(0.5758, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:01<00:55,  1.01s/it][A

loss: tensor(0.5533, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:02<00:52,  1.03it/s][A

loss: tensor(0.6308, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:03<00:53,  1.00s/it][A

loss: tensor(0.7237, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:04<00:50,  1.02it/s][A

loss: tensor(0.6304, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:05<00:49,  1.03it/s][A

loss: tensor(0.5426, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:06<00:49,  1.01it/s][A

loss: tensor(0.5999, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:07<00:47,  1.02it/s][A

loss: tensor(0.7345, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:08<00:47,  1.00it/s][A

loss: tensor(0.5711, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:09<00:45,  1.02it/s][A

loss: tensor(0.5899, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:10<00:45,  1.01it/s][A

loss: tensor(0.5490, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:11<00:43,  1.03it/s][A

loss: tensor(0.6460, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:12<00:42,  1.04it/s][A

loss: tensor(0.8618, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:13<00:42,  1.01it/s][A

loss: tensor(0.7056, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:14<00:40,  1.03it/s][A

loss: tensor(0.5621, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:15<00:40,  1.00it/s][A

loss: tensor(0.5603, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:16<00:39,  1.02it/s][A

loss: tensor(0.8723, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:17<00:37,  1.04it/s][A

loss: tensor(0.7048, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:18<00:37,  1.01it/s][A

loss: tensor(0.5572, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:19<00:35,  1.03it/s][A

loss: tensor(0.7715, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:20<00:35,  1.00it/s][A

loss: tensor(0.6170, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:21<00:34,  1.02it/s][A

loss: tensor(0.5151, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:22<00:34,  1.01s/it][A

loss: tensor(0.6325, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:23<00:32,  1.02it/s][A

loss: tensor(0.6094, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:24<00:31,  1.03it/s][A

loss: tensor(0.5807, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:25<00:30,  1.00it/s][A

loss: tensor(0.6049, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:26<00:29,  1.02it/s][A

loss: tensor(0.5045, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:27<00:28,  1.00it/s][A

loss: tensor(0.5598, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:28<00:27,  1.02it/s][A

loss: tensor(0.5154, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:29<00:27,  1.00s/it][A

loss: tensor(0.6359, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:30<00:25,  1.02it/s][A

loss: tensor(0.5719, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:31<00:24,  1.03it/s][A

loss: tensor(0.7124, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:32<00:24,  1.00s/it][A

loss: tensor(0.7644, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:33<00:22,  1.01it/s][A

loss: tensor(0.8862, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:34<00:22,  1.01s/it][A

loss: tensor(0.8840, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:35<00:20,  1.02it/s][A

loss: tensor(0.6896, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:36<00:19,  1.03it/s][A

loss: tensor(0.7205, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:37<00:19,  1.00s/it][A

loss: tensor(0.7651, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:38<00:17,  1.02it/s][A

loss: tensor(0.5919, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:39<00:17,  1.00s/it][A

loss: tensor(0.5623, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:40<00:15,  1.02it/s][A

loss: tensor(0.5174, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:41<00:15,  1.00s/it][A

loss: tensor(0.6549, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:42<00:13,  1.02it/s][A

loss: tensor(0.6557, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:43<00:12,  1.04it/s][A

loss: tensor(0.6080, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:44<00:11,  1.01it/s][A

loss: tensor(0.8066, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:45<00:10,  1.03it/s][A

loss: tensor(0.5954, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:46<00:09,  1.00it/s][A

loss: tensor(0.6488, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:47<00:08,  1.02it/s][A

loss: tensor(0.6852, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:48<00:08,  1.01s/it][A

loss: tensor(0.7173, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:49<00:06,  1.01it/s][A

loss: tensor(0.5913, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:50<00:05,  1.03it/s][A

loss: tensor(0.7083, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:51<00:05,  1.01s/it][A

loss: tensor(0.7688, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:52<00:03,  1.01it/s][A

loss: tensor(0.5383, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:53<00:03,  1.01s/it][A

loss: tensor(0.5906, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:54<00:01,  1.01it/s][A

loss: tensor(0.8585, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:55<00:01,  1.01s/it][A

loss: tensor(0.6925, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:56<00:00,  1.02it/s][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.81it/s][A

loss: tensor(0.5660, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6503463596628424

	Training acc: 0.8162123367454654

	Training prec: 0.4325372748100322

	Training rec: 0.26303962337748077

	Training f1: 0.2746718651575094

	Current Learning rate:  0.00014285714285714284



  7%|▋         | 2/29 [00:00<00:04,  6.69it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.69it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.73it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.78it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.72it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.72it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.75it/s][A
 31%|███       | 9/29 [00:01<00:02,  6.79it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.80it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.81it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.85it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.83it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.85it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.85it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.87it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.88it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.87it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.88it/s][A
 69%|██████▉   | 20/29 [00:02<00:01,  6.85it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.709475182253739

	Validation acc: 0.7956485373547809

	Validation prec: 0.3264629942030357

	Validation rec: 0.2667714275131913

	Validation f1: 0.26969097283713217



  2%|▏         | 1/57 [00:01<00:57,  1.04s/it][A

loss: tensor(0.6464, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:01<00:53,  1.02it/s][A

loss: tensor(0.5813, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:02<00:51,  1.04it/s][A

loss: tensor(0.6575, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:03<00:52,  1.01it/s][A

loss: tensor(0.6416, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:04<00:50,  1.03it/s][A

loss: tensor(0.6086, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:05<00:50,  1.00it/s][A

loss: tensor(0.7637, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:06<00:48,  1.02it/s][A

loss: tensor(0.5223, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:07<00:47,  1.04it/s][A

loss: tensor(0.5909, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:08<00:47,  1.01it/s][A

loss: tensor(0.6947, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:09<00:45,  1.03it/s][A

loss: tensor(0.5308, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:10<00:45,  1.01it/s][A

loss: tensor(0.6926, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:11<00:43,  1.03it/s][A

loss: tensor(0.6458, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:12<00:43,  1.01it/s][A

loss: tensor(0.6391, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:13<00:41,  1.03it/s][A

loss: tensor(0.7159, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:14<00:40,  1.03it/s][A

loss: tensor(0.7023, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:15<00:40,  1.01it/s][A

loss: tensor(0.6549, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:16<00:38,  1.03it/s][A

loss: tensor(0.7734, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:17<00:38,  1.00it/s][A

loss: tensor(0.6113, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:18<00:37,  1.02it/s][A

loss: tensor(0.5561, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:19<00:36,  1.00it/s][A

loss: tensor(0.7258, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:20<00:35,  1.02it/s][A

loss: tensor(0.6334, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:21<00:33,  1.04it/s][A

loss: tensor(0.6374, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:22<00:33,  1.01it/s][A

loss: tensor(0.6357, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:23<00:32,  1.02it/s][A

loss: tensor(0.6882, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:24<00:31,  1.00it/s][A

loss: tensor(0.5710, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:25<00:30,  1.02it/s][A

loss: tensor(0.8804, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:26<00:30,  1.01s/it][A

loss: tensor(0.6385, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:27<00:28,  1.02it/s][A

loss: tensor(0.7029, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:28<00:27,  1.03it/s][A

loss: tensor(0.6435, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:29<00:26,  1.01it/s][A

loss: tensor(0.7390, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:30<00:25,  1.03it/s][A

loss: tensor(0.5812, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:31<00:24,  1.00it/s][A

loss: tensor(0.6360, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:32<00:23,  1.03it/s][A

loss: tensor(0.6169, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:33<00:22,  1.04it/s][A

loss: tensor(0.5277, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:34<00:21,  1.02it/s][A

loss: tensor(0.7415, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:35<00:20,  1.03it/s][A

loss: tensor(0.5843, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:36<00:19,  1.01it/s][A

loss: tensor(0.6156, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:37<00:18,  1.03it/s][A

loss: tensor(0.5993, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:38<00:17,  1.00it/s][A

loss: tensor(0.7347, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:39<00:16,  1.03it/s][A

loss: tensor(0.5956, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:40<00:15,  1.04it/s][A

loss: tensor(0.7572, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:41<00:14,  1.01it/s][A

loss: tensor(0.6608, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:42<00:13,  1.03it/s][A

loss: tensor(0.6121, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:43<00:12,  1.01it/s][A

loss: tensor(0.6376, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:44<00:11,  1.02it/s][A

loss: tensor(0.5945, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:45<00:10,  1.00it/s][A

loss: tensor(0.5670, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:46<00:09,  1.02it/s][A

loss: tensor(0.7399, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:47<00:08,  1.04it/s][A

loss: tensor(0.6668, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:48<00:07,  1.01it/s][A

loss: tensor(0.5194, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:49<00:06,  1.03it/s][A

loss: tensor(0.6809, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:50<00:05,  1.01it/s][A

loss: tensor(0.6961, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:51<00:04,  1.03it/s][A

loss: tensor(0.6065, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:52<00:03,  1.01it/s][A

loss: tensor(0.5575, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:52<00:02,  1.03it/s][A

loss: tensor(0.5859, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:53<00:01,  1.04it/s][A

loss: tensor(0.5708, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:54<00:00,  1.01it/s][A

loss: tensor(0.7289, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:55<00:00,  1.02it/s][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.91it/s][A

loss: tensor(0.5653, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6439453718955057

	Training acc: 0.8165072920878272

	Training prec: 0.40646195074583563

	Training rec: 0.2590061146710701

	Training f1: 0.26903259365865323

	Current Learning rate:  0.00011428571428571428



  7%|▋         | 2/29 [00:00<00:03,  6.86it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.90it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.90it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.92it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.92it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.94it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.93it/s][A
 31%|███       | 9/29 [00:01<00:02,  7.02it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  7.08it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  7.12it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  7.17it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  7.20it/s][A
 48%|████▊     | 14/29 [00:01<00:02,  7.22it/s][A
 52%|█████▏    | 15/29 [00:02<00:01,  7.24it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  7.24it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  7.24it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  7.25it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  7.26it/s][A
 69%|██████▉   | 20/29 [00:02<00:01,  7.26it/s][A
 72%|███████▏  | 21/29 [00:02<00:01,  


	Validation Loss: 0.7008759882943384

	Validation acc: 0.7992998320350454

	Validation prec: 0.3326767187656464

	Validation rec: 0.2644239937917588

	Validation f1: 0.2662490737970018



  2%|▏         | 1/57 [00:01<00:58,  1.04s/it][A

loss: tensor(0.5288, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:01<00:53,  1.02it/s][A

loss: tensor(0.4243, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:03<00:54,  1.01s/it][A

loss: tensor(0.6318, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:03<00:51,  1.02it/s][A

loss: tensor(0.7226, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:04<00:51,  1.01it/s][A

loss: tensor(0.6314, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:05<00:49,  1.03it/s][A

loss: tensor(0.6236, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:06<00:47,  1.04it/s][A

loss: tensor(0.8569, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:07<00:48,  1.02it/s][A

loss: tensor(0.6056, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:08<00:46,  1.03it/s][A

loss: tensor(0.5036, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:09<00:46,  1.01it/s][A

loss: tensor(0.6327, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:10<00:44,  1.03it/s][A

loss: tensor(0.5805, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:11<00:43,  1.04it/s][A

loss: tensor(0.6533, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:12<00:43,  1.01it/s][A

loss: tensor(0.7792, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:13<00:41,  1.03it/s][A

loss: tensor(0.5142, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:14<00:41,  1.01it/s][A

loss: tensor(0.6818, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:15<00:39,  1.03it/s][A

loss: tensor(0.7015, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:16<00:39,  1.01it/s][A

loss: tensor(0.7504, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:17<00:38,  1.03it/s][A

loss: tensor(0.6209, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:18<00:36,  1.04it/s][A

loss: tensor(0.8602, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:19<00:36,  1.01it/s][A

loss: tensor(0.5519, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:20<00:34,  1.03it/s][A

loss: tensor(0.5920, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:21<00:34,  1.01it/s][A

loss: tensor(0.7792, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:22<00:33,  1.03it/s][A

loss: tensor(0.6487, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:23<00:32,  1.00it/s][A

loss: tensor(0.7244, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:24<00:31,  1.02it/s][A

loss: tensor(0.6132, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:25<00:29,  1.04it/s][A

loss: tensor(0.6258, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:26<00:29,  1.01it/s][A

loss: tensor(0.5962, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:27<00:28,  1.03it/s][A

loss: tensor(0.5455, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:28<00:27,  1.01it/s][A

loss: tensor(0.6820, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:29<00:26,  1.03it/s][A

loss: tensor(0.7809, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:30<00:25,  1.01it/s][A

loss: tensor(0.5979, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:31<00:24,  1.03it/s][A

loss: tensor(0.6216, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:32<00:22,  1.04it/s][A

loss: tensor(0.5653, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:33<00:22,  1.02it/s][A

loss: tensor(0.6921, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:34<00:21,  1.03it/s][A

loss: tensor(0.5918, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:35<00:20,  1.01it/s][A

loss: tensor(0.4765, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:36<00:19,  1.02it/s][A

loss: tensor(0.5278, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:37<00:18,  1.04it/s][A

loss: tensor(0.6159, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:38<00:17,  1.01it/s][A

loss: tensor(0.6460, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:39<00:16,  1.03it/s][A

loss: tensor(0.4258, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:40<00:15,  1.01it/s][A

loss: tensor(0.6329, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:41<00:14,  1.03it/s][A

loss: tensor(0.6660, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:42<00:13,  1.01it/s][A

loss: tensor(0.6779, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:43<00:12,  1.03it/s][A

loss: tensor(0.7435, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:44<00:11,  1.04it/s][A

loss: tensor(0.6303, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:45<00:10,  1.01it/s][A

loss: tensor(0.5594, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:45<00:09,  1.03it/s][A

loss: tensor(0.6372, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:47<00:08,  1.01it/s][A

loss: tensor(0.5944, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:47<00:07,  1.03it/s][A

loss: tensor(0.7326, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:48<00:06,  1.01it/s][A

loss: tensor(0.6947, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:49<00:05,  1.03it/s][A

loss: tensor(0.5201, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:50<00:04,  1.04it/s][A

loss: tensor(0.5775, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:51<00:03,  1.02it/s][A

loss: tensor(0.6102, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:52<00:02,  1.03it/s][A

loss: tensor(0.9342, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:53<00:01,  1.01it/s][A

loss: tensor(0.8704, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:54<00:00,  1.03it/s][A

loss: tensor(0.6464, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:55<00:00,  1.03it/s][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.89it/s][A

loss: tensor(0.7456, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6434642824164608

	Training acc: 0.8168037822983708

	Training prec: 0.42267038180152094

	Training rec: 0.2583082266685858

	Training f1: 0.26872720751709916

	Current Learning rate:  8.571428571428571e-05



  7%|▋         | 2/29 [00:00<00:03,  6.87it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.86it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.90it/s][A
 17%|█▋        | 5/29 [00:00<00:04,  5.25it/s][A
 21%|██        | 6/29 [00:00<00:04,  5.71it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.07it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.29it/s][A
 31%|███       | 9/29 [00:01<00:03,  6.47it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.59it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.70it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.76it/s][A
 45%|████▍     | 13/29 [00:02<00:02,  6.77it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.79it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.78it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.75it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.71it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.67it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.69it/s][A
 69%|██████▉   | 20/29 [00:03<00:01,  6.69it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.6877735195488766

	Validation acc: 0.8018956941418882

	Validation prec: 0.3264709347871801

	Validation rec: 0.27056226319517984

	Validation f1: 0.2721078167429583



  2%|▏         | 1/57 [00:00<00:50,  1.11it/s][A

loss: tensor(0.6124, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:01<00:52,  1.05it/s][A

loss: tensor(0.5623, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:02<00:51,  1.05it/s][A

loss: tensor(0.4927, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:03<00:50,  1.05it/s][A

loss: tensor(0.7157, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:04<00:51,  1.00it/s][A

loss: tensor(0.6764, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:05<00:50,  1.02it/s][A

loss: tensor(0.4432, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:06<00:50,  1.01s/it][A

loss: tensor(0.7482, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:07<00:48,  1.00it/s][A

loss: tensor(0.6021, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:08<00:49,  1.02s/it][A

loss: tensor(0.6070, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:09<00:47,  1.00s/it][A

loss: tensor(0.6554, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:10<00:45,  1.01it/s][A

loss: tensor(0.6378, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:11<00:45,  1.01s/it][A

loss: tensor(0.7140, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:12<00:43,  1.00it/s][A

loss: tensor(0.6860, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:13<00:43,  1.02s/it][A

loss: tensor(0.5138, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:14<00:41,  1.00it/s][A

loss: tensor(0.7500, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:15<00:41,  1.01s/it][A

loss: tensor(0.5627, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:16<00:39,  1.00it/s][A

loss: tensor(0.7246, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:17<00:38,  1.02it/s][A

loss: tensor(0.7002, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:18<00:38,  1.01s/it][A

loss: tensor(0.7119, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:19<00:36,  1.01it/s][A

loss: tensor(0.6920, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:20<00:35,  1.01it/s][A

loss: tensor(0.6470, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:21<00:34,  1.02it/s][A

loss: tensor(0.5638, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:22<00:33,  1.03it/s][A

loss: tensor(0.5956, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:23<00:33,  1.00s/it][A

loss: tensor(0.5893, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:24<00:31,  1.01it/s][A

loss: tensor(0.7207, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:25<00:31,  1.01s/it][A

loss: tensor(0.7184, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:26<00:29,  1.01it/s][A

loss: tensor(0.6129, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:27<00:29,  1.02s/it][A

loss: tensor(0.7034, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:28<00:27,  1.00it/s][A

loss: tensor(0.5887, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:29<00:26,  1.02it/s][A

loss: tensor(0.6150, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:30<00:26,  1.01s/it][A

loss: tensor(0.5274, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:31<00:24,  1.01it/s][A

loss: tensor(0.5935, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:32<00:24,  1.01s/it][A

loss: tensor(0.5639, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:33<00:22,  1.00it/s][A

loss: tensor(0.6545, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:34<00:22,  1.02s/it][A

loss: tensor(0.7312, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:35<00:20,  1.00it/s][A

loss: tensor(0.7603, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:36<00:19,  1.01it/s][A

loss: tensor(0.5168, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:37<00:19,  1.01s/it][A

loss: tensor(0.6181, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:38<00:17,  1.01it/s][A

loss: tensor(0.7598, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:39<00:17,  1.02s/it][A

loss: tensor(0.6728, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:40<00:15,  1.00it/s][A

loss: tensor(0.6812, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:41<00:14,  1.03it/s][A

loss: tensor(0.6421, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:42<00:13,  1.01it/s][A

loss: tensor(0.6352, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:43<00:12,  1.02it/s][A

loss: tensor(0.6140, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:44<00:12,  1.01s/it][A

loss: tensor(0.7962, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:45<00:10,  1.00it/s][A

loss: tensor(0.6775, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:46<00:10,  1.02s/it][A

loss: tensor(0.5029, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:47<00:08,  1.00it/s][A

loss: tensor(0.6757, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:48<00:07,  1.02it/s][A

loss: tensor(0.6012, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:49<00:06,  1.00it/s][A

loss: tensor(0.6182, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:50<00:05,  1.01it/s][A

loss: tensor(0.7341, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:51<00:05,  1.01s/it][A

loss: tensor(0.6390, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:52<00:04,  1.01s/it][A

loss: tensor(0.6451, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:53<00:03,  1.03s/it][A

loss: tensor(0.6555, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:54<00:02,  1.00s/it][A

loss: tensor(0.5584, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:55<00:00,  1.01it/s][A

loss: tensor(0.7028, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:56<00:00,  1.00it/s][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.66it/s][A

loss: tensor(0.5801, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6407132478136766

	Training acc: 0.8179319937502707

	Training prec: 0.41402240743845464

	Training rec: 0.2639058239470173

	Training f1: 0.27546095712920066

	Current Learning rate:  5.714285714285714e-05



  7%|▋         | 2/29 [00:00<00:04,  6.67it/s][A
 10%|█         | 3/29 [00:00<00:03,  6.68it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  6.67it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  6.68it/s][A
 21%|██        | 6/29 [00:00<00:03,  6.69it/s][A
 24%|██▍       | 7/29 [00:01<00:03,  6.70it/s][A
 28%|██▊       | 8/29 [00:01<00:03,  6.68it/s][A
 31%|███       | 9/29 [00:01<00:02,  6.67it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.68it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.68it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.69it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.68it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.68it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.68it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.68it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.69it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.67it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.66it/s][A
 69%|██████▉   | 20/29 [00:02<00:01,  6.64it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.6935999722316347

	Validation acc: 0.8021694240122605

	Validation prec: 0.3179086972997272

	Validation rec: 0.2683001597800541

	Validation f1: 0.26862269387072624



  2%|▏         | 1/57 [00:00<00:53,  1.05it/s][A

loss: tensor(0.5785, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:02<00:56,  1.02s/it][A

loss: tensor(0.6034, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:02<00:53,  1.01it/s][A

loss: tensor(0.6388, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:04<00:54,  1.02s/it][A

loss: tensor(0.7663, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:04<00:51,  1.00it/s][A

loss: tensor(0.6097, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:06<00:51,  1.02s/it][A

loss: tensor(0.4819, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:49,  1.00it/s][A

loss: tensor(0.5504, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:07<00:48,  1.02it/s][A

loss: tensor(0.6451, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:48,  1.01s/it][A

loss: tensor(0.5185, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:09<00:46,  1.01it/s][A

loss: tensor(0.7509, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:46,  1.02s/it][A

loss: tensor(0.5949, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:44,  1.00it/s][A

loss: tensor(0.6838, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:44,  1.02s/it][A

loss: tensor(0.5628, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:42,  1.00it/s][A

loss: tensor(0.6202, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:14<00:41,  1.01it/s][A

loss: tensor(0.6226, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:41,  1.01s/it][A

loss: tensor(0.6706, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:17<00:39,  1.00it/s][A

loss: tensor(0.6672, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:18<00:39,  1.02s/it][A

loss: tensor(0.6792, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:19<00:37,  1.00it/s][A

loss: tensor(0.5158, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:20<00:36,  1.02it/s][A

loss: tensor(0.4313, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:21<00:36,  1.01s/it][A

loss: tensor(0.6626, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:22<00:34,  1.01it/s][A

loss: tensor(0.5450, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:23<00:34,  1.02s/it][A

loss: tensor(0.7110, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:24<00:32,  1.00it/s][A

loss: tensor(0.6483, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:25<00:32,  1.02s/it][A

loss: tensor(0.7191, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:26<00:30,  1.00it/s][A

loss: tensor(0.7622, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:27<00:29,  1.01it/s][A

loss: tensor(0.8347, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:28<00:29,  1.01s/it][A

loss: tensor(0.7220, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:29<00:27,  1.01it/s][A

loss: tensor(0.6551, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:30<00:27,  1.02s/it][A

loss: tensor(0.6712, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:31<00:25,  1.00it/s][A

loss: tensor(0.6299, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:32<00:25,  1.02s/it][A

loss: tensor(0.6819, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:33<00:23,  1.00it/s][A

loss: tensor(0.6467, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:34<00:22,  1.01it/s][A

loss: tensor(0.6445, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:35<00:22,  1.01s/it][A

loss: tensor(0.5987, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:36<00:20,  1.01it/s][A

loss: tensor(0.6416, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:37<00:20,  1.02s/it][A

loss: tensor(0.5499, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:38<00:18,  1.00it/s][A

loss: tensor(0.7339, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:39<00:18,  1.02s/it][A

loss: tensor(0.6277, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:40<00:17,  1.00s/it][A

loss: tensor(0.5454, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:41<00:15,  1.01it/s][A

loss: tensor(0.6366, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:42<00:15,  1.01s/it][A

loss: tensor(0.5985, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:43<00:13,  1.01it/s][A

loss: tensor(0.5050, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:44<00:13,  1.02s/it][A

loss: tensor(0.6473, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:45<00:12,  1.00s/it][A

loss: tensor(0.7284, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:46<00:10,  1.01it/s][A

loss: tensor(0.6372, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:47<00:10,  1.01s/it][A

loss: tensor(0.5704, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:48<00:08,  1.01it/s][A

loss: tensor(0.4990, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:49<00:08,  1.01s/it][A

loss: tensor(0.6370, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:50<00:06,  1.00it/s][A

loss: tensor(0.6105, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:51<00:06,  1.02s/it][A

loss: tensor(0.6871, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:52<00:04,  1.00it/s][A

loss: tensor(0.6248, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:53<00:03,  1.02it/s][A

loss: tensor(0.8368, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:54<00:03,  1.01s/it][A

loss: tensor(0.6944, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:55<00:01,  1.00it/s][A

loss: tensor(0.7574, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:56<00:01,  1.02s/it][A

loss: tensor(0.8311, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:56<00:00,  1.00it/s][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:04,  6.78it/s][A

loss: tensor(0.6192, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6411248482110208

	Training acc: 0.8179487701111927

	Training prec: 0.43286136132251585

	Training rec: 0.2664777341573938

	Training f1: 0.2781729816258138

	Current Learning rate:  2.857142857142857e-05



  7%|▋         | 2/29 [00:00<00:03,  6.98it/s][A
 10%|█         | 3/29 [00:00<00:03,  7.05it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  7.06it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  7.08it/s][A
 21%|██        | 6/29 [00:00<00:03,  7.09it/s][A
 24%|██▍       | 7/29 [00:00<00:03,  7.01it/s][A
 28%|██▊       | 8/29 [00:01<00:02,  7.02it/s][A
 31%|███       | 9/29 [00:01<00:02,  6.94it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  6.89it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  6.71it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  6.72it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  6.73it/s][A
 48%|████▊     | 14/29 [00:02<00:02,  6.73it/s][A
 52%|█████▏    | 15/29 [00:02<00:02,  6.70it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  6.70it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  6.70it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  6.71it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.72it/s][A
 69%|██████▉   | 20/29 [00:02<00:01,  6.71it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.6968624591827393

	Validation acc: 0.8028862575987915

	Validation prec: 0.3231635617498546

	Validation rec: 0.27517952919666455

	Validation f1: 0.27569180766263235



  2%|▏         | 1/57 [00:01<00:59,  1.07s/it][A

loss: tensor(0.7307, device='cuda:0', grad_fn=<NllLossBackward>)



  4%|▎         | 2/57 [00:02<00:55,  1.00s/it][A

loss: tensor(0.5822, device='cuda:0', grad_fn=<NllLossBackward>)



  5%|▌         | 3/57 [00:03<00:55,  1.03s/it][A

loss: tensor(0.7200, device='cuda:0', grad_fn=<NllLossBackward>)



  7%|▋         | 4/57 [00:04<00:53,  1.00s/it][A

loss: tensor(0.7140, device='cuda:0', grad_fn=<NllLossBackward>)



  9%|▉         | 5/57 [00:05<00:51,  1.01it/s][A

loss: tensor(0.7021, device='cuda:0', grad_fn=<NllLossBackward>)



 11%|█         | 6/57 [00:06<00:51,  1.01s/it][A

loss: tensor(0.7511, device='cuda:0', grad_fn=<NllLossBackward>)



 12%|█▏        | 7/57 [00:07<00:49,  1.00it/s][A

loss: tensor(0.7463, device='cuda:0', grad_fn=<NllLossBackward>)



 14%|█▍        | 8/57 [00:08<00:50,  1.02s/it][A

loss: tensor(0.5134, device='cuda:0', grad_fn=<NllLossBackward>)



 16%|█▌        | 9/57 [00:09<00:48,  1.00s/it][A

loss: tensor(0.7906, device='cuda:0', grad_fn=<NllLossBackward>)



 18%|█▊        | 10/57 [00:10<00:47,  1.02s/it][A

loss: tensor(0.6127, device='cuda:0', grad_fn=<NllLossBackward>)



 19%|█▉        | 11/57 [00:11<00:45,  1.00it/s][A

loss: tensor(0.5611, device='cuda:0', grad_fn=<NllLossBackward>)



 21%|██        | 12/57 [00:12<00:44,  1.02it/s][A

loss: tensor(0.5852, device='cuda:0', grad_fn=<NllLossBackward>)



 23%|██▎       | 13/57 [00:13<00:44,  1.01s/it][A

loss: tensor(0.6536, device='cuda:0', grad_fn=<NllLossBackward>)



 25%|██▍       | 14/57 [00:14<00:42,  1.01it/s][A

loss: tensor(0.6308, device='cuda:0', grad_fn=<NllLossBackward>)



 26%|██▋       | 15/57 [00:15<00:42,  1.02s/it][A

loss: tensor(0.7327, device='cuda:0', grad_fn=<NllLossBackward>)



 28%|██▊       | 16/57 [00:16<00:40,  1.00it/s][A

loss: tensor(0.8786, device='cuda:0', grad_fn=<NllLossBackward>)



 30%|██▉       | 17/57 [00:17<00:39,  1.02it/s][A

loss: tensor(0.5695, device='cuda:0', grad_fn=<NllLossBackward>)



 32%|███▏      | 18/57 [00:18<00:39,  1.01s/it][A

loss: tensor(0.4821, device='cuda:0', grad_fn=<NllLossBackward>)



 33%|███▎      | 19/57 [00:19<00:37,  1.01it/s][A

loss: tensor(0.8196, device='cuda:0', grad_fn=<NllLossBackward>)



 35%|███▌      | 20/57 [00:20<00:37,  1.02s/it][A

loss: tensor(0.5508, device='cuda:0', grad_fn=<NllLossBackward>)



 37%|███▋      | 21/57 [00:21<00:35,  1.00it/s][A

loss: tensor(0.5371, device='cuda:0', grad_fn=<NllLossBackward>)



 39%|███▊      | 22/57 [00:22<00:35,  1.02s/it][A

loss: tensor(0.7782, device='cuda:0', grad_fn=<NllLossBackward>)



 40%|████      | 23/57 [00:23<00:33,  1.00it/s][A

loss: tensor(0.6963, device='cuda:0', grad_fn=<NllLossBackward>)



 42%|████▏     | 24/57 [00:24<00:32,  1.01it/s][A

loss: tensor(0.5854, device='cuda:0', grad_fn=<NllLossBackward>)



 44%|████▍     | 25/57 [00:25<00:32,  1.01s/it][A

loss: tensor(0.7384, device='cuda:0', grad_fn=<NllLossBackward>)



 46%|████▌     | 26/57 [00:26<00:30,  1.01it/s][A

loss: tensor(0.6599, device='cuda:0', grad_fn=<NllLossBackward>)



 47%|████▋     | 27/57 [00:27<00:30,  1.02s/it][A

loss: tensor(0.5416, device='cuda:0', grad_fn=<NllLossBackward>)



 49%|████▉     | 28/57 [00:28<00:28,  1.00it/s][A

loss: tensor(0.4538, device='cuda:0', grad_fn=<NllLossBackward>)



 51%|█████     | 29/57 [00:29<00:28,  1.02s/it][A

loss: tensor(0.5570, device='cuda:0', grad_fn=<NllLossBackward>)



 53%|█████▎    | 30/57 [00:30<00:26,  1.00it/s][A

loss: tensor(0.8044, device='cuda:0', grad_fn=<NllLossBackward>)



 54%|█████▍    | 31/57 [00:31<00:25,  1.01it/s][A

loss: tensor(0.6866, device='cuda:0', grad_fn=<NllLossBackward>)



 56%|█████▌    | 32/57 [00:32<00:25,  1.01s/it][A

loss: tensor(0.6652, device='cuda:0', grad_fn=<NllLossBackward>)



 58%|█████▊    | 33/57 [00:33<00:23,  1.00it/s][A

loss: tensor(0.6090, device='cuda:0', grad_fn=<NllLossBackward>)



 60%|█████▉    | 34/57 [00:34<00:23,  1.02s/it][A

loss: tensor(0.5056, device='cuda:0', grad_fn=<NllLossBackward>)



 61%|██████▏   | 35/57 [00:35<00:21,  1.00it/s][A

loss: tensor(0.7104, device='cuda:0', grad_fn=<NllLossBackward>)



 63%|██████▎   | 36/57 [00:36<00:21,  1.01s/it][A

loss: tensor(0.5934, device='cuda:0', grad_fn=<NllLossBackward>)



 65%|██████▍   | 37/57 [00:37<00:19,  1.00it/s][A

loss: tensor(0.6163, device='cuda:0', grad_fn=<NllLossBackward>)



 67%|██████▋   | 38/57 [00:38<00:18,  1.02it/s][A

loss: tensor(0.5113, device='cuda:0', grad_fn=<NllLossBackward>)



 68%|██████▊   | 39/57 [00:39<00:18,  1.01s/it][A

loss: tensor(0.4778, device='cuda:0', grad_fn=<NllLossBackward>)



 70%|███████   | 40/57 [00:40<00:16,  1.01it/s][A

loss: tensor(0.5360, device='cuda:0', grad_fn=<NllLossBackward>)



 72%|███████▏  | 41/57 [00:41<00:16,  1.01s/it][A

loss: tensor(0.6295, device='cuda:0', grad_fn=<NllLossBackward>)



 74%|███████▎  | 42/57 [00:42<00:14,  1.00it/s][A

loss: tensor(0.7584, device='cuda:0', grad_fn=<NllLossBackward>)



 75%|███████▌  | 43/57 [00:43<00:13,  1.02it/s][A

loss: tensor(0.5798, device='cuda:0', grad_fn=<NllLossBackward>)



 77%|███████▋  | 44/57 [00:44<00:13,  1.01s/it][A

loss: tensor(0.6881, device='cuda:0', grad_fn=<NllLossBackward>)



 79%|███████▉  | 45/57 [00:45<00:11,  1.01it/s][A

loss: tensor(0.6011, device='cuda:0', grad_fn=<NllLossBackward>)



 81%|████████  | 46/57 [00:46<00:11,  1.02s/it][A

loss: tensor(0.6263, device='cuda:0', grad_fn=<NllLossBackward>)



 82%|████████▏ | 47/57 [00:47<00:09,  1.00it/s][A

loss: tensor(0.7335, device='cuda:0', grad_fn=<NllLossBackward>)



 84%|████████▍ | 48/57 [00:48<00:09,  1.02s/it][A

loss: tensor(0.7307, device='cuda:0', grad_fn=<NllLossBackward>)



 86%|████████▌ | 49/57 [00:49<00:07,  1.00it/s][A

loss: tensor(0.7603, device='cuda:0', grad_fn=<NllLossBackward>)



 88%|████████▊ | 50/57 [00:50<00:06,  1.02it/s][A

loss: tensor(0.6068, device='cuda:0', grad_fn=<NllLossBackward>)



 89%|████████▉ | 51/57 [00:51<00:06,  1.01s/it][A

loss: tensor(0.7302, device='cuda:0', grad_fn=<NllLossBackward>)



 91%|█████████ | 52/57 [00:52<00:04,  1.01it/s][A

loss: tensor(0.5907, device='cuda:0', grad_fn=<NllLossBackward>)



 93%|█████████▎| 53/57 [00:53<00:04,  1.02s/it][A

loss: tensor(0.5656, device='cuda:0', grad_fn=<NllLossBackward>)



 95%|█████████▍| 54/57 [00:54<00:02,  1.00it/s][A

loss: tensor(0.5171, device='cuda:0', grad_fn=<NllLossBackward>)



 96%|█████████▋| 55/57 [00:55<00:02,  1.02s/it][A

loss: tensor(0.5779, device='cuda:0', grad_fn=<NllLossBackward>)



 98%|█████████▊| 56/57 [00:56<00:00,  1.00it/s][A

loss: tensor(0.7375, device='cuda:0', grad_fn=<NllLossBackward>)



100%|██████████| 57/57 [00:56<00:00,  1.00it/s][A

  0%|          | 0/29 [00:00<?, ?it/s][A
  3%|▎         | 1/29 [00:00<00:03,  7.16it/s][A

loss: tensor(0.5685, device='cuda:0', grad_fn=<NllLossBackward>)

	Training Loss: 0.6419847884721923

	Training acc: 0.8173261412309518

	Training prec: 0.42610625206331576

	Training rec: 0.26762111790566384

	Training f1: 0.2805972084960774

	Current Learning rate:  0.0



  7%|▋         | 2/29 [00:00<00:03,  7.15it/s][A
 10%|█         | 3/29 [00:00<00:03,  7.18it/s][A
 14%|█▍        | 4/29 [00:00<00:03,  7.18it/s][A
 17%|█▋        | 5/29 [00:00<00:03,  7.14it/s][A
 21%|██        | 6/29 [00:00<00:03,  7.15it/s][A
 24%|██▍       | 7/29 [00:00<00:03,  7.13it/s][A
 28%|██▊       | 8/29 [00:01<00:02,  7.14it/s][A
 31%|███       | 9/29 [00:01<00:02,  7.14it/s][A
 34%|███▍      | 10/29 [00:01<00:02,  7.13it/s][A
 38%|███▊      | 11/29 [00:01<00:02,  7.14it/s][A
 41%|████▏     | 12/29 [00:01<00:02,  7.10it/s][A
 45%|████▍     | 13/29 [00:01<00:02,  7.10it/s][A
 48%|████▊     | 14/29 [00:01<00:02,  7.09it/s][A
 52%|█████▏    | 15/29 [00:02<00:01,  7.09it/s][A
 55%|█████▌    | 16/29 [00:02<00:01,  7.10it/s][A
 59%|█████▊    | 17/29 [00:02<00:01,  7.11it/s][A
 62%|██████▏   | 18/29 [00:02<00:01,  7.11it/s][A
 66%|██████▌   | 19/29 [00:02<00:01,  6.00it/s][A
 69%|██████▉   | 20/29 [00:02<00:01,  6.20it/s][A
 72%|███████▏  | 21/29 [00:03<00:01,  


	Validation Loss: 0.6938323193582995

	Validation acc: 0.8028444763290649

	Validation prec: 0.322225774092201

	Validation rec: 0.2768359663737677

	Validation f1: 0.27502518194870884





### Evaluation on the test dataset

In [43]:

############ test eval metrics ######################
nb_test_steps = 0 # Tracking variables
test_true_labels = []
test_predict_labels = []
test_loss = []
test_acc = []
test_prec = []
test_rec = []
test_f1 = []

########################################################
for batch in tqdm(test_loader):
    batch = tuple(batch[t].to(device) for t in batch)      # batch to GPU
    t_input_ids, t_input_mask, t_token_type_ids, t_labels, t_bio_tags = batch     # unpack inputs from dataloader

    with torch.no_grad(): # tell model not to compute or store gradients -> saves memory + speeds up validation
        model.eval() # put model in evaluation mode for validation set
        logits = model(**{"input_ids":t_input_ids, "attention_mask":t_input_mask, "token_type_ids":t_token_type_ids}) # forward pass, calculates logit predictions

    ######################################################

    # similar to the class RobertaForToken classification in transformers: https://github.com/huggingface/transformers/blob/master/src/transformers/models/roberta/modeling_roberta.py
    t_active_loss = t_input_mask.view(-1) == 1  # either based on attention_mask (includes <CLS>, <SEP> token)
    t_active_logits = logits.view(-1, N_bio_tags)[t_active_loss] # 5 
    t_active_tags = t_bio_tags.view(-1)[t_active_loss]
    t_loss = loss_fn(t_active_logits, t_active_tags)             
    test_loss.append(t_loss.item())

    #########################################################
    logits = logits.detach().to('cpu').numpy()
    tags_ids = t_bio_tags.to('cpu').numpy()

    # calculate performance measures only on tokens and not subwords or special tokens
    tags_mask = tags_ids != -100 # only get token labels and not labels from subwords or special tokens
    pred = np.argmax(logits, axis=2)[tags_mask] #.flatten() # convert logits to list of predicted labels
    tags = tags_ids[tags_mask]#.flatten()                          
    test_true_labels.append(tags) # appends true labels for batch
    test_predict_labels.append(pred) # # appends predicted labels for batch
    
    metrics = compute_metrics(pred, tags)
    test_acc.append(metrics["accuracy"])
    test_prec.append(metrics["precision"])
    test_rec.append(metrics["recall"])
    test_f1.append(metrics["f1"])

    nb_test_steps += 1

print(F'\n\tTest Loss: {np.mean(test_loss)}')
print(F'\n\tTest acc: {np.mean(test_acc)}')
print(F'\n\tTest prec: {np.mean(test_prec)}')
print(F'\n\tTest rec: {np.mean(test_rec)}')
print(F'\n\tTest f1: {np.mean(test_f1)}')


  , "labels" : torch.tensor(self.labels[idx], dtype=torch.long)
100%|██████████| 36/36 [00:05<00:00,  6.15it/s]


	Test Loss: 0.6690744368566407

	Test acc: 0.802791295696593

	Test prec: 0.3352464612211138

	Test rec: 0.2670752618857317

	Test f1: 0.27091637221146603





### Classification reports

In [44]:
from sklearn.metrics import classification_report
tag2id = {label: idx for idx, label in enumerate(["O", "B-C", "I-C", "B-E", "I-E"])}
tag2id[-100] = -100
id2tag = {id:tag for tag,id in tag2id.items()}
test_true_tag = [id2tag[ID] for ID in np.concatenate(test_true_labels)]#
test_predict_tag = [id2tag[ID] for ID in np.concatenate(test_predict_labels)]
print(classification_report(test_true_tag, test_predict_tag))

              precision    recall  f1-score   support

         B-C       0.53      0.25      0.34       272
         B-E       1.00      0.00      0.01       278
         I-C       0.67      0.07      0.12       291
         I-E       0.00      0.00      0.00       321
           O       0.82      0.99      0.90      4609

    accuracy                           0.81      5771
   macro avg       0.60      0.26      0.27      5771
weighted avg       0.76      0.81      0.74      5771



In [45]:
from seqeval.metrics import classification_report as classification_report_seqeval

#The metrics we are seeing in this report are designed specifically for NLP tasks such as NER and POS tagging,
#in which all words of an entity need to be predicted correctly to be counted as one correct prediction. 
#Therefore, the metrics in this classification report are much lower than in scikit-learn's classification report.
test_true_tag = [[id2tag[ID] for ID in IDS] for IDS in test_true_labels]
test_predict_tag = [[id2tag[ID] for ID in IDS] for IDS in test_predict_labels]
print(classification_report_seqeval(test_true_tag, test_predict_tag))

              precision    recall  f1-score   support

           C       0.36      0.21      0.26       273
           E       1.00      0.00      0.01       278

   micro avg       0.36      0.11      0.16       551
   macro avg       0.68      0.11      0.14       551
weighted avg       0.68      0.11      0.13       551



### bio tags back to tokens

In [46]:
# take last batch of test set:
t_input_ids, t_input_mask, t_token_type_ids, t_labels, t_bio_tags = batch 

for i in range(len(batch)):
    tags_mask = t_bio_tags[i].to("cpu").numpy() != -100 # only get token labels and not labels from subwords or special tokens
    pred = np.argmax(logits[i], axis=1)[tags_mask]
    true_tags = t_bio_tags[i][tags_mask].to("cpu").numpy()    
    

    tokens = tokenizer.convert_ids_to_tokens(t_input_ids[i])

    print("\n\nPadded Sentence:")
    print(tokens)
    print("true labels:")
    print(t_bio_tags[i])
    for token, true_label, pred in zip(np.array(tokens)[tags_mask], true_tags, pred):
        print(token, "\t\ttrue:", true_label, "  pred:", pred)

    
    break






Padded Sentence:
['<s>', 'USER', 'I', "'m", 'a', 'type', '1', 'diabetic', 'and', 'I', 'get', 'my', 'medical', 'supplies', 'through', 'UPS', 'monthly', 'for', 'my', 'insulin', 'pump', ',', 'it', "'s", 'been', 'slower', 'and', 'slower', 'every', 'single', 'month', ',', 'it', 'makes', 'me', 'a', 'nervous', 'wreck', 'period', '.', '</s>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
true labels:
tensor([-100,    0,    0,    0,    0,    1,    2,    2,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    3,    4,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100], device='cuda:0')
USER 		true: 0   pred: 0
I 		true: 0

### Save model

In [47]:
torch.save(model.state_dict(), "finetuned-causal-span-cause-or-effect-35-epochs.pth")

### Load model locally

In [51]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CausalNER()
model.load_state_dict(torch.load("finetuned-causal-span-cause-or-effect-35-epochs.pth"))
model.to(device)
model.eval()

You are using a model of type roberta to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing BertModel: ['roberta.encoder.layer.11.attention.self.query.weight', 'roberta.encoder.layer.2.output.dense.bias', 'roberta.encoder.layer.1.attention.self.query.weight', 'roberta.encoder.layer.3.intermediate.dense.bias', 'roberta.encoder.layer.1.attention.self.value.bias', 'roberta.encoder.layer.10.output.LayerNorm.weight', 'roberta.encoder.layer.1.attention.self.value.weight', 'roberta.encoder.layer.7.output.LayerNorm.weight', 'roberta.encoder.layer.6.attention.self.query.bias', 'roberta.encoder.layer.9.intermediate.dense.bias', 'roberta.encoder.layer.10.attention.self.value.bias', 'roberta.encoder.layer.5.attention.self.key.weight', 'roberta.encoder.layer.0.attention.output.LayerNorm.weight', 'roberta.encoder.layer.2.intermediate.dense.bias', 'ro

CausalNER(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(64001, 768, padding_idx=1)
      (position_embeddings): Embedding(130, 768)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  

In [52]:
dir(TweetDataSet)

['__add__',
 '__class__',
 '__class_getitem__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__orig_bases__',
 '__parameters__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_is_protocol',
 'extend_tags']