# Neural Network approach with RNN

In [1]:
!python --version

Python 3.7.13


## Libraries and data import

In [2]:
!pip install -U torch==1.10.0
!pip install -U torchtext==0.11.0
!python -m spacy download en_core_web_sm

from google.colab import drive
import pandas as pd

import torch
import torchtext
from torchtext.legacy import data
from torchtext.legacy.data import Field, Dataset, Example

import random

import torch.nn as nn
import torch.optim as optim

from sklearn.metrics import recall_score, precision_score, fbeta_score

Collecting en_core_web_sm==2.2.5
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.5/en_core_web_sm-2.2.5.tar.gz (12.0 MB)
[K     |████████████████████████████████| 12.0 MB 6.8 MB/s 
[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_sm')


In [3]:
#import of the processed dataset and the columns' names
from_drive = True
dataset = dict()
path = "/content/gdrive/MyDrive/Magistrale/Stage/data"

if from_drive == True: 
  drive.mount("/content/gdrive")
  dataset["ace"] = pd.read_csv(path + "/preprocessed_ace2.csv")
  dataset["copd"] = pd.read_csv(path + "/preprocessed_copd2.csv")
  dataset["ppi"] = pd.read_csv(path + "/preprocessed_ppi2.csv")
else: 
  dataset["ace"] = pd.read_csv(path + "/content/preprocessed_ace2.csv")
  dataset["copd"] = pd.read_csv(path + "/content/preprocessed_copd2.csv")
  dataset["ppi"] = pd.read_csv(path + "/content/preprocessed_ppi2.csv")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


## Preparing Data


In [4]:
#random seed for reproducibility
SEED = 1234
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

#FIELD: how the data should be processed
TEXT = data.Field(tokenize = 'spacy',
                  tokenizer_language = 'en_core_web_sm')
LABEL = data.LabelField(dtype = torch.float)
fields = {'label' : LABEL, "text" : TEXT}

#HYPERPARAMETERS

#dataset configuration
i = "ace" #which dataset
clean_text = True #otherwise, it will be used "text"

#bucket iterator configuration
BATCH_SIZE = 250 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#neural network configuration
EMBEDDING_DIM = 100 #size of the dense word vectors (usually 50-250)
HIDDEN_DIM = 256 #size of the hidden states (usually 100-500)
OUTPUT_DIM = 1 #number of classes

In [5]:
#removal and renaming of columns

if clean_text == True: 
  dataset[i].drop(dataset[i].columns.difference(['Label', "text_clean"]), 1, inplace=True)
  dataset[i].rename(columns={'text_clean': 'text'}, inplace=True)

else: 
  dataset[i].drop(dataset[i].columns.difference(['Label', "text"]), 1, inplace=True)

dataset[i].rename(columns={'Label': 'label'}, inplace=True)

#final dataset with "label" and "text"

  after removing the cwd from sys.path.


In [6]:
# dataset[i]["label"] = dataset[i]["label"].replace({0:1, 1:0})

In [7]:
print(dataset[i].shape)
dataset[i].head()

(2496, 2)


Unnamed: 0,label,text
0,0,distinct and combined vascular effects of ace ...
1,0,computerized surveillance of adverse drug reac...
2,0,glomerular size selective dysfunction in niddm...
3,0,total arterial compliance in ambulatory hypert...
4,0,racial differences in the outcome of left vent...


In [8]:
class DataFrameDataset(Dataset):
  """Class for using pandas DataFrames as a datasource"""
  def __init__(self, examples, fields, filter_pred=None):
    """
    Create a dataset from a pandas dataframe of examples and Fields
    Arguments:
      examples pd.DataFrame: DataFrame of examples
      fields {str: Field}: The Fields to use in this tuple. The
        string is a field name, and the Field is the associated field.
      filter_pred (callable or None): use only exanples for which
        filter_pred(example) is true, or use all examples if None.
        Default is None
    """
    self.examples = examples.apply(SeriesExample.fromSeries, args=(fields,), axis=1).tolist()
    if filter_pred is not None:
      self.examples = filter(filter_pred, self.examples)
    self.fields = dict(fields)
    # Unpack field tuples
    for n, f in list(self.fields.items()):
      if isinstance(n, tuple):
        self.fields.update(zip(n, f))
        del self.fields[n]

class SeriesExample(Example):
  """Class to convert a pandas Series to an Example"""

  @classmethod
  def fromSeries(cls, data, fields):
    return cls.fromdict(data.to_dict(), fields)

  @classmethod
  def fromdict(cls, data, fields):
    ex = cls()
      
    for key, field in fields.items():
      if key not in data:
        raise ValueError("Specified key {} was not found in "
        "the input data".format(key))
      if field is not None:
        setattr(ex, key, field.preprocess(data[key]))
      else:
        setattr(ex, key, data[key])
    return ex

In [9]:
#pytorch dataset 
df = DataFrameDataset(dataset[i], fields)

### Train, test and val split

In [10]:
#train and test split
train_data, test_data, valid_data = df.split(split_ratio=[0.5, 0.4, 0.1], stratified=True, strata_field='label', random_state = random.seed(SEED))

print(f'Number of training examples: {len(train_data)}')
print(f'Number of testing examples: {len(test_data)}')
print(f'Number of validation examples: {len(valid_data)}')

#checking an example
print("\n", vars(train_data.examples[0]))

Number of training examples: 1248
Number of testing examples: 249
Number of validation examples: 999

 {'label': 0, 'text': ['insulin', 'resistant', 'lipolysis', 'in', 'abdominally', 'obese', 'hypertensive', 'individuals', 'role', 'of', 'the', 'renin', 'angiotensin', 'system', 'resistance', 'to', 'the', 'capacity', 'of', 'insulin', 'to', 'suppress', 'lipolysis', 'may', 'be', 'an', 'important', 'link', 'in', 'the', 'association', 'between', 'abdominal', 'obesity', 'and', 'hypertension', 'furthermore', 'a', 'more', 'active', 'renin', 'angiotensin', 'system', 'in', 'adipose', 'tissue', 'may', 'contribute', 'to', 'insulin', 'resistant', 'lipolysis', 'in', 'abdominally', 'obese', 'hypertensive', 'subjects', 'we', 'determined', 'nonesterified', 'fatty', 'acid', 'concentrations', 'and', 'turnover', 'as', 'well', 'as', 'lipid', 'oxidation', 'under', 'basal', 'conditions', 'and', 'during', 'steady', 'state', 'euglycemia', 'with', 'two', 'levels', 'of', 'insulinemia', '72', 'and', '287', 'pmol',

In [11]:
#VOCABULARY: look up table where every unique word in your data set has a corresponding _index_ (an integer).
#each _index_ is used to construct a _one-hot_ vector for each word.
TEXT.build_vocab(train_data) #optional parameter: max_size = MAX_VOCAB_SIZE
LABEL.build_vocab(train_data)

Only build the vocabulary on the training set because when testing a machine learning system you do not want to look at the test set in any way. Also, do not include the validation set as you want it to reflect the test set as much as possible.

In [12]:
print(f"Unique tokens in TEXT vocabulary: {len(TEXT.vocab)}")
print(f"Unique tokens in LABEL vocabulary: {len(LABEL.vocab)}")

Unique tokens in TEXT vocabulary: 9992
Unique tokens in LABEL vocabulary: 2


In [13]:
poscount = 0
for elem in range(len(train_data)):
  if train_data[elem].label == 1:
    poscount += 1 

In [14]:
poscount

18

In [15]:
train_data[0].label

0

There is the addition of the `<pad>` token.

When feeding sentences into our model, you feed a _batch_ of them at a time, i.e. more than one at a time, and all sentences in the batch need to be the same size. Thus, to ensure each sentence in the batch is the same size, any shorter than the longest within the batch are padded.

In [16]:
print("Most common words:", TEXT.vocab.freqs.most_common(20))

Most common words: [('the', 11373), ('of', 10672), ('and', 9680), ('in', 8835), ('with', 5459), ('to', 5315), ('patients', 4550), ('a', 4018), ('drug', 3453), ('0', 3181), ('effects', 3020), ('use', 2851), ('was', 2808), ('blood', 2724), ('therapeutic', 2665), ('were', 2609), ('therapy', 2447), ('1', 2250), ('treatment', 2207), ('angiotensin', 2194)]


In [17]:
#see the vocabulary directly using either stoi (string to int) or itos (int to string)
print(TEXT.vocab.itos[:10])

#check the labels, ensuring 0 is for negative and 1 is for positive
# LABEL.vocab.stoi = {1:1, 0:0}
print(LABEL.vocab.stoi)

['<unk>', '<pad>', 'the', 'of', 'and', 'in', 'with', 'to', 'patients', 'a']
defaultdict(None, {0: 0, 1: 1})


In [18]:
#creating the iterators

#ITERATORS: you iterate over iterators in the training/evaluation loop, 
#and they return a batch of examples (indexed and converted into tensors) at each iteration.

#BUCKET ITERATORS: special type of iterator that will return a batch of examples 
#where each example is of a similar length, minimizing the amount of padding per example.

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
  (train_data, valid_data, test_data), 
  batch_size = BATCH_SIZE,
  device = device, 
  shuffle = True,
  sort = False)

## Build the Model


All layers have their parameters initialized to random values, unless explicitly specified.

- **Embedding layer**: is used to transform the sparse one-hot vector into a dense embedding vector (dense as the dimensionality is a lot smaller and all the elements are real numbers). It is simply a single fully connected layer. 
As well as reducing the dimensionality of the input to the RNN, there is the theory that words with similar meaning are mapped close together in this dense vector space.

- **RNN**: takes in the dense vector and the previous hidden state $h_{t-1}$, which it uses to calculate the next hidden state, $h_t$.

- **Linear layer**: takes the final hidden state and feeds it through a fully connected layer, $f(h_T)$, transforming it to the correct output dimension.

In [19]:
class RNN(nn.Module): #the RNN class is a sub-class of nn.Module
  def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim): #define the layers of the module 
      
    super().__init__()
    
    self.embedding = nn.Embedding(input_dim, embedding_dim) #embedding layer
    
    self.rnn = nn.RNN(embedding_dim, hidden_dim) #rnn
    
    self.fc = nn.Linear(hidden_dim, output_dim) #linear layer
      
  def forward(self, text): #called when feeding examples into the model

    #text = [sent len, batch size] (tensor)
    #text is a batch of senteces, each having each word converted into a one-hot vector 

    #the input batch is passed through the embedding layer to get `embedded`, 
    #which gives us a dense vector representation of our sentences.
    embedded = self.embedding(text) 
    
    #embedded = [sent len, batch size, emb dim] (tensor)
    
    output, hidden = self.rnn(embedded)
    
    #output = [sent len, batch size, hid dim] (tensor)
    #output is the concatenation of the hidden state from every time step

    #hidden = [1, batch size, hid dim] (tensor)
    #hidden is simply the final hidden state
    
    #assert statement udse to verify if output is the concatenation of the 
    #hidden state from every time step and hidden is the final hidden state
    assert torch.equal(output[-1,:,:], hidden.squeeze(0))
    #squeeze is used to remove a dimension of size 1
    
    #the last hidden state, hidden, is fed through the linear layer to produce a prediction
    return self.fc(hidden.squeeze(0))

The tensor `text` should have another dimension due to the one-hot vectors, however PyTorch conveniently stores a one-hot vector as it's index value, i.e. the tensor representing a sentence is just a tensor of the indexes for each token in that sentence. The act of converting a list of tokens into a list of indexes is commonly called *numericalizing*.

In [20]:
#create an instance of the RNN class
INPUT_DIM = len(TEXT.vocab) #input dimension: dimension of the one-hot vectors (equal to the vocabulary size)

model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)

- **Input dimension**: dimension of the one-hot vectors (equal to the vocabulary size).
- **Embedding dimension**: size of the dense word vectors. This is usually around 50-250 dimensions, but depends on the size of the vocabulary.
- **Hidden dimension**: size of the hidden states. This is usually around 100-500 dimensions, but also depends on factors such as on the vocabulary size, the size of the dense vectors and the complexity of the task.
- **Output dimension**: usually the number of classes. However in the case of only 2 classes the output value is between 0 and 1 and thus can be 1-dimensional, i.e. a single scalar real number.

In [21]:
def count_parameters(model):
  """ 
  Function that tells how many trainable parameters the model has 
  so we can compare the number of parameters across different models.
  """
  return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 1,091,105 trainable parameters


## Train the Model

In [22]:
#OPTIMIZER: algorithm we use to update the parameters of the module. 
#chosen optimizer: stochastic gradient descent (SGD) 
optimizer = optim.SGD(model.parameters(), lr=1e-3)

#CRITERION: loss function
#chosen loss function: binary cross entropy with logits
weights = [1 - (len(dataset[i][dataset[i]["label"]==0]) / dataset[i].shape[0])]
weight = torch.FloatTensor(weights)
criterion = nn.BCEWithLogitsLoss(weight=weight)

#place the model and the criterion on the GPU (if we have one)
model = model.to(device)
criterion = criterion.to(device)

The model currently outputs an unbound real number. As the labels are either 0 or 1, you want to restrict the predictions to a number between 0 and 1. This can be done using the _sigmoid_ or _logit_ functions. 

Is it possible to use the bound scalar to calculate the loss using binary cross entropy. 

The `BCEWithLogitsLoss` criterion carries out both the sigmoid and the binary cross entropy steps.

In [23]:
import numpy as np

def binary_metrics(preds, y):
  
  """
  Returns metrics per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
  """

  #feed the prediction through a sigmoid layer, squashing the values in [0, 1], then round them to the nearest integer
  rounded_preds = torch.round(torch.sigmoid(preds)) 

  #calculate how many rounded predictions equal the actual labels and average it across the batch
  correct = (rounded_preds == y).float() #convert into float for division 
  acc = correct.sum() / len(correct)

  y_true = y.detach().numpy()
  y_pred = rounded_preds.detach().numpy()

  recall = recall_score(y_true, y_pred, average='binary', zero_division="warn")
  precision = precision_score(y_true, y_pred, average='binary', zero_division="warn")
  f2 = fbeta_score(y_true, y_pred, average='binary', beta=2, zero_division="warn")
  f3 = fbeta_score(y_true, y_pred, average='binary', beta=3, zero_division="warn")

  #print(y)
  #sumpos = np.sum(rounded_preds.detach().numpy() == 1)

  return acc, recall, precision, f2, f3

In [24]:
def train(model, iterator, optimizer, criterion): #iterates over all examples, one batch at a time
  
  epoch_loss = 0
  epoch_acc = 0
  epoch_recall = 0
  epoch_precision = 0
  epoch_f2 = 0
  epoch_f3 = 0
  
  model.train() #put the model in "training mode", which turns on dropout and batch normalization
  #although we aren't using them in this model, it's good practice to include it
  
  for batch in iterator: #for each batch
    
    optimizer.zero_grad() #we first zero the gradients

    #feed the batch of sentences, batch.text, into the model 
    predictions = model(batch.text).squeeze(1)
    #squeeze is needed as the predictions are initially size [batch size, 1]
    #and we need to remove the dimension of size 1, as PyTorch expects the 
    #predictions input to our criterion function to be of size [batch size]    
    
    #loss and accuracy are then calculated using the predictions and the labels, batch.label, 
    #with the loss being averaged over all examples in the batch    
    
    #computation of loss 
    loss = criterion(predictions, batch.label)
    #criterion expects both input to be FloatTensors
    #that's wht in the label field we set dtype=torch.float 
    
    #computation of accuracy
    acc, recall, precision, f2, f3 = binary_metrics(predictions.cpu(), batch.label.cpu())

    loss.backward() #calculate the gradient of each parameter
    
    optimizer.step() #update the parameters using the gradients and optimizer algorithm

    #the loss and accuracy are accumulated across the epoch, 
    #the .item() method is used to extract a scalar from a tensor which only contains a single value
    epoch_loss += loss.item()
    epoch_acc += acc.item()
    epoch_recall += recall.item()
    epoch_precision += precision.item()
    epoch_f2 += f2.item()
    epoch_f3 += f3.item()

  #return the loss and accuracy, averaged across the epochs
  #the len of an iterator is the number of batches in the iterator
    
  return epoch_loss / len(iterator), epoch_acc / len(iterator), epoch_recall / len(iterator), epoch_precision / len(iterator), epoch_f2 / len(iterator), epoch_f3 / len(iterator)

**Train method notes**: 

- Each parameter in a model has a `grad` attribute which stores the gradient calculated by the `criterion`. PyTorch does not automatically remove (or "zero") the gradients calculated from the last gradient calculation, so they must be manually zeroed.

- You do not need to do `model.forward(batch.text)`, simply calling the model works.

- When initializing the `LABEL` field, we set `dtype=torch.float`. This is because TorchText sets tensors to be `LongTensor`s by default, however our criterion expects both inputs to be `FloatTensor`s. 
The alternative method of doing this would be to do the conversion inside the `train` function by passing `batch.label.float()` instad of `batch.label` to the criterion. 

In [25]:
def evaluate(model, iterator, criterion): #similar to train, without the the update of the parameters
  
  epoch_loss = 0
  epoch_acc = 0
  epoch_recall = 0
  epoch_precision = 0
  epoch_f2 = 0
  epoch_f3 = 0
  
  model.eval() #puts the model in "evaluation mode", which turns off dropout and batch normalization
  #we are not using them in this model, but it is good practice to include them
  
  with torch.no_grad(): #in order to not calculate gradients

    for batch in iterator:

      predictions = model(batch.text).squeeze(1)
      
      loss = criterion(predictions, batch.label)
      
      acc, recall, precision, f2, f3 = binary_metrics(predictions.cpu(), batch.label.cpu())
      
      epoch_loss += loss.item()
      epoch_acc += acc.item()
      epoch_recall += recall.item()
      epoch_precision += precision.item()
      epoch_f2 += f2.item()
      epoch_f3 += f3.item()
      
  return epoch_loss / len(iterator), epoch_acc / len(iterator), epoch_recall / len(iterator), epoch_precision / len(iterator), epoch_f2 / len(iterator), epoch_f3 / len(iterator)

No gradients are calculated on PyTorch operations inside the `with no_grad()` block. This causes less memory to be used and speeds up computation.

The rest of the function is the same as `train`, with the removal of `optimizer.zero_grad()`, `loss.backward()` and `optimizer.step()`, as we do not update the model's parameters when evaluating.

In [26]:
import time

def epoch_time(start_time, end_time):
  """
  Returns how long an epoch takes, in order to compare training times between models
  """
  elapsed_time = end_time - start_time
  elapsed_mins = int(elapsed_time / 60)
  elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
  return elapsed_mins, elapsed_secs

We then train the model through multiple epochs, an epoch being a complete pass through all examples in the training and validation sets.

At each epoch, if the validation loss is the best we have seen so far, we'll save the parameters of the model and then after training has finished we'll use that model on the test set.

In [27]:
#train the model through multiple epochs
#EPOCH: complete pass through all examples in the training and validation sets

N_EPOCHS = 50

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    # train_loss, train_acc, train_recall, train_f2, train_f3 = train(model, train_iterator, optimizer, criterion)
    # valid_loss, valid_acc, valid_recall, valid_f2, valid_f3 = evaluate(model, valid_iterator, criterion)
    train_loss, train_acc, train_recall, train_precision, train_f2, train_f3= train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc, valid_recall, valid_precision, valid_f2, valid_f3 = evaluate(model, valid_iterator, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    #at each epoch, if the validation loss is the best seen so far,
    #we'll save the parameters of the model and then, after training has finished,
    #we'll use that model on the test set
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut1-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}% | Train Recall: {train_recall*100:.2f}% | Train Precision: {train_precision*100:.2f}% | Train F2: {train_f2*100:.2f}% | Train F3: {train_f3*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}% | Val. Recall: {valid_recall*100:.2f}% | Val. Precision: {valid_precision*100:.2f}% | Val. F2: {valid_f2*100:.2f}% | Val. F3: {valid_f3*100:.2f}%')

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 01 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.56% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 02 | Epoch Time: 0m 2s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 03 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.30% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 04 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.56% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.30% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 05 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 06 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 07 | Epoch Time: 0m 2s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 08 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 09 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.30% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 10 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 11 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.56% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 12 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.40% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 13 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.50% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 14 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.40% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 15 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.30% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 16 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 17 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 18 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.30% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 19 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.20% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 20 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.56% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.30% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 21 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.56% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 22 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.40% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.20% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 23 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


Epoch: 24 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.39% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.30% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 25 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.40% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.30% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 26 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.47% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.20% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 27 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.20% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 28 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.56% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.20% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 29 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 30 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.40% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 31 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.40% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.30% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 32 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 33 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 34 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 35 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


Epoch: 36 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.56% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.50% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 37 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 38 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 39 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.30% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 40 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.56% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.30% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 41 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 42 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.008 |  Val. Acc: 98.50% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 43 | Epoch Time: 0m 1s
	Train Loss: 0.008 | Train Acc: 98.40% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.007 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


Epoch: 44 | Epoch Time: 0m 1s
	Train Loss: 0.007 | Train Acc: 98.56% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.007 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 45 | Epoch Time: 0m 1s
	Train Loss: 0.007 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.007 |  Val. Acc: 98.30% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


Epoch: 46 | Epoch Time: 0m 1s
	Train Loss: 0.007 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.007 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 47 | Epoch Time: 0m 1s
	Train Loss: 0.007 | Train Acc: 98.56% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.007 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 48 | Epoch Time: 0m 1s
	Train Loss: 0.007 | Train Acc: 98.56% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.007 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 49 | Epoch Time: 0m 1s
	Train Loss: 0.007 | Train Acc: 98.47% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.007 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 50 | Epoch Time: 0m 1s
	Train Loss: 0.007 | Train Acc: 98.48% | Train Recall: 0.00% | Train Precision: 0.00% | Train F2: 0.00% | Train F3: 0.00%
	 Val. Loss: 0.007 |  Val. Acc: 98.40% | Val. Recall: 0.00% | Val. Precision: 0.00% | Val. F2: 0.00% | Val. F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [28]:
#test loss and accuracy (using parameters that gave the best validation loss)

model.load_state_dict(torch.load('tut1-model.pt'))

test_loss, test_acc, test_recall, test_precision, test_f2, test_f3 = evaluate(model, test_iterator, criterion)

print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}% | Test Recall: {test_recall*100:.2f}% | Test Precision: {test_precision*100:.2f}% | Test F2: {test_f2*100:.2f}% | Test F3: {test_f3*100:.2f}%')

Test Loss: 0.007 | Test Acc: 98.39% | Test Recall: 0.00% | Test Precision: 0.00% | Test F2: 0.00% | Test F3: 0.00%


  _warn_prf(average, modifier, msg_start, len(result))
