In [2]:
! pip install transformers
!pip install pycaret

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.24.0-py3-none-any.whl (5.5 MB)
[K     |████████████████████████████████| 5.5 MB 3.3 MB/s 
[?25hCollecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 44.1 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.10.1-py3-none-any.whl (163 kB)
[K     |████████████████████████████████| 163 kB 50.7 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.10.1 tokenizers-0.13.2 transformers-4.24.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pycaret
  Downloading pycaret-2.3.10-py3-none-any.whl (320 kB)
[K     |████████████████████████████████| 320 kB 4.

In [3]:
import numpy as np
import pandas as pd
import pycaret
import transformers
from transformers import AutoModel, BertTokenizerFast
import matplotlib.pyplot as plt
from sklearn.metrics import plot_confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import torch
import torch.nn as nn
from tqdm import tqdm
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so that
# anything later moved with `.to(device)` still works on a CUDA-less machine.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
!pip install datasets 
from datasets import load_dataset
dataset = load_dataset("liar")

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.6.1-py3-none-any.whl (441 kB)
[K     |████████████████████████████████| 441 kB 4.7 MB/s 
Collecting dill<0.3.6
  Downloading dill-0.3.5.1-py2.py3-none-any.whl (95 kB)
[K     |████████████████████████████████| 95 kB 4.9 MB/s 
Collecting responses<0.19
  Downloading responses-0.18.0-py3-none-any.whl (38 kB)
Collecting multiprocess
  Downloading multiprocess-0.70.14-py37-none-any.whl (115 kB)
[K     |████████████████████████████████| 115 kB 62.2 MB/s 
Collecting xxhash
  Downloading xxhash-3.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)
[K     |████████████████████████████████| 212 kB 59.1 MB/s 
Collecting multiprocess
  Downloading multiprocess-0.70.13-py37-none-any.whl (115 kB)
[K     |████████████████████████████████| 115 kB 65.9 MB/s 
Installing collected packages: dill, xxhash, responses, multiprocess, datasets
 

Downloading builder script:   0%|          | 0.00/6.41k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/4.03k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/5.15k [00:00<?, ?B/s]

Downloading and preparing dataset liar/default to /root/.cache/huggingface/datasets/liar/default/1.0.0/479463e757b7991eed50ffa7504d7788d6218631a484442e2098dabbf3b44514...


Downloading data:   0%|          | 0.00/1.01M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/10269 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1283 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1284 [00:00<?, ? examples/s]

Dataset liar downloaded and prepared to /root/.cache/huggingface/datasets/liar/default/1.0.0/479463e757b7991eed50ffa7504d7788d6218631a484442e2098dabbf3b44514. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
# Show the splits contained in the loaded dataset together with their columns and row counts.
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['id', 'label', 'statement', 'subject', 'speaker', 'job_title', 'state_info', 'party_affiliation', 'barely_true_counts', 'false_counts', 'half_true_counts', 'mostly_true_counts', 'pants_on_fire_counts', 'context'],
        num_rows: 10269
    })
    test: Dataset({
        features: ['id', 'label', 'statement', 'subject', 'speaker', 'job_title', 'state_info', 'party_affiliation', 'barely_true_counts', 'false_counts', 'half_true_counts', 'mostly_true_counts', 'pants_on_fire_counts', 'context'],
        num_rows: 1283
    })
    validation: Dataset({
        features: ['id', 'label', 'statement', 'subject', 'speaker', 'job_title', 'state_info', 'party_affiliation', 'barely_true_counts', 'false_counts', 'half_true_counts', 'mostly_true_counts', 'pants_on_fire_counts', 'context'],
        num_rows: 1284
    })
})


In [6]:
# Load the pre-trained RoBERTa encoder and the fast tokenizer that matches it.
from transformers import (
    RobertaForSequenceClassification,
    RobertaTokenizer,
    RobertaTokenizerFast,
)

# Both objects must come from the same checkpoint so the vocabulary matches the weights.
ROBERTA_CHECKPOINT = "roberta-base"

roberta = AutoModel.from_pretrained(ROBERTA_CHECKPOINT)
roberta_tokenizer = RobertaTokenizerFast.from_pretrained(ROBERTA_CHECKPOINT)

print(' Base models loaded')

Downloading:   0%|          | 0.00/481 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

 Base models loaded


In [7]:

# Maximum number of tokens per statement; longer inputs are truncated and
# shorter ones padded up to this length. (The misspelled name is kept because
# other cells may refer to it.)
MAX_LENGHT = 100


def encode_statements(split):
    """Tokenize the `statement` column of one dataset split.

    Args:
        split: name of the split in `dataset` ("train", "validation" or "test").

    Returns:
        The tokenizer's batch encoding; it is indexed below with 'input_ids'
        and 'attention_mask', each padded/truncated to MAX_LENGHT tokens.
    """
    return roberta_tokenizer.batch_encode_plus(
        dataset[split]["statement"],
        max_length=MAX_LENGHT,
        padding="max_length",   # replaces the deprecated pad_to_max_length=True
        truncation=True,
    )


# Tokenize and encode each split with identical settings
tokens_train = encode_statements("train")
tokens_val = encode_statements("validation")
tokens_test = encode_statements("test")

# `Text` used to be a scratch variable that ended up holding the test statements;
# it is still defined here in case a later cell reads it.
Text = dataset["test"]["statement"]



In [8]:
# Turn the tokenizer output and the labels of every split into tensors.
def _split_tensors(encoding, split_name):
    """Return (input_ids, attention_mask, labels) tensors for one split."""
    return (
        torch.tensor(encoding['input_ids']),
        torch.tensor(encoding['attention_mask']),
        torch.tensor(dataset[split_name]["label"]),
    )


train_seq, train_mask, train_y = _split_tensors(tokens_train, "train")
val_seq, val_mask, val_y = _split_tensors(tokens_val, "validation")
test_seq, test_mask, test_y = _split_tensors(tokens_test, "test")

In [9]:
# Sanity check: ids, masks and labels of each split must have matching lengths.
for seq, mask, labels in (
    (val_seq, val_mask, val_y),
    (test_seq, test_mask, test_y),
    (train_seq, train_mask, train_y),
):
    print(len(seq), len(mask), len(labels))

# Peek at the validation labels and confirm their count against the raw split.
print(val_y)
print(len(dataset["validation"]["statement"]))
print()

1284 1284 1284
1283 1283 1283
10269 10269 10269
tensor([4, 5, 0,  ..., 3, 0, 4])
1284



In [10]:
# Batching setup: training batches are drawn in random order, validation batches
# in dataset order.
batch_size = 32

# --- training split ---
train_data = TensorDataset(train_seq, train_mask, train_y)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(
    train_data,
    batch_size=batch_size,
    sampler=train_sampler,
)

# --- validation split ---
val_data = TensorDataset(val_seq, val_mask, val_y)
val_sampler = SequentialSampler(val_data)
val_dataloader = DataLoader(
    val_data,
    batch_size=batch_size,
    sampler=val_sampler,
)

In [11]:
# Freeze the pre-trained encoder: with requires_grad switched off on every one of
# its parameters, no gradients are computed for them and only layers added on top
# can be trained.
roberta.requires_grad_(False)

In [12]:
class RoBERT_Arch(nn.Module):
    """Classification head on top of a (pre-trained) encoder.

    The encoder's `pooler_output` is passed through
    Linear -> ReLU -> Dropout -> Linear -> LogSoftmax, so the module returns
    log-probabilities (suitable for `nn.NLLLoss`).
    """

    def __init__(self, roberta, input_size=768, hidden_size=512, num_classes=6, dropout=0.1):
        """Build the head.

        Args:
            roberta: encoder module; when called as
                `roberta(sent_id, attention_mask=mask)` its result must be
                indexable with 'pooler_output'.
            input_size: width of the encoder's pooled output (default 768).
            hidden_size: width of the intermediate dense layer (default 512).
            num_classes: number of output classes (default 6).
            dropout: dropout probability applied after the ReLU (default 0.1).
        """
        super(RoBERT_Arch, self).__init__()
        # Attribute names are part of the saved state_dict keys; do not rename.
        self.roberta = roberta
        self.dropout = nn.Dropout(dropout)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(input_size, hidden_size)     # dense layer 1
        self.fc2 = nn.Linear(hidden_size, num_classes)    # dense layer 2 (output layer)
        self.softmax = nn.LogSoftmax(dim=1)               # log-probabilities over classes

    def forward(self, sent_id, mask):
        """Return per-class log-probabilities for a batch.

        Args:
            sent_id: token ids, forwarded to the encoder as its first argument.
            mask: attention mask, forwarded as `attention_mask`.

        Returns:
            Tensor of log-softmax scores with one row per input sequence and
            `num_classes` columns.
        """
        cls_hs = self.roberta(sent_id, attention_mask=mask)['pooler_output']
        x = self.fc1(cls_hs)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return self.softmax(x)

# Wrap the encoder with the classification head.
# The model is left on the default (CPU) device here: moving it to `device` is
# only safe if the training/evaluation loops place every batch on that device too.
model = RoBERT_Arch(roberta)

# Hyperparameters: optimizer, loss function and number of epochs.
# `transformers.AdamW` is deprecated in favour of the PyTorch implementation.
# eps / weight_decay are set explicitly to the values the transformers version
# used by default, so the optimisation settings stay as before.
from torch.optim import AdamW
optimizer = AdamW(model.parameters(),
                  lr=1e-5,            # learning rate
                  eps=1e-6,
                  weight_decay=0.0)

# The model ends in LogSoftmax, so negative log-likelihood on its output is the
# cross-entropy loss.
cross_entropy = nn.NLLLoss()

# Number of training epochs
epochs = 5



In [13]:
# Defining training and evaluation functions
def train():
    """Run one training epoch over `train_dataloader`.

    Uses the notebook-level `model`, `optimizer` and `cross_entropy` objects and
    updates the model's trainable parameters in place.

    Returns:
        float: mean of the per-batch training losses for this epoch.
    """
    model.train()
    # Batches come out of the DataLoader as plain tensors; send them to whichever
    # device the model's parameters are on so the forward pass cannot hit a
    # device mismatch.
    model_device = next(model.parameters()).device
    total_loss = 0.0

    for step, batch in enumerate(tqdm(train_dataloader)):
        # Progress update every 50 batches.
        if step % 50 == 0 and not step == 0:
            print('  Batch {:>5,}  of  {:>5,}.'.format(step, len(train_dataloader)))

        sent_id, mask, labels = [t.to(model_device) for t in batch]

        model.zero_grad()                       # clear gradients from the previous step
        preds = model(sent_id, mask)            # log-probabilities for this batch
        loss = cross_entropy(preds, labels)
        total_loss += loss.item()

        loss.backward()
        # Clip the gradient norm to 1.0 to guard against exploding gradients.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

    # Only the average loss is returned; predictions are not collected.
    return total_loss / len(train_dataloader)

def evaluate():
    """Compute the average loss of `model` over `val_dataloader`.

    Uses the notebook-level `model` and `cross_entropy` objects. The model is
    put in eval mode and no gradients are tracked, so no parameters change.

    Returns:
        float: mean of the per-batch validation losses.
    """
    print("\nEvaluating...")
    model.eval()                                # deactivate dropout layers
    # Send each batch to whichever device the model's parameters are on.
    model_device = next(model.parameters()).device
    total_loss = 0.0

    with torch.no_grad():                       # no autograd bookkeeping needed here
        for step, batch in enumerate(tqdm(val_dataloader)):
            # Progress update every 50 batches.
            if step % 50 == 0 and not step == 0:
                print('  Batch {:>5,}  of  {:>5,}.'.format(step, len(val_dataloader)))

            sent_id, mask, labels = [t.to(model_device) for t in batch]
            preds = model(sent_id, mask)
            loss = cross_entropy(preds, labels)
            total_loss += loss.item()

    return total_loss / len(val_dataloader)

In [14]:
# Training driver: run every epoch, track both losses, and checkpoint the model
# whenever the validation loss reaches a new minimum.

best_valid_loss = float('inf')
train_losses, valid_losses = [], []   # per-epoch loss history

for epoch in tqdm(range(epochs)):
    print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))

    train_loss = train()
    valid_loss = evaluate()

    # Persist the weights only when this epoch beats the best validation loss so far.
    improved = valid_loss < best_valid_loss
    if improved:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'fixed_weights_roberta.pt')

    train_losses.append(train_loss)
    valid_losses.append(valid_loss)

    print(f'\nTraining Loss: {train_loss:.3f}')
    print(f'Validation Loss: {valid_loss:.3f}')

  0%|          | 0/5 [00:00<?, ?it/s]


 Epoch 1 / 5



  0%|          | 0/321 [00:00<?, ?it/s][A
  0%|          | 1/321 [00:12<1:07:49, 12.72s/it][A
  1%|          | 2/321 [00:26<1:11:58, 13.54s/it][A
  1%|          | 3/321 [00:35<1:00:21, 11.39s/it][A
  1%|          | 4/321 [00:44<54:22, 10.29s/it]  [A
  2%|▏         | 5/321 [00:52<51:08,  9.71s/it][A
  2%|▏         | 6/321 [01:01<49:16,  9.38s/it][A
  2%|▏         | 7/321 [01:10<48:01,  9.18s/it][A
  2%|▏         | 8/321 [01:19<47:09,  9.04s/it][A
  3%|▎         | 9/321 [01:27<46:25,  8.93s/it][A
  3%|▎         | 10/321 [01:37<47:51,  9.23s/it][A
  3%|▎         | 11/321 [01:46<46:42,  9.04s/it][A
  4%|▎         | 12/321 [01:55<45:55,  8.92s/it][A
  4%|▍         | 13/321 [02:03<45:20,  8.83s/it][A
  4%|▍         | 14/321 [02:12<44:59,  8.79s/it][A
  5%|▍         | 15/321 [02:21<44:41,  8.76s/it][A
  5%|▍         | 16/321 [02:29<44:20,  8.72s/it][A
  5%|▌         | 17/321 [02:38<44:11,  8.72s/it][A
  6%|▌         | 18/321 [02:47<43:51,  8.69s/it][A
  6%|▌         | 19/3

  Batch    50  of    321.



 16%|█▌        | 51/321 [07:33<38:45,  8.61s/it][A
 16%|█▌        | 52/321 [07:41<38:37,  8.61s/it][A
 17%|█▋        | 53/321 [07:50<38:28,  8.61s/it][A
 17%|█▋        | 54/321 [07:58<38:17,  8.60s/it][A
 17%|█▋        | 55/321 [08:07<38:08,  8.60s/it][A
 17%|█▋        | 56/321 [08:16<38:03,  8.62s/it][A
 18%|█▊        | 57/321 [08:24<37:52,  8.61s/it][A
 18%|█▊        | 58/321 [08:33<37:50,  8.63s/it][A
 18%|█▊        | 59/321 [08:42<38:09,  8.74s/it][A
 19%|█▊        | 60/321 [08:51<37:55,  8.72s/it][A
 19%|█▉        | 61/321 [08:59<37:51,  8.74s/it][A
 19%|█▉        | 62/321 [09:08<37:36,  8.71s/it][A
 20%|█▉        | 63/321 [09:17<37:28,  8.72s/it][A
 20%|█▉        | 64/321 [09:25<37:09,  8.67s/it][A
 20%|██        | 65/321 [09:34<37:00,  8.67s/it][A
 21%|██        | 66/321 [09:43<36:53,  8.68s/it][A
 21%|██        | 67/321 [09:51<36:36,  8.65s/it][A
 21%|██        | 68/321 [10:00<36:24,  8.64s/it][A
 21%|██▏       | 69/321 [10:08<36:18,  8.65s/it][A
 22%|██▏   

  Batch   100  of    321.



 31%|███▏      | 101/321 [14:45<31:37,  8.62s/it][A
 32%|███▏      | 102/321 [14:54<31:32,  8.64s/it][A
 32%|███▏      | 103/321 [15:03<31:26,  8.65s/it][A
 32%|███▏      | 104/321 [15:11<31:12,  8.63s/it][A
 33%|███▎      | 105/321 [15:20<31:06,  8.64s/it][A
 33%|███▎      | 106/321 [15:28<31:00,  8.65s/it][A
 33%|███▎      | 107/321 [15:37<30:49,  8.64s/it][A
 34%|███▎      | 108/321 [15:46<30:38,  8.63s/it][A
 34%|███▍      | 109/321 [15:54<30:34,  8.65s/it][A
 34%|███▍      | 110/321 [16:03<30:28,  8.67s/it][A
 35%|███▍      | 111/321 [16:12<30:19,  8.66s/it][A
 35%|███▍      | 112/321 [16:20<30:15,  8.69s/it][A
 35%|███▌      | 113/321 [16:29<30:01,  8.66s/it][A
 36%|███▌      | 114/321 [16:38<29:51,  8.65s/it][A
 36%|███▌      | 115/321 [16:46<29:41,  8.65s/it][A
 36%|███▌      | 116/321 [16:55<29:36,  8.66s/it][A
 36%|███▋      | 117/321 [17:04<29:22,  8.64s/it][A
 37%|███▋      | 118/321 [17:12<29:11,  8.63s/it][A
 37%|███▋      | 119/321 [17:21<29:04,  8.64s

  Batch   150  of    321.



 47%|████▋     | 151/321 [21:57<24:35,  8.68s/it][A
 47%|████▋     | 152/321 [22:06<24:27,  8.68s/it][A
 48%|████▊     | 153/321 [22:14<24:13,  8.65s/it][A
 48%|████▊     | 154/321 [22:23<24:02,  8.64s/it][A
 48%|████▊     | 155/321 [22:32<23:55,  8.65s/it][A
 49%|████▊     | 156/321 [22:40<23:43,  8.63s/it][A
 49%|████▉     | 157/321 [22:49<23:35,  8.63s/it][A
 49%|████▉     | 158/321 [22:58<23:26,  8.63s/it][A
 50%|████▉     | 159/321 [23:06<23:17,  8.63s/it][A
 50%|████▉     | 160/321 [23:15<23:06,  8.61s/it][A
 50%|█████     | 161/321 [23:23<22:58,  8.62s/it][A
 50%|█████     | 162/321 [23:32<22:50,  8.62s/it][A
 51%|█████     | 163/321 [23:41<22:42,  8.62s/it][A
 51%|█████     | 164/321 [23:49<22:35,  8.63s/it][A
 51%|█████▏    | 165/321 [23:58<22:25,  8.63s/it][A
 52%|█████▏    | 166/321 [24:07<22:20,  8.65s/it][A
 52%|█████▏    | 167/321 [24:15<22:21,  8.71s/it][A
 52%|█████▏    | 168/321 [24:24<22:22,  8.77s/it][A
 53%|█████▎    | 169/321 [24:33<22:11,  8.76s

  Batch   200  of    321.



 63%|██████▎   | 201/321 [28:56<16:25,  8.22s/it][A
 63%|██████▎   | 202/321 [29:05<16:24,  8.28s/it][A
 63%|██████▎   | 203/321 [29:13<16:07,  8.20s/it][A
 64%|██████▎   | 204/321 [29:21<15:53,  8.15s/it][A
 64%|██████▍   | 205/321 [29:29<15:42,  8.13s/it][A
 64%|██████▍   | 206/321 [29:37<15:36,  8.14s/it][A
 64%|██████▍   | 207/321 [29:45<15:26,  8.13s/it][A
 65%|██████▍   | 208/321 [29:53<15:19,  8.14s/it][A
 65%|██████▌   | 209/321 [30:01<15:12,  8.15s/it][A
 65%|██████▌   | 210/321 [30:10<15:05,  8.16s/it][A
 66%|██████▌   | 211/321 [30:18<14:54,  8.13s/it][A
 66%|██████▌   | 212/321 [30:26<14:46,  8.14s/it][A
 66%|██████▋   | 213/321 [30:34<14:40,  8.15s/it][A
 67%|██████▋   | 214/321 [30:42<14:29,  8.12s/it][A
 67%|██████▋   | 215/321 [30:50<14:18,  8.10s/it][A
 67%|██████▋   | 216/321 [30:58<14:10,  8.10s/it][A
 68%|██████▊   | 217/321 [31:06<14:06,  8.13s/it][A
 68%|██████▊   | 218/321 [31:14<13:54,  8.10s/it][A
 68%|██████▊   | 219/321 [31:22<13:44,  8.08s

  Batch   250  of    321.



 78%|███████▊  | 251/321 [35:42<09:23,  8.06s/it][A
 79%|███████▊  | 252/321 [35:50<09:14,  8.04s/it][A
 79%|███████▉  | 253/321 [35:58<09:07,  8.05s/it][A
 79%|███████▉  | 254/321 [36:06<08:58,  8.04s/it][A
 79%|███████▉  | 255/321 [36:14<08:51,  8.05s/it][A
 80%|███████▉  | 256/321 [36:22<08:43,  8.05s/it][A
 80%|████████  | 257/321 [36:30<08:36,  8.07s/it][A
 80%|████████  | 258/321 [36:38<08:29,  8.08s/it][A
 81%|████████  | 259/321 [36:46<08:20,  8.08s/it][A
 81%|████████  | 260/321 [36:54<08:13,  8.09s/it][A
 81%|████████▏ | 261/321 [37:02<08:04,  8.08s/it][A
 82%|████████▏ | 262/321 [37:10<07:56,  8.07s/it][A
 82%|████████▏ | 263/321 [37:18<07:48,  8.07s/it][A
 82%|████████▏ | 264/321 [37:26<07:38,  8.05s/it][A
 83%|████████▎ | 265/321 [37:34<07:30,  8.05s/it][A
 83%|████████▎ | 266/321 [37:43<07:23,  8.07s/it][A
 83%|████████▎ | 267/321 [37:51<07:15,  8.07s/it][A
 83%|████████▎ | 268/321 [37:59<07:08,  8.09s/it][A
 84%|████████▍ | 269/321 [38:07<06:59,  8.07s

  Batch   300  of    321.



 94%|█████████▍| 301/321 [42:30<02:42,  8.12s/it][A
 94%|█████████▍| 302/321 [42:38<02:33,  8.10s/it][A
 94%|█████████▍| 303/321 [42:46<02:25,  8.08s/it][A
 95%|█████████▍| 304/321 [42:54<02:17,  8.10s/it][A
 95%|█████████▌| 305/321 [43:03<02:09,  8.08s/it][A
 95%|█████████▌| 306/321 [43:11<02:00,  8.06s/it][A
 96%|█████████▌| 307/321 [43:19<01:53,  8.07s/it][A
 96%|█████████▌| 308/321 [43:27<01:44,  8.05s/it][A
 96%|█████████▋| 309/321 [43:35<01:36,  8.05s/it][A
 97%|█████████▋| 310/321 [43:43<01:28,  8.03s/it][A
 97%|█████████▋| 311/321 [43:51<01:20,  8.03s/it][A
 97%|█████████▋| 312/321 [43:59<01:12,  8.04s/it][A
 98%|█████████▊| 313/321 [44:07<01:05,  8.17s/it][A
 98%|█████████▊| 314/321 [44:15<00:57,  8.16s/it][A
 98%|█████████▊| 315/321 [44:24<00:49,  8.18s/it][A
 98%|█████████▊| 316/321 [44:32<00:40,  8.17s/it][A
 99%|█████████▉| 317/321 [44:40<00:32,  8.14s/it][A
 99%|█████████▉| 318/321 [44:48<00:24,  8.09s/it][A
 99%|█████████▉| 319/321 [44:56<00:16,  8.08s


Evaluating...



  0%|          | 0/41 [00:00<?, ?it/s][A
  2%|▏         | 1/41 [00:07<05:07,  7.68s/it][A
  5%|▍         | 2/41 [00:15<04:58,  7.66s/it][A
  7%|▋         | 3/41 [00:22<04:50,  7.65s/it][A
 10%|▉         | 4/41 [00:30<04:42,  7.65s/it][A
 12%|█▏        | 5/41 [00:38<04:36,  7.68s/it][A
 15%|█▍        | 6/41 [00:46<04:30,  7.72s/it][A
 17%|█▋        | 7/41 [00:53<04:22,  7.73s/it][A
 20%|█▉        | 8/41 [01:01<04:14,  7.72s/it][A
 22%|██▏       | 9/41 [01:09<04:07,  7.72s/it][A
 24%|██▍       | 10/41 [01:16<03:58,  7.69s/it][A
 27%|██▋       | 11/41 [01:24<03:51,  7.70s/it][A
 29%|██▉       | 12/41 [01:32<03:42,  7.68s/it][A
 32%|███▏      | 13/41 [01:40<03:35,  7.70s/it][A
 34%|███▍      | 14/41 [01:47<03:28,  7.71s/it][A
 37%|███▋      | 15/41 [01:55<03:20,  7.72s/it][A
 39%|███▉      | 16/41 [02:03<03:13,  7.73s/it][A
 41%|████▏     | 17/41 [02:11<03:05,  7.74s/it][A
 44%|████▍     | 18/41 [02:18<02:57,  7.70s/it][A
 46%|████▋     | 19/41 [02:26<02:49,  7.70s/it]


Training Loss: 1.769
Validation Loss: 1.764

 Epoch 2 / 5



  0%|          | 0/321 [00:00<?, ?it/s][A
  0%|          | 1/321 [00:08<43:11,  8.10s/it][A
  1%|          | 2/321 [00:16<42:52,  8.06s/it][A
  1%|          | 3/321 [00:24<42:27,  8.01s/it][A
  1%|          | 4/321 [00:32<42:24,  8.03s/it][A
  2%|▏         | 5/321 [00:40<42:24,  8.05s/it][A
  2%|▏         | 6/321 [00:48<42:15,  8.05s/it][A
  2%|▏         | 7/321 [00:56<42:06,  8.05s/it][A
  2%|▏         | 8/321 [01:04<42:03,  8.06s/it][A
  3%|▎         | 9/321 [01:12<41:55,  8.06s/it][A
  3%|▎         | 10/321 [01:20<41:35,  8.03s/it][A
  3%|▎         | 11/321 [01:28<41:20,  8.00s/it][A
  4%|▎         | 12/321 [01:36<41:26,  8.05s/it][A
  4%|▍         | 13/321 [01:44<41:12,  8.03s/it][A
  4%|▍         | 14/321 [01:52<41:00,  8.02s/it][A
  5%|▍         | 15/321 [02:00<41:05,  8.06s/it][A
  5%|▍         | 16/321 [02:08<41:05,  8.08s/it][A
  5%|▌         | 17/321 [02:16<40:54,  8.07s/it][A
  6%|▌         | 18/321 [02:24<40:41,  8.06s/it][A
  6%|▌         | 19/321 [02:3

  Batch    50  of    321.



 16%|█▌        | 51/321 [06:51<36:21,  8.08s/it][A
 16%|█▌        | 52/321 [06:59<36:13,  8.08s/it][A
 17%|█▋        | 53/321 [07:07<36:06,  8.08s/it][A
 17%|█▋        | 54/321 [07:15<36:02,  8.10s/it][A
 17%|█▋        | 55/321 [07:23<35:42,  8.06s/it][A
 17%|█▋        | 56/321 [07:31<35:33,  8.05s/it][A
 18%|█▊        | 57/321 [07:40<35:35,  8.09s/it][A
 18%|█▊        | 58/321 [07:48<35:26,  8.09s/it][A
 18%|█▊        | 59/321 [07:56<35:16,  8.08s/it][A
 19%|█▊        | 60/321 [08:04<35:10,  8.09s/it][A
 19%|█▉        | 61/321 [08:12<35:09,  8.11s/it][A
 19%|█▉        | 62/321 [08:20<34:51,  8.07s/it][A
 20%|█▉        | 63/321 [08:28<35:08,  8.17s/it][A
 20%|█▉        | 64/321 [08:37<34:54,  8.15s/it][A
 20%|██        | 65/321 [08:45<35:08,  8.24s/it][A
 21%|██        | 66/321 [08:53<34:49,  8.20s/it][A
 21%|██        | 67/321 [09:01<34:34,  8.17s/it][A
 21%|██        | 68/321 [09:09<34:22,  8.15s/it][A
 21%|██▏       | 69/321 [09:17<34:08,  8.13s/it][A
 22%|██▏   

  Batch   100  of    321.



 31%|███▏      | 101/321 [13:36<29:39,  8.09s/it][A
 32%|███▏      | 102/321 [13:44<29:31,  8.09s/it][A
 32%|███▏      | 103/321 [13:53<29:27,  8.11s/it][A
 32%|███▏      | 104/321 [14:01<29:13,  8.08s/it][A
 33%|███▎      | 105/321 [14:09<29:05,  8.08s/it][A
 33%|███▎      | 106/321 [14:17<29:03,  8.11s/it][A
 33%|███▎      | 107/321 [14:25<28:52,  8.10s/it][A
 34%|███▎      | 108/321 [14:33<28:44,  8.09s/it][A
 34%|███▍      | 109/321 [14:41<28:33,  8.08s/it][A
 34%|███▍      | 110/321 [14:49<28:27,  8.09s/it][A
 35%|███▍      | 111/321 [14:57<28:21,  8.10s/it][A
 35%|███▍      | 112/321 [15:05<28:11,  8.09s/it][A
 35%|███▌      | 113/321 [15:13<27:59,  8.08s/it][A
 36%|███▌      | 114/321 [15:21<27:47,  8.06s/it][A
 36%|███▌      | 115/321 [15:29<27:39,  8.06s/it][A
 36%|███▌      | 116/321 [15:38<27:34,  8.07s/it][A
 36%|███▋      | 117/321 [15:46<27:29,  8.09s/it][A
 37%|███▋      | 118/321 [15:54<27:21,  8.09s/it][A
 37%|███▋      | 119/321 [16:02<27:16,  8.10s

  Batch   150  of    321.



 47%|████▋     | 151/321 [20:21<23:11,  8.19s/it][A
 47%|████▋     | 152/321 [20:30<22:56,  8.14s/it][A
 48%|████▊     | 153/321 [20:38<22:45,  8.13s/it][A
 48%|████▊     | 154/321 [20:46<22:38,  8.13s/it][A
 48%|████▊     | 155/321 [20:54<22:31,  8.14s/it][A
 49%|████▊     | 156/321 [21:02<22:21,  8.13s/it][A
 49%|████▉     | 157/321 [21:10<22:12,  8.13s/it][A
 49%|████▉     | 158/321 [21:18<21:59,  8.09s/it][A
 50%|████▉     | 159/321 [21:26<21:50,  8.09s/it][A
 50%|████▉     | 160/321 [21:34<21:39,  8.07s/it][A
 50%|█████     | 161/321 [21:42<21:31,  8.07s/it][A
 50%|█████     | 162/321 [21:50<21:20,  8.05s/it][A
 51%|█████     | 163/321 [21:58<21:15,  8.07s/it][A
 51%|█████     | 164/321 [22:07<21:09,  8.09s/it][A
 51%|█████▏    | 165/321 [22:15<21:00,  8.08s/it][A
 52%|█████▏    | 166/321 [22:23<20:49,  8.06s/it][A
 52%|█████▏    | 167/321 [22:31<20:45,  8.09s/it][A
 52%|█████▏    | 168/321 [22:39<20:42,  8.12s/it][A
 53%|█████▎    | 169/321 [22:47<20:32,  8.11s

  Batch   200  of    321.



 63%|██████▎   | 201/321 [27:06<16:13,  8.12s/it][A
 63%|██████▎   | 202/321 [27:14<16:08,  8.14s/it][A
 63%|██████▎   | 203/321 [27:22<15:55,  8.10s/it][A
 64%|██████▎   | 204/321 [27:30<15:48,  8.11s/it][A
 64%|██████▍   | 205/321 [27:38<15:38,  8.09s/it][A
 64%|██████▍   | 206/321 [27:46<15:31,  8.10s/it][A
 64%|██████▍   | 207/321 [27:55<15:25,  8.12s/it][A
 65%|██████▍   | 208/321 [28:03<15:21,  8.15s/it][A
 65%|██████▌   | 209/321 [28:11<15:10,  8.13s/it][A
 65%|██████▌   | 210/321 [28:19<15:00,  8.11s/it][A
 66%|██████▌   | 211/321 [28:27<14:49,  8.09s/it][A
 66%|██████▌   | 212/321 [28:35<14:43,  8.10s/it][A
 66%|██████▋   | 213/321 [28:43<14:34,  8.10s/it][A
 67%|██████▋   | 214/321 [28:51<14:22,  8.06s/it][A
 67%|██████▋   | 215/321 [28:59<14:14,  8.06s/it][A
 67%|██████▋   | 216/321 [29:07<14:10,  8.10s/it][A
 68%|██████▊   | 217/321 [29:16<14:04,  8.12s/it][A
 68%|██████▊   | 218/321 [29:24<13:54,  8.10s/it][A
 68%|██████▊   | 219/321 [29:32<13:46,  8.10s

  Batch   250  of    321.



 78%|███████▊  | 251/321 [33:51<09:25,  8.07s/it][A
 79%|███████▊  | 252/321 [33:59<09:17,  8.07s/it][A
 79%|███████▉  | 253/321 [34:08<09:11,  8.12s/it][A
 79%|███████▉  | 254/321 [34:16<09:01,  8.09s/it][A
 79%|███████▉  | 255/321 [34:24<08:52,  8.07s/it][A
 80%|███████▉  | 256/321 [34:32<08:44,  8.07s/it][A
 80%|████████  | 257/321 [34:40<08:38,  8.11s/it][A
 80%|████████  | 258/321 [34:48<08:30,  8.11s/it][A
 81%|████████  | 259/321 [34:56<08:21,  8.09s/it][A
 81%|████████  | 260/321 [35:04<08:14,  8.11s/it][A
 81%|████████▏ | 261/321 [35:12<08:07,  8.12s/it][A
 82%|████████▏ | 262/321 [35:20<07:56,  8.07s/it][A
 82%|████████▏ | 263/321 [35:28<07:46,  8.05s/it][A
 82%|████████▏ | 264/321 [35:36<07:37,  8.03s/it][A
 83%|████████▎ | 265/321 [35:44<07:29,  8.03s/it][A
 83%|████████▎ | 266/321 [35:53<07:24,  8.09s/it][A
 83%|████████▎ | 267/321 [36:01<07:20,  8.15s/it][A
 83%|████████▎ | 268/321 [36:09<07:15,  8.22s/it][A
 84%|████████▍ | 269/321 [36:18<07:07,  8.23s

  Batch   300  of    321.



 94%|█████████▍| 301/321 [40:36<02:42,  8.10s/it][A
 94%|█████████▍| 302/321 [40:45<02:34,  8.12s/it][A
 94%|█████████▍| 303/321 [40:53<02:25,  8.10s/it][A
 95%|█████████▍| 304/321 [41:01<02:17,  8.09s/it][A
 95%|█████████▌| 305/321 [41:09<02:09,  8.10s/it][A
 95%|█████████▌| 306/321 [41:17<02:01,  8.10s/it][A
 96%|█████████▌| 307/321 [41:25<01:53,  8.10s/it][A
 96%|█████████▌| 308/321 [41:33<01:45,  8.10s/it][A
 96%|█████████▋| 309/321 [41:41<01:36,  8.07s/it][A
 97%|█████████▋| 310/321 [41:49<01:28,  8.08s/it][A
 97%|█████████▋| 311/321 [41:57<01:20,  8.07s/it][A
 97%|█████████▋| 312/321 [42:05<01:12,  8.07s/it][A
 98%|█████████▊| 313/321 [42:14<01:04,  8.08s/it][A
 98%|█████████▊| 314/321 [42:22<00:56,  8.09s/it][A
 98%|█████████▊| 315/321 [42:30<00:48,  8.09s/it][A
 98%|█████████▊| 316/321 [42:38<00:40,  8.11s/it][A
 99%|█████████▉| 317/321 [42:46<00:32,  8.13s/it][A
 99%|█████████▉| 318/321 [42:54<00:24,  8.12s/it][A
 99%|█████████▉| 319/321 [43:02<00:16,  8.09s


Evaluating...



  0%|          | 0/41 [00:00<?, ?it/s][A
  2%|▏         | 1/41 [00:07<05:11,  7.79s/it][A
  5%|▍         | 2/41 [00:15<05:01,  7.74s/it][A
  7%|▋         | 3/41 [00:23<04:52,  7.70s/it][A
 10%|▉         | 4/41 [00:30<04:45,  7.71s/it][A
 12%|█▏        | 5/41 [00:38<04:38,  7.73s/it][A
 15%|█▍        | 6/41 [00:46<04:30,  7.74s/it][A
 17%|█▋        | 7/41 [00:54<04:23,  7.76s/it][A
 20%|█▉        | 8/41 [01:01<04:15,  7.73s/it][A
 22%|██▏       | 9/41 [01:09<04:07,  7.73s/it][A
 24%|██▍       | 10/41 [01:17<03:59,  7.72s/it][A
 27%|██▋       | 11/41 [01:25<03:52,  7.74s/it][A
 29%|██▉       | 12/41 [01:32<03:45,  7.78s/it][A
 32%|███▏      | 13/41 [01:40<03:37,  7.77s/it][A
 34%|███▍      | 14/41 [01:48<03:29,  7.76s/it][A
 37%|███▋      | 15/41 [01:56<03:21,  7.75s/it][A
 39%|███▉      | 16/41 [02:03<03:13,  7.73s/it][A
 41%|████▏     | 17/41 [02:11<03:05,  7.74s/it][A
 44%|████▍     | 18/41 [02:19<02:58,  7.75s/it][A
 46%|████▋     | 19/41 [02:27<02:50,  7.75s/it]


Training Loss: 1.759
Validation Loss: 1.764

 Epoch 3 / 5



  0%|          | 0/321 [00:00<?, ?it/s][A
  0%|          | 1/321 [00:08<43:42,  8.20s/it][A
  1%|          | 2/321 [00:16<43:14,  8.13s/it][A
  1%|          | 3/321 [00:24<43:11,  8.15s/it][A
  1%|          | 4/321 [00:32<42:58,  8.13s/it][A
  2%|▏         | 5/321 [00:40<42:38,  8.10s/it][A
  2%|▏         | 6/321 [00:48<42:21,  8.07s/it][A
  2%|▏         | 7/321 [00:56<42:29,  8.12s/it][A
  2%|▏         | 8/321 [01:04<42:20,  8.12s/it][A
  3%|▎         | 9/321 [01:13<42:06,  8.10s/it][A
  3%|▎         | 10/321 [01:21<41:58,  8.10s/it][A
  3%|▎         | 11/321 [01:29<41:46,  8.08s/it][A
  4%|▎         | 12/321 [01:37<41:41,  8.10s/it][A
  4%|▍         | 13/321 [01:45<41:33,  8.09s/it][A
  4%|▍         | 14/321 [01:53<41:24,  8.09s/it][A
  5%|▍         | 15/321 [02:01<41:12,  8.08s/it][A
  5%|▍         | 16/321 [02:09<41:06,  8.09s/it][A
  5%|▌         | 17/321 [02:17<41:02,  8.10s/it][A
  6%|▌         | 18/321 [02:25<40:59,  8.12s/it][A
  6%|▌         | 19/321 [02:3

  Batch    50  of    321.



 16%|█▌        | 51/321 [06:53<36:25,  8.10s/it][A
 16%|█▌        | 52/321 [07:01<36:21,  8.11s/it][A
 17%|█▋        | 53/321 [07:09<36:08,  8.09s/it][A
 17%|█▋        | 54/321 [07:17<35:57,  8.08s/it][A
 17%|█▋        | 55/321 [07:25<35:46,  8.07s/it][A
 17%|█▋        | 56/321 [07:33<35:46,  8.10s/it][A
 18%|█▊        | 57/321 [07:41<35:43,  8.12s/it][A
 18%|█▊        | 58/321 [07:49<35:27,  8.09s/it][A
 18%|█▊        | 59/321 [07:57<35:22,  8.10s/it][A
 19%|█▊        | 60/321 [08:06<35:13,  8.10s/it][A
 19%|█▉        | 61/321 [08:14<35:02,  8.08s/it][A
 19%|█▉        | 62/321 [08:22<34:53,  8.08s/it][A
 20%|█▉        | 63/321 [08:30<34:56,  8.12s/it][A
 20%|█▉        | 64/321 [08:38<34:50,  8.13s/it][A
 20%|██        | 65/321 [08:46<34:40,  8.13s/it][A
 21%|██        | 66/321 [08:54<34:24,  8.10s/it][A
 21%|██        | 67/321 [09:02<34:26,  8.13s/it][A
 21%|██        | 68/321 [09:11<34:15,  8.12s/it][A
 21%|██▏       | 69/321 [09:19<34:00,  8.10s/it][A
 22%|██▏   

  Batch   100  of    321.



 31%|███▏      | 101/321 [13:39<29:41,  8.10s/it][A
 32%|███▏      | 102/321 [13:47<29:31,  8.09s/it][A
 32%|███▏      | 103/321 [13:56<29:23,  8.09s/it][A
 32%|███▏      | 104/321 [14:04<29:18,  8.10s/it][A
 33%|███▎      | 105/321 [14:12<29:16,  8.13s/it][A
 33%|███▎      | 106/321 [14:20<29:04,  8.11s/it][A
 33%|███▎      | 107/321 [14:28<29:03,  8.15s/it][A
 34%|███▎      | 108/321 [14:36<28:59,  8.17s/it][A
 34%|███▍      | 109/321 [14:45<28:52,  8.17s/it][A
 34%|███▍      | 110/321 [14:53<28:40,  8.15s/it][A
 35%|███▍      | 111/321 [15:01<28:28,  8.13s/it][A
 35%|███▍      | 112/321 [15:09<28:32,  8.20s/it][A
 35%|███▌      | 113/321 [15:17<28:14,  8.15s/it][A
 36%|███▌      | 114/321 [15:25<27:59,  8.11s/it][A
 36%|███▌      | 115/321 [15:33<27:53,  8.13s/it][A
 36%|███▌      | 116/321 [15:42<27:47,  8.14s/it][A
 36%|███▋      | 117/321 [15:50<27:33,  8.11s/it][A
 37%|███▋      | 118/321 [15:58<27:25,  8.11s/it][A
 37%|███▋      | 119/321 [16:06<27:17,  8.11s

  Batch   150  of    321.



 47%|████▋     | 151/321 [20:28<23:04,  8.14s/it][A
 47%|████▋     | 152/321 [20:36<22:59,  8.16s/it][A
 48%|████▊     | 153/321 [20:44<22:54,  8.18s/it][A
 48%|████▊     | 154/321 [20:52<22:40,  8.14s/it][A
 48%|████▊     | 155/321 [21:00<22:32,  8.15s/it][A
 49%|████▊     | 156/321 [21:09<22:26,  8.16s/it][A
 49%|████▉     | 157/321 [21:17<22:19,  8.17s/it][A
 49%|████▉     | 158/321 [21:25<22:09,  8.15s/it][A
 50%|████▉     | 159/321 [21:33<21:55,  8.12s/it][A
 50%|████▉     | 160/321 [21:41<21:55,  8.17s/it][A
 50%|█████     | 161/321 [21:49<21:39,  8.12s/it][A
 50%|█████     | 162/321 [21:57<21:30,  8.11s/it][A
 51%|█████     | 163/321 [22:06<21:24,  8.13s/it][A
 51%|█████     | 164/321 [22:14<21:21,  8.17s/it][A
 51%|█████▏    | 165/321 [22:22<21:08,  8.13s/it][A
 52%|█████▏    | 166/321 [22:30<21:01,  8.14s/it][A
 52%|█████▏    | 167/321 [22:38<20:58,  8.17s/it][A
 52%|█████▏    | 168/321 [22:46<20:50,  8.17s/it][A
 53%|█████▎    | 169/321 [22:54<20:39,  8.15s

  Batch   200  of    321.



 63%|██████▎   | 201/321 [27:18<17:51,  8.93s/it][A
 63%|██████▎   | 202/321 [27:26<17:11,  8.67s/it][A
 63%|██████▎   | 203/321 [27:34<16:44,  8.51s/it][A
 64%|██████▎   | 204/321 [27:42<16:22,  8.40s/it][A
 64%|██████▍   | 205/321 [27:50<16:03,  8.31s/it][A
 64%|██████▍   | 206/321 [27:58<15:51,  8.28s/it][A
 64%|██████▍   | 207/321 [28:06<15:39,  8.24s/it][A
 65%|██████▍   | 208/321 [28:15<15:26,  8.20s/it][A
 65%|██████▌   | 209/321 [28:23<15:16,  8.18s/it][A
 65%|██████▌   | 210/321 [28:34<16:43,  9.04s/it][A
 66%|██████▌   | 211/321 [28:42<16:06,  8.79s/it][A
 66%|██████▌   | 212/321 [28:50<15:34,  8.57s/it][A
 66%|██████▋   | 213/321 [28:58<15:20,  8.52s/it][A
 67%|██████▋   | 214/321 [29:07<15:05,  8.46s/it][A
 67%|██████▋   | 215/321 [29:15<14:54,  8.44s/it][A
 67%|██████▋   | 216/321 [29:23<14:35,  8.34s/it][A
 68%|██████▊   | 217/321 [29:31<14:18,  8.26s/it][A
 68%|██████▊   | 218/321 [29:42<15:38,  9.12s/it][A
 68%|██████▊   | 219/321 [29:50<14:57,  8.79s

  Batch   250  of    321.



 78%|███████▊  | 251/321 [34:23<10:15,  8.80s/it][A
 79%|███████▊  | 252/321 [34:31<09:55,  8.63s/it][A
 79%|███████▉  | 253/321 [34:39<09:39,  8.53s/it][A
 79%|███████▉  | 254/321 [34:47<09:24,  8.43s/it][A
 79%|███████▉  | 255/321 [34:56<09:10,  8.34s/it][A
 80%|███████▉  | 256/321 [35:04<08:59,  8.30s/it][A
 80%|████████  | 257/321 [35:12<08:48,  8.26s/it][A
 80%|████████  | 258/321 [35:23<09:31,  9.08s/it][A
 81%|████████  | 259/321 [35:31<09:07,  8.84s/it][A
 81%|████████  | 260/321 [35:39<08:46,  8.63s/it][A
 81%|████████▏ | 261/321 [35:47<08:27,  8.46s/it][A
 82%|████████▏ | 262/321 [35:56<08:14,  8.38s/it][A
 82%|████████▏ | 263/321 [36:04<08:03,  8.33s/it][A
 82%|████████▏ | 264/321 [36:12<07:52,  8.30s/it][A
 83%|████████▎ | 265/321 [36:20<07:41,  8.24s/it][A
 83%|████████▎ | 266/321 [36:31<08:21,  9.11s/it][A
 83%|████████▎ | 267/321 [36:39<07:57,  8.84s/it][A
 83%|████████▎ | 268/321 [36:47<07:36,  8.61s/it][A
 84%|████████▍ | 269/321 [36:56<07:18,  8.44s

  Batch   300  of    321.



 94%|█████████▍| 301/321 [41:29<02:50,  8.52s/it][A
 94%|█████████▍| 302/321 [41:37<02:40,  8.45s/it][A
 94%|█████████▍| 303/321 [41:45<02:30,  8.34s/it][A
 95%|█████████▍| 304/321 [41:53<02:20,  8.25s/it][A
 95%|█████████▌| 305/321 [42:02<02:11,  8.24s/it][A
 95%|█████████▌| 306/321 [42:11<02:09,  8.60s/it][A
 96%|█████████▌| 307/321 [42:21<02:05,  8.99s/it][A
 96%|█████████▌| 308/321 [42:29<01:54,  8.78s/it][A
 96%|█████████▋| 309/321 [42:38<01:44,  8.69s/it][A
 97%|█████████▋| 310/321 [42:46<01:34,  8.58s/it][A
 97%|█████████▋| 311/321 [42:54<01:24,  8.45s/it][A
 97%|█████████▋| 312/321 [43:02<01:15,  8.36s/it][A
 98%|█████████▊| 313/321 [43:11<01:06,  8.32s/it][A
 98%|█████████▊| 314/321 [43:19<00:59,  8.49s/it][A
 98%|█████████▊| 315/321 [43:30<00:54,  9.01s/it][A
 98%|█████████▊| 316/321 [43:38<00:43,  8.79s/it][A
 99%|█████████▉| 317/321 [43:46<00:34,  8.57s/it][A
 99%|█████████▉| 318/321 [43:54<00:25,  8.44s/it][A
 99%|█████████▉| 319/321 [44:02<00:16,  8.36s


Evaluating...



  0%|          | 0/41 [00:00<?, ?it/s][A
  2%|▏         | 1/41 [00:07<05:13,  7.84s/it][A
  5%|▍         | 2/41 [00:18<06:11,  9.53s/it][A
  7%|▋         | 3/41 [00:26<05:33,  8.78s/it][A
 10%|▉         | 4/41 [00:34<05:10,  8.38s/it][A
 12%|█▏        | 5/41 [00:41<04:53,  8.15s/it][A
 15%|█▍        | 6/41 [00:49<04:42,  8.06s/it][A
 17%|█▋        | 7/41 [00:57<04:31,  7.97s/it][A
 20%|█▉        | 8/41 [01:05<04:21,  7.92s/it][A
 22%|██▏       | 9/41 [01:13<04:12,  7.89s/it][A
 24%|██▍       | 10/41 [01:24<04:32,  8.80s/it][A
 27%|██▋       | 11/41 [01:31<04:13,  8.45s/it][A
 29%|██▉       | 12/41 [01:39<03:58,  8.24s/it][A
 32%|███▏      | 13/41 [01:47<03:46,  8.10s/it][A
 34%|███▍      | 14/41 [01:55<03:36,  8.03s/it][A
 37%|███▋      | 15/41 [02:02<03:26,  7.92s/it][A
 39%|███▉      | 16/41 [02:10<03:16,  7.87s/it][A
 41%|████▏     | 17/41 [02:18<03:09,  7.89s/it][A
 44%|████▍     | 18/41 [02:27<03:08,  8.21s/it][A
 46%|████▋     | 19/41 [02:37<03:10,  8.67s/it]


Training Loss: 1.760
Validation Loss: 1.762

 Epoch 4 / 5



  0%|          | 0/321 [00:00<?, ?it/s][A
  0%|          | 1/321 [00:08<43:26,  8.14s/it][A
  1%|          | 2/321 [00:16<43:22,  8.16s/it][A
  1%|          | 3/321 [00:26<46:58,  8.86s/it][A
  1%|          | 4/321 [00:35<48:18,  9.14s/it][A
  2%|▏         | 5/321 [00:43<46:12,  8.77s/it][A
  2%|▏         | 6/321 [00:52<45:14,  8.62s/it][A
  2%|▏         | 7/321 [01:00<44:20,  8.47s/it][A
  2%|▏         | 8/321 [01:08<43:46,  8.39s/it][A
  3%|▎         | 9/321 [01:16<43:18,  8.33s/it][A
  3%|▎         | 10/321 [01:24<43:01,  8.30s/it][A
  3%|▎         | 11/321 [01:33<43:37,  8.44s/it][A
  4%|▎         | 12/321 [01:43<46:24,  9.01s/it][A
  4%|▍         | 13/321 [01:52<45:00,  8.77s/it][A
  4%|▍         | 14/321 [02:00<44:08,  8.63s/it][A
  5%|▍         | 15/321 [02:08<43:10,  8.47s/it][A
  5%|▍         | 16/321 [02:16<42:30,  8.36s/it][A
  5%|▌         | 17/321 [02:24<42:07,  8.31s/it][A
  6%|▌         | 18/321 [02:32<41:41,  8.26s/it][A
  6%|▌         | 19/321 [02:4

  Batch    50  of    321.



 16%|█▌        | 51/321 [07:14<37:15,  8.28s/it][A
 16%|█▌        | 52/321 [07:25<39:54,  8.90s/it][A
 17%|█▋        | 53/321 [07:34<39:51,  8.92s/it][A
 17%|█▋        | 54/321 [07:42<38:32,  8.66s/it][A
 17%|█▋        | 55/321 [07:50<37:41,  8.50s/it][A
 17%|█▋        | 56/321 [07:58<37:10,  8.42s/it][A
 18%|█▊        | 57/321 [08:07<37:15,  8.47s/it][A
 18%|█▊        | 58/321 [08:15<36:45,  8.38s/it][A
 18%|█▊        | 59/321 [08:23<36:35,  8.38s/it][A
 19%|█▊        | 60/321 [08:34<39:14,  9.02s/it][A
 19%|█▉        | 61/321 [08:42<38:38,  8.92s/it][A
 19%|█▉        | 62/321 [08:51<37:39,  8.72s/it][A
 20%|█▉        | 63/321 [08:59<36:46,  8.55s/it][A
 20%|█▉        | 64/321 [09:07<36:13,  8.46s/it][A
 20%|██        | 65/321 [09:15<35:43,  8.37s/it][A
 21%|██        | 66/321 [09:23<35:27,  8.34s/it][A
 21%|██        | 67/321 [09:32<35:01,  8.27s/it][A
 21%|██        | 68/321 [09:42<37:23,  8.87s/it][A
 21%|██▏       | 69/321 [09:51<37:22,  8.90s/it][A
 22%|██▏   

  Batch   100  of    321.



 31%|███▏      | 101/321 [14:23<33:16,  9.07s/it][A
 32%|███▏      | 102/321 [14:31<32:04,  8.79s/it][A
 32%|███▏      | 103/321 [14:40<31:18,  8.62s/it][A
 32%|███▏      | 104/321 [14:48<30:39,  8.48s/it][A
 33%|███▎      | 105/321 [14:56<30:14,  8.40s/it][A
 33%|███▎      | 106/321 [15:04<29:45,  8.31s/it][A
 33%|███▎      | 107/321 [15:12<29:28,  8.27s/it][A
 34%|███▎      | 108/321 [15:20<29:15,  8.24s/it][A
 34%|███▍      | 109/321 [15:31<31:48,  9.00s/it][A
 34%|███▍      | 110/321 [15:40<30:56,  8.80s/it][A
 35%|███▍      | 111/321 [15:48<30:08,  8.61s/it][A
 35%|███▍      | 112/321 [15:56<29:32,  8.48s/it][A
 35%|███▌      | 113/321 [16:04<28:56,  8.35s/it][A
 36%|███▌      | 114/321 [16:12<28:37,  8.30s/it][A
 36%|███▌      | 115/321 [16:20<28:19,  8.25s/it][A
 36%|███▌      | 116/321 [16:28<28:01,  8.20s/it][A
 36%|███▋      | 117/321 [16:38<29:27,  8.66s/it][A
 37%|███▋      | 118/321 [16:48<30:05,  8.89s/it][A
 37%|███▋      | 119/321 [16:56<29:14,  8.69s

  Batch   150  of    321.



 47%|████▋     | 151/321 [21:29<25:37,  9.05s/it][A
 47%|████▋     | 152/321 [21:37<24:38,  8.75s/it][A
 48%|████▊     | 153/321 [21:45<24:18,  8.68s/it][A
 48%|████▊     | 154/321 [21:54<23:52,  8.58s/it][A
 48%|████▊     | 155/321 [22:02<23:25,  8.47s/it][A
 49%|████▊     | 156/321 [22:10<22:55,  8.34s/it][A
 49%|████▉     | 157/321 [22:18<22:39,  8.29s/it][A
 49%|████▉     | 158/321 [22:26<22:20,  8.23s/it][A
 50%|████▉     | 159/321 [22:36<23:50,  8.83s/it][A
 50%|████▉     | 160/321 [22:45<23:44,  8.85s/it][A
 50%|█████     | 161/321 [22:53<23:02,  8.64s/it][A
 50%|█████     | 162/321 [23:01<22:24,  8.46s/it][A
 51%|█████     | 163/321 [23:10<21:59,  8.35s/it][A
 51%|█████     | 164/321 [23:18<21:42,  8.30s/it][A
 51%|█████▏    | 165/321 [23:26<21:26,  8.25s/it][A
 52%|█████▏    | 166/321 [23:34<21:08,  8.18s/it][A
 52%|█████▏    | 167/321 [23:43<21:38,  8.43s/it][A
 52%|█████▏    | 168/321 [23:53<22:52,  8.97s/it][A
 53%|█████▎    | 169/321 [24:01<22:03,  8.71s

  Batch   200  of    321.



 63%|██████▎   | 201/321 [28:33<18:04,  9.04s/it][A
 63%|██████▎   | 202/321 [28:42<17:20,  8.75s/it][A
 63%|██████▎   | 203/321 [28:50<16:52,  8.58s/it][A
 64%|██████▎   | 204/321 [28:58<16:32,  8.48s/it][A
 64%|██████▍   | 205/321 [29:06<16:11,  8.38s/it][A
 64%|██████▍   | 206/321 [29:14<15:56,  8.32s/it][A
 64%|██████▍   | 207/321 [29:23<15:46,  8.31s/it][A
 65%|██████▍   | 208/321 [29:31<15:32,  8.25s/it][A
 65%|██████▌   | 209/321 [29:42<16:53,  9.05s/it][A
 65%|██████▌   | 210/321 [29:50<16:20,  8.83s/it][A
 66%|██████▌   | 211/321 [29:58<15:50,  8.64s/it][A
 66%|██████▌   | 212/321 [30:06<15:22,  8.47s/it][A
 66%|██████▋   | 213/321 [30:14<15:00,  8.34s/it][A
 67%|██████▋   | 214/321 [30:23<14:54,  8.36s/it][A
 67%|██████▋   | 215/321 [30:31<14:46,  8.36s/it][A
 67%|██████▋   | 216/321 [30:39<14:38,  8.36s/it][A
 68%|██████▊   | 217/321 [30:50<15:45,  9.09s/it][A
 68%|██████▊   | 218/321 [30:59<15:22,  8.95s/it][A
 68%|██████▊   | 219/321 [31:07<14:48,  8.71s

  Batch   250  of    321.



 78%|███████▊  | 251/321 [35:41<10:29,  9.00s/it][A
 79%|███████▊  | 252/321 [35:49<10:04,  8.76s/it][A
 79%|███████▉  | 253/321 [36:00<10:44,  9.48s/it][A
 79%|███████▉  | 254/321 [36:09<10:09,  9.09s/it][A
 79%|███████▉  | 255/321 [36:17<09:42,  8.83s/it][A
 80%|███████▉  | 256/321 [36:25<09:21,  8.64s/it][A
 80%|████████  | 257/321 [36:33<09:03,  8.49s/it][A
 80%|████████  | 258/321 [36:41<08:47,  8.38s/it][A
 81%|████████  | 259/321 [36:50<08:35,  8.32s/it][A
 81%|████████  | 260/321 [36:58<08:25,  8.29s/it][A
 81%|████████▏ | 261/321 [37:08<08:59,  9.00s/it][A
 82%|████████▏ | 262/321 [37:17<08:45,  8.90s/it][A
 82%|████████▏ | 263/321 [37:25<08:25,  8.71s/it][A
 82%|████████▏ | 264/321 [37:34<08:07,  8.55s/it][A
 83%|████████▎ | 265/321 [37:42<07:51,  8.42s/it][A
 83%|████████▎ | 266/321 [37:50<07:39,  8.36s/it][A
 83%|████████▎ | 267/321 [37:58<07:29,  8.32s/it][A
 83%|████████▎ | 268/321 [38:06<07:19,  8.30s/it][A
 84%|████████▍ | 269/321 [38:17<07:44,  8.94s

  Batch   300  of    321.



 94%|█████████▍| 301/321 [42:48<02:44,  8.23s/it][A
 94%|█████████▍| 302/321 [42:58<02:47,  8.81s/it][A
 94%|█████████▍| 303/321 [43:07<02:40,  8.91s/it][A
 95%|█████████▍| 304/321 [43:15<02:27,  8.70s/it][A
 95%|█████████▌| 305/321 [43:24<02:16,  8.56s/it][A
 95%|█████████▌| 306/321 [43:32<02:06,  8.43s/it][A
 96%|█████████▌| 307/321 [43:40<01:57,  8.37s/it][A
 96%|█████████▌| 308/321 [43:48<01:48,  8.33s/it][A
 96%|█████████▋| 309/321 [43:56<01:39,  8.30s/it][A
 97%|█████████▋| 310/321 [44:05<01:33,  8.47s/it][A
 97%|█████████▋| 311/321 [44:16<01:30,  9.02s/it][A
 97%|█████████▋| 312/321 [44:24<01:18,  8.77s/it][A
 98%|█████████▊| 313/321 [44:32<01:08,  8.55s/it][A
 98%|█████████▊| 314/321 [44:40<00:59,  8.43s/it][A
 98%|█████████▊| 315/321 [44:48<00:50,  8.34s/it][A
 98%|█████████▊| 316/321 [44:56<00:41,  8.31s/it][A
 99%|█████████▉| 317/321 [45:04<00:33,  8.26s/it][A
 99%|█████████▉| 318/321 [45:13<00:24,  8.26s/it][A
 99%|█████████▉| 319/321 [45:24<00:18,  9.09s


Evaluating...



  0%|          | 0/41 [00:00<?, ?it/s][A
  2%|▏         | 1/41 [00:07<05:18,  7.97s/it][A
  5%|▍         | 2/41 [00:16<05:15,  8.09s/it][A
  7%|▋         | 3/41 [00:23<05:01,  7.93s/it][A
 10%|▉         | 4/41 [00:31<04:52,  7.90s/it][A
 12%|█▏        | 5/41 [00:39<04:43,  7.88s/it][A
 15%|█▍        | 6/41 [00:49<05:01,  8.61s/it][A
 17%|█▋        | 7/41 [00:58<04:51,  8.58s/it][A
 20%|█▉        | 8/41 [01:05<04:34,  8.32s/it][A
 22%|██▏       | 9/41 [01:13<04:21,  8.16s/it][A
 24%|██▍       | 10/41 [01:21<04:09,  8.05s/it][A
 27%|██▋       | 11/41 [01:29<03:59,  7.99s/it][A
 29%|██▉       | 12/41 [01:37<03:50,  7.93s/it][A
 32%|███▏      | 13/41 [01:44<03:40,  7.89s/it][A
 34%|███▍      | 14/41 [01:52<03:31,  7.83s/it][A
 37%|███▋      | 15/41 [02:03<03:47,  8.75s/it][A
 39%|███▉      | 16/41 [02:11<03:31,  8.45s/it][A
 41%|████▏     | 17/41 [02:19<03:17,  8.24s/it][A
 44%|████▍     | 18/41 [02:26<03:06,  8.11s/it][A
 46%|████▋     | 19/41 [02:34<02:56,  8.02s/it]


Training Loss: 1.760
Validation Loss: 1.763

 Epoch 5 / 5



  0%|          | 0/321 [00:00<?, ?it/s][A
  0%|          | 1/321 [00:08<46:57,  8.81s/it][A
  1%|          | 2/321 [00:17<44:58,  8.46s/it][A
  1%|          | 3/321 [00:25<44:39,  8.43s/it][A
  1%|          | 4/321 [00:33<44:09,  8.36s/it][A
  2%|▏         | 5/321 [00:41<43:50,  8.33s/it][A
  2%|▏         | 6/321 [00:50<43:27,  8.28s/it][A
  2%|▏         | 7/321 [00:58<43:05,  8.23s/it][A
  2%|▏         | 8/321 [01:09<47:57,  9.19s/it][A
  3%|▎         | 9/321 [01:17<46:06,  8.87s/it][A
  3%|▎         | 10/321 [01:25<44:43,  8.63s/it][A
  3%|▎         | 11/321 [01:33<43:43,  8.46s/it][A
  4%|▎         | 12/321 [01:42<43:12,  8.39s/it][A
  4%|▍         | 13/321 [01:50<42:41,  8.32s/it][A
  4%|▍         | 14/321 [01:58<42:14,  8.26s/it][A
  5%|▍         | 15/321 [02:06<41:55,  8.22s/it][A
  5%|▍         | 16/321 [02:16<44:17,  8.71s/it][A
  5%|▌         | 17/321 [02:25<45:13,  8.93s/it][A
  6%|▌         | 18/321 [02:34<44:04,  8.73s/it][A
  6%|▌         | 19/321 [02:4

  Batch    50  of    321.



 16%|█▌        | 51/321 [07:15<38:55,  8.65s/it][A
 16%|█▌        | 52/321 [07:23<38:08,  8.51s/it][A
 17%|█▋        | 53/321 [07:31<37:31,  8.40s/it][A
 17%|█▋        | 54/321 [07:39<37:04,  8.33s/it][A
 17%|█▋        | 55/321 [07:48<36:54,  8.32s/it][A
 17%|█▋        | 56/321 [07:56<36:31,  8.27s/it][A
 18%|█▊        | 57/321 [08:06<38:50,  8.83s/it][A
 18%|█▊        | 58/321 [08:15<39:08,  8.93s/it][A
 18%|█▊        | 59/321 [08:23<37:58,  8.70s/it][A
 19%|█▊        | 60/321 [08:31<37:05,  8.53s/it][A
 19%|█▉        | 61/321 [08:40<36:31,  8.43s/it][A
 19%|█▉        | 62/321 [08:48<36:10,  8.38s/it][A
 20%|█▉        | 63/321 [08:56<35:42,  8.30s/it][A
 20%|█▉        | 64/321 [09:04<35:21,  8.25s/it][A
 20%|██        | 65/321 [09:13<36:29,  8.55s/it][A
 21%|██        | 66/321 [09:24<38:18,  9.01s/it][A
 21%|██        | 67/321 [09:32<36:58,  8.73s/it][A
 21%|██        | 68/321 [09:40<36:03,  8.55s/it][A
 21%|██▏       | 69/321 [09:48<35:32,  8.46s/it][A
 22%|██▏   

  Batch   100  of    321.



 31%|███▏      | 101/321 [14:21<31:07,  8.49s/it][A
 32%|███▏      | 102/321 [14:29<30:35,  8.38s/it][A
 32%|███▏      | 103/321 [14:37<30:19,  8.35s/it][A
 32%|███▏      | 104/321 [14:45<30:00,  8.30s/it][A
 33%|███▎      | 105/321 [14:54<29:45,  8.27s/it][A
 33%|███▎      | 106/321 [15:05<32:39,  9.12s/it][A
 33%|███▎      | 107/321 [15:13<31:31,  8.84s/it][A
 34%|███▎      | 108/321 [15:21<30:40,  8.64s/it][A
 34%|███▍      | 109/321 [15:29<30:00,  8.49s/it][A
 34%|███▍      | 110/321 [15:37<29:28,  8.38s/it][A
 35%|███▍      | 111/321 [15:45<28:59,  8.29s/it][A
 35%|███▍      | 112/321 [15:54<28:45,  8.25s/it][A
 35%|███▌      | 113/321 [16:02<28:27,  8.21s/it][A
 36%|███▌      | 114/321 [16:13<31:20,  9.09s/it][A
 36%|███▌      | 115/321 [16:21<30:14,  8.81s/it][A
 36%|███▌      | 116/321 [16:29<29:26,  8.62s/it][A
 36%|███▋      | 117/321 [16:37<28:52,  8.49s/it][A
 37%|███▋      | 118/321 [16:46<28:23,  8.39s/it][A
 37%|███▋      | 119/321 [16:54<28:00,  8.32s

  Batch   150  of    321.



 47%|████▋     | 151/321 [21:28<23:53,  8.43s/it][A
 47%|████▋     | 152/321 [21:36<23:29,  8.34s/it][A
 48%|████▊     | 153/321 [21:44<23:10,  8.28s/it][A
 48%|████▊     | 154/321 [21:52<22:54,  8.23s/it][A
 48%|████▊     | 155/321 [22:03<25:09,  9.09s/it][A
 49%|████▊     | 156/321 [22:12<24:17,  8.83s/it][A
 49%|████▉     | 157/321 [22:20<23:36,  8.64s/it][A
 49%|████▉     | 158/321 [22:28<23:03,  8.49s/it][A
 50%|████▉     | 159/321 [22:36<22:44,  8.42s/it][A
 50%|████▉     | 160/321 [22:45<22:26,  8.36s/it][A
 50%|█████     | 161/321 [22:53<22:07,  8.30s/it][A
 50%|█████     | 162/321 [23:01<21:55,  8.28s/it][A
 51%|█████     | 163/321 [23:12<24:11,  9.18s/it][A
 51%|█████     | 164/321 [23:20<23:13,  8.88s/it][A
 51%|█████▏    | 165/321 [23:28<22:30,  8.65s/it][A
 52%|█████▏    | 166/321 [23:37<22:02,  8.53s/it][A
 52%|█████▏    | 167/321 [23:45<21:40,  8.45s/it][A
 52%|█████▏    | 168/321 [23:53<21:21,  8.37s/it][A
 53%|█████▎    | 169/321 [24:01<21:03,  8.31s

  Batch   200  of    321.



 63%|██████▎   | 201/321 [28:35<16:41,  8.35s/it][A
 63%|██████▎   | 202/321 [28:43<16:28,  8.31s/it][A
 63%|██████▎   | 203/321 [28:53<16:58,  8.63s/it][A
 64%|██████▎   | 204/321 [29:02<17:30,  8.98s/it][A
 64%|██████▍   | 205/321 [29:11<16:57,  8.77s/it][A
 64%|██████▍   | 206/321 [29:19<16:29,  8.60s/it][A
 64%|██████▍   | 207/321 [29:27<16:05,  8.47s/it][A
 65%|██████▍   | 208/321 [29:35<15:48,  8.40s/it][A
 65%|██████▌   | 209/321 [29:44<15:32,  8.33s/it][A
 65%|██████▌   | 210/321 [29:52<15:16,  8.26s/it][A
 66%|██████▌   | 211/321 [30:01<15:44,  8.59s/it][A
 66%|██████▌   | 212/321 [30:11<16:18,  8.98s/it][A
 66%|██████▋   | 213/321 [30:19<15:41,  8.72s/it][A
 67%|██████▋   | 214/321 [30:27<15:08,  8.49s/it][A
 67%|██████▋   | 215/321 [30:35<14:50,  8.40s/it][A
 67%|██████▋   | 216/321 [30:44<14:48,  8.46s/it][A
 68%|██████▊   | 217/321 [30:52<14:36,  8.43s/it][A
 68%|██████▊   | 218/321 [31:00<14:26,  8.42s/it][A
 68%|██████▊   | 219/321 [31:09<14:36,  8.59s

  Batch   250  of    321.



 78%|███████▊  | 251/321 [35:42<09:38,  8.26s/it][A
 79%|███████▊  | 252/321 [35:53<10:25,  9.07s/it][A
 79%|███████▉  | 253/321 [36:01<09:56,  8.77s/it][A
 79%|███████▉  | 254/321 [36:10<09:39,  8.65s/it][A
 79%|███████▉  | 255/321 [36:18<09:27,  8.59s/it][A
 80%|███████▉  | 256/321 [36:26<09:10,  8.47s/it][A
 80%|████████  | 257/321 [36:35<09:02,  8.47s/it][A
 80%|████████  | 258/321 [36:43<08:51,  8.43s/it][A
 81%|████████  | 259/321 [36:51<08:38,  8.36s/it][A
 81%|████████  | 260/321 [37:02<09:20,  9.19s/it][A
 81%|████████▏ | 261/321 [37:11<08:54,  8.91s/it][A
 82%|████████▏ | 262/321 [37:19<08:34,  8.71s/it][A
 82%|████████▏ | 263/321 [37:27<08:15,  8.54s/it][A
 82%|████████▏ | 264/321 [37:35<08:00,  8.43s/it][A
 83%|████████▎ | 265/321 [37:43<07:48,  8.37s/it][A
 83%|████████▎ | 266/321 [37:52<07:37,  8.32s/it][A
 83%|████████▎ | 267/321 [38:00<07:26,  8.27s/it][A
 83%|████████▎ | 268/321 [38:11<08:03,  9.13s/it][A
 84%|████████▍ | 269/321 [38:19<07:41,  8.88s

  Batch   300  of    321.



 94%|█████████▍| 301/321 [42:53<02:57,  8.87s/it][A
 94%|█████████▍| 302/321 [43:01<02:44,  8.67s/it][A
 94%|█████████▍| 303/321 [43:09<02:33,  8.52s/it][A
 95%|█████████▍| 304/321 [43:17<02:23,  8.42s/it][A
 95%|█████████▌| 305/321 [43:25<02:13,  8.36s/it][A
 95%|█████████▌| 306/321 [43:34<02:04,  8.28s/it][A
 96%|█████████▌| 307/321 [43:42<01:55,  8.27s/it][A
 96%|█████████▌| 308/321 [43:53<01:57,  9.02s/it][A
 96%|█████████▋| 309/321 [44:01<01:47,  8.95s/it][A
 97%|█████████▋| 310/321 [44:09<01:35,  8.71s/it][A
 97%|█████████▋| 311/321 [44:18<01:25,  8.57s/it][A
 97%|█████████▋| 312/321 [44:26<01:16,  8.45s/it][A
 98%|█████████▊| 313/321 [44:34<01:06,  8.34s/it][A
 98%|█████████▊| 314/321 [44:42<00:58,  8.30s/it][A
 98%|█████████▊| 315/321 [44:50<00:49,  8.26s/it][A
 98%|█████████▊| 316/321 [45:01<00:44,  8.93s/it][A
 99%|█████████▉| 317/321 [45:10<00:35,  8.87s/it][A
 99%|█████████▉| 318/321 [45:18<00:26,  8.68s/it][A
 99%|█████████▉| 319/321 [45:26<00:17,  8.53s


Evaluating...



  0%|          | 0/41 [00:00<?, ?it/s][A
  2%|▏         | 1/41 [00:07<05:12,  7.82s/it][A
  5%|▍         | 2/41 [00:15<05:05,  7.83s/it][A
  7%|▋         | 3/41 [00:24<05:07,  8.09s/it][A
 10%|▉         | 4/41 [00:34<05:29,  8.91s/it][A
 12%|█▏        | 5/41 [00:42<05:06,  8.52s/it][A
 15%|█▍        | 6/41 [00:49<04:48,  8.26s/it][A
 17%|█▋        | 7/41 [00:57<04:35,  8.11s/it][A
 20%|█▉        | 8/41 [01:05<04:24,  8.03s/it][A
 22%|██▏       | 9/41 [01:13<04:14,  7.96s/it][A
 24%|██▍       | 10/41 [01:21<04:05,  7.93s/it][A
 27%|██▋       | 11/41 [01:29<03:57,  7.91s/it][A
 29%|██▉       | 12/41 [01:39<04:14,  8.79s/it][A
 32%|███▏      | 13/41 [01:47<03:57,  8.49s/it][A
 34%|███▍      | 14/41 [01:55<03:43,  8.29s/it][A
 37%|███▋      | 15/41 [02:03<03:31,  8.14s/it][A
 39%|███▉      | 16/41 [02:11<03:20,  8.03s/it][A
 41%|████▏     | 17/41 [02:18<03:11,  7.97s/it][A
 44%|████▍     | 18/41 [02:26<03:02,  7.92s/it][A
 46%|████▋     | 19/41 [02:34<02:53,  7.88s/it]


Training Loss: 1.759
Validation Loss: 1.762





In [15]:
# load weights of best model
# map_location="cpu" deserializes the checkpoint onto the CPU first, so the file
# can be read even on a runtime without the GPU it may have been saved from;
# load_state_dict then copies each tensor into the model's existing parameters
# on whatever device they currently live.
# NOTE(review): the path is an absolute Colab location; presumably written by the
# training loop earlier in the notebook - confirm before running elsewhere.
path = '/content/fixed_weights_roberta.pt'
model.load_state_dict(torch.load(path, map_location="cpu"))

<All keys matched successfully>

In [18]:
# Score the held-out test split and print per-class precision/recall/F1.
# eval() puts the model in inference mode explicitly, so the result does not
# depend on which mode an earlier cell happened to leave it in (train mode keeps
# any dropout-style layers active and makes predictions non-deterministic).
model.eval()
with torch.no_grad():
  # NOTE(review): the entire test set is pushed through in a single forward
  # pass; switch to a DataLoader if this exhausts memory.
  test_logits = model(test_seq, test_mask)
  test_logits = test_logits.detach().cpu().numpy()

# Index of the highest-scoring column per row is the predicted class id.
preds = np.argmax(test_logits, axis = 1)
print(classification_report(test_y, preds))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       250
           1       0.21      1.00      0.34       267
           2       0.00      0.00      0.00       249
           3       0.00      0.00      0.00       211
           4       0.00      0.00      0.00       214
           5       0.00      0.00      0.00        92

    accuracy                           0.21      1283
   macro avg       0.03      0.17      0.06      1283
weighted avg       0.04      0.21      0.07      1283



In [19]:
# testing on unseen data
unseen_news_text = ["Donald Trump Sends Out Embarrassing New Year’s Eve Message; This is Disturbing",     # Fake
                    "WATCH: George W. Bush Calls Out Trump For Supporting White Supremacy",               # Fake
                    "U.S. lawmakers question businessman at 2016 Trump Tower meeting: sources",           # True
                    "Trump administration issues new rules on U.S. visa waivers"                          # True
                    ]

# tokenize and encode sequences in the test set
MAX_LENGHT = 100
tokens_unseen = roberta_tokenizer.batch_encode_plus(
    unseen_news_text,
    max_length = MAX_LENGHT,
    pad_to_max_length=True,
    truncation=True
)

unseen_seq = torch.tensor(tokens_unseen['input_ids'])
unseen_mask = torch.tensor(tokens_unseen['attention_mask'])

with torch.no_grad():
  preds = model(unseen_seq, unseen_mask)
  preds = preds.detach().cpu().numpy()

preds = np.argmax(preds, axis = 1)
preds

array([1, 1, 1, 1])