In [None]:
!pip install transformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/d5/43/cfe4ee779bbd6a678ac6a97c5a5cdeb03c35f9eaebbb9720b036680f9a2d/transformers-4.6.1-py3-none-any.whl (2.2MB)
[K     |████████████████████████████████| 2.3MB 10.2MB/s 
Collecting huggingface-hub==0.0.8
  Downloading https://files.pythonhosted.org/packages/a1/88/7b1e45720ecf59c6c6737ff332f41c955963090a18e72acbcbeac6b25e86/huggingface_hub-0.0.8-py3-none-any.whl
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/75/ee/67241dc87f266093c533a2d4d3d69438e57d7a90abb216fa076e7d475d4a/sacremoses-0.0.45-py3-none-any.whl (895kB)
[K     |████████████████████████████████| 901kB 40.0MB/s 
Collecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/d4/e2/df3543e8ffdab68f5acc73f613de9c2b155ac47f162e725dcac87c521c11/tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3MB)
[K     |██████

In [None]:
import torch
from torch import nn
from transformers import ElectraModel
from transformers import ElectraTokenizer
from transformers import BertTokenizer, BertModel

# One pre-trained tokenizer per encoder checkpoint this notebook can fine-tune.
# All three are loaded as soon as this cell runs.
ELECTRA_TOKENIZER = ElectraTokenizer.from_pretrained(
    'google/electra-small-discriminator'
)
ELECTRA_BASE_TOKENIZER = ElectraTokenizer.from_pretrained(
    'google/electra-base-discriminator'
)
BERT_TOKENIZER = BertTokenizer.from_pretrained(
    'bert-base-uncased'
)

class Electra(nn.Module):
    """Pre-trained ELECTRA discriminator encoder followed by a linear classifier.

    The classifier is applied to the encoder's hidden state at one position per
    sequence (the position given by ``locs``).

    Args:
        output_size: Number of output classes of the linear head.
        size: Checkpoint size inserted into ``google/electra-{size}-discriminator``.
        device: Device the encoder and the linear head are moved to.
    """

    def __init__(self, output_size, size = 'small', device='cpu'):
        super().__init__()
        self.device = device
        self.model = ElectraModel.from_pretrained(f'google/electra-{size}-discriminator').to(device)
        self.output = nn.Linear(self.model.config.hidden_size, output_size).to(device)

    def forward(self, sents, locs, attention_mask=None):
        """Classify the token found at ``locs[n]`` in every sequence ``n``.

        Args:
            sents: Batch of token ids, indexed as ``(batch, position)``.
            locs: One position per sequence; used to index the encoder output.
                NOTE(review): these must be positions in the *tokenized*
                sequence (special tokens included) -- confirm against the
                values the dataset provides.
            attention_mask: Optional mask forwarded to the encoder. When
                omitted, positions equal to the encoder's ``pad_token_id`` are
                masked out so padding does not take part in attention.

        Returns:
            Output of the linear head, one row per sequence.
        """
        if attention_mask is None:
            pad_id = self.model.config.pad_token_id
            if pad_id is not None:
                # Batches are padded to a common length upstream; without a
                # mask the encoder would attend to the padding tokens.
                attention_mask = (sents != pad_id).long()

        # First element of the encoder output: per-token hidden states.
        hidden = self.model(sents, attention_mask=attention_mask)[0]
        # Pick the hidden vector at the requested position of each sequence.
        abbs = torch.stack([hidden[n, idx, :] for n, idx in enumerate(locs)])
        return self.output(abbs)

class Bert(nn.Module):
    """Pre-trained ``bert-base-uncased`` encoder followed by a linear classifier.

    The classifier is applied to the encoder's hidden state at one position per
    sequence (the position given by ``locs``).

    Args:
        output_size: Number of output classes of the linear head.
        device: Device the encoder and the linear head are moved to.
    """

    def __init__(self, output_size, device='cpu'):
        super().__init__()
        self.device = device
        self.model = BertModel.from_pretrained('bert-base-uncased').to(device)
        self.output = nn.Linear(self.model.config.hidden_size, output_size).to(device)

    def forward(self, sents, locs, attention_mask=None):
        """Classify the token found at ``locs[n]`` in every sequence ``n``.

        Args:
            sents: Batch of token ids, indexed as ``(batch, position)``.
            locs: One position per sequence; used to index the encoder output.
                NOTE(review): these must be positions in the *tokenized*
                sequence (special tokens included) -- confirm against the
                values the dataset provides.
            attention_mask: Optional mask forwarded to the encoder. When
                omitted, positions equal to the encoder's ``pad_token_id`` are
                masked out so padding does not take part in attention.

        Returns:
            Output of the linear head, one row per sequence.
        """
        if attention_mask is None:
            pad_id = self.model.config.pad_token_id
            if pad_id is not None:
                # Batches are padded to a common length upstream; without a
                # mask the encoder would attend to the padding tokens.
                attention_mask = (sents != pad_id).long()

        # First element of the encoder output: per-token hidden states.
        hidden = self.model(sents, attention_mask=attention_mask)[0]
        # Pick the hidden vector at the requested position of each sequence.
        abbs = torch.stack([hidden[n, idx, :] for n, idx in enumerate(locs)])
        return self.output(abbs)




In [None]:
class MedalDatasetTokenizer(torch.utils.data.Dataset):
    """Dataset that tokenizes rows of a DataFrame on access.

    Rows are read from ``df`` columns ``TEXT``, ``LOCATION`` and ``LABEL``; the
    ``LABEL`` strings are mapped to class ids through the dictionary file.

    Args:
        df: DataFrame with ``TEXT``, ``LOCATION`` and ``LABEL`` columns.
        tokenizer: Callable invoked as
            ``tokenizer(texts, max_length=..., padding=True, truncation=True)``
            whose result exposes ``['input_ids']``.
        dictionary_file: Tab-separated file with ``EXPANSION`` and ``LABEL``
            columns; ``EXPANSION`` becomes the lookup key.
        max_length: Maximum sequence length handed to the tokenizer.
        device: Device the returned tensors are moved to.
    """

    def __init__(self, df, tokenizer, dictionary_file, max_length=256, device='cpu'):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.device = device
        self.df = df
        label_df = pd.read_csv(dictionary_file, sep='\t', index_col = "EXPANSION")
        # Series indexed by expansion string, holding the class id of each one.
        self.label_ser = label_df["LABEL"].squeeze()

    def __len__(self):
        """Return the number of rows in the underlying DataFrame."""
        return self.df.shape[0]

    def __getitem__(self, idxs):
        """Tokenize the requested rows and return them as a batch.

        Args:
            idxs: Row positions: a single integer, a list/array of integers, a
                slice, or an integer tensor (as produced by a ``DataLoader``
                iterating over ``range(len(dataset))``).

        Returns:
            Tuple ``(token_ids, locations, labels)`` of tensors on
            ``self.device``. A single integer index yields a batch of one row.
        """
        # Index tensors become plain Python ints / lists before reaching pandas.
        if torch.is_tensor(idxs):
            idxs = idxs.tolist()
        # A scalar index would make `iloc` return a Series (one row) instead of
        # a DataFrame; wrap it so the code below always sees a batch.
        if pd.api.types.is_integer(idxs):
            idxs = [idxs]

        batch_df = self.df.iloc[idxs]
        locs = batch_df['LOCATION'].values
        label_strings = batch_df['LABEL'].values
        labels = self.label_ser[label_strings].to_numpy()

        # padding=True pads every sequence to the longest one in this batch.
        batch_encode = self.tokenizer(batch_df['TEXT'].tolist(), max_length=self.max_length,
                    padding=True, truncation = True)
        tokenized = batch_encode['input_ids']

        return torch.tensor(tokenized).to(self.device), torch.tensor(locs).to(self.device), torch.tensor(labels).to(self.device)


In [None]:
from torch.utils.data import DataLoader
from tqdm import tqdm
import numpy as np
from datetime import datetime
import os
from time import time 
import pandas as pd

In [None]:
def train_loop(train_data, model, loss_fn, optimizer, train_loader, max = -1):
    """Run one training epoch and report the mean loss and accuracy.

    Args:
        train_data: Indexable dataset; ``train_data[idx]`` must return
            ``(inputs, locations, labels)`` for a batch of indices.
        model: Module called as ``model(inputs, locations)``.
        loss_fn: Callable ``loss_fn(predictions, labels)`` returning a scalar loss.
        optimizer: Optimizer stepping the parameters of ``model``.
        train_loader: Iterable yielding one batch of indices per step.
        max: If positive, stop after exactly this many batches (for quick
            tests); otherwise process every batch. (The name shadows the
            builtin inside this function; kept for caller compatibility.)

    Returns:
        Tuple ``(mean_loss, accuracy)``. Both are computed over the batches
        that were actually processed, so an early stop does not deflate them.
    """
    # Switches model to training mode.
    model.train()

    # Per-batch loss values; averaged at the end.
    loss_list = []
    # Running counts for the accuracy.
    correct = 0
    seen = 0

    for batch, idx in enumerate(tqdm(train_loader)):
        # Fetch the batch once: every dataset access re-tokenizes the texts.
        items = train_data[idx]
        X, loc, y = items[0], items[1], items[2]

        # Compute prediction and loss
        pred = model(X, loc)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Record the loss and accuracy
        loss_value = loss.item()
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()
        seen += len(y)
        loss_list.append(loss_value)

        # Terminate early for testing purposes (batch is 0-based).
        if max > 0 and batch + 1 >= max:
            print("\nMax iterations reached.")
            break

        # Minibatch loss
        if batch % 20 == 0 and batch != 0:
            print(f"\nBatch loss: {loss_value:>7f}")

    # An empty loader yields NaN loss / zero accuracy instead of raising.
    mean_loss = np.mean(np.array(loss_list)) if loss_list else float("nan")
    accuracy = correct / seen if seen else 0.0
    print(f"Accuracy: {accuracy:>3f} | Average Loss: {mean_loss:>7f}\n")
    return mean_loss, accuracy

# Tests the model on the validation data
def valid_loop(valid_data, model, loss_fn, valid_loader, max = -1):
    """Evaluate ``model`` on the validation data without updating it.

    Args:
        valid_data: Indexable dataset; ``valid_data[idx]`` must return
            ``(inputs, locations, labels)`` for a batch of indices.
        model: Module called as ``model(inputs, locations)``.
        loss_fn: Callable ``loss_fn(predictions, labels)`` returning a scalar loss.
        valid_loader: Iterable yielding one batch of indices per step.
        max: If positive, stop after exactly this many batches (for quick
            tests); otherwise process every batch. (The name shadows the
            builtin inside this function; kept for caller compatibility.)

    Returns:
        Tuple ``(mean_loss, accuracy)``, both computed over the batches that
        were actually processed.
    """
    # Switches model to evaluation mode
    model.eval()

    loss_list = []
    correct = 0
    seen = 0

    with torch.no_grad():
        # Wrap the loader (not the enumerate object) so tqdm knows the total.
        for batch, idx in enumerate(tqdm(valid_loader)):
            # Fetch the batch once: every dataset access re-tokenizes the texts.
            items = valid_data[idx]
            X, loc, y = items[0], items[1], items[2]

            pred = model(X, loc)
            loss_list.append(loss_fn(pred, y).item())
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            seen += len(y)

            # Terminate early for testing purposes (batch is 0-based).
            if max > 0 and batch + 1 >= max:
                break

    # An empty loader yields NaN loss / zero accuracy instead of raising.
    valid_loss = np.mean(np.array(loss_list)) if loss_list else float("nan")
    correct = correct / seen if seen else 0.0
    print(f"Validation| \nAccuracy: {correct:>3f} | Average loss: {valid_loss:>8f} \n")
    return valid_loss, correct

In [None]:
# Name of the dataset variant: used below as a sub-directory of `folder`
# (train.csv / valid.csv / dict.txt) and as the prefix of saved checkpoints.
num_abbr = "two_abbr"
# Root directory for the input data and the `saves` output folder
# (presumably a mounted Google Drive path -- adjust for other environments).
folder = "drive/MyDrive/Bootcamp"

In [None]:
def save_model(model, save_dir):
    """Persist the model's current ``state_dict`` to disk.

    The target file is ``<save_dir>_<DD>_<HH>_<MM>_StateDict.pt``: ``save_dir``
    acts as a path prefix (callers put the epoch number in it) and the suffix
    records the current day of month, hour and minute.
    """
    stamp = datetime.now().strftime("%d_%H_%M")
    target = f"{save_dir}_{stamp}_StateDict.pt"
    torch.save(model.state_dict(), target)
    print("Model saved\n")

In [None]:
# Run on the first GPU when one is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Seed PyTorch so weight initialisation and batch shuffling are repeatable.
SEED = 0
torch.manual_seed(SEED)

# Set to a positive number to stop every loop after that many batches
# (quick smoke test); -1 processes all batches.
max_batches = -1

# Hyperparameters
learning_rate = 4e-5
batch_size = 16
epochs = 15

### Tokenizer -- must be chosen BEFORE the datasets are built, and should match
### the model selected below.
tokenizer = ELECTRA_TOKENIZER
# tokenizer = ELECTRA_BASE_TOKENIZER
# tokenizer = BERT_TOKENIZER

# Data
num_abbr = "two_abbr"
dictionary_file = f"{folder}/{num_abbr}/dict.txt"

train_df = pd.read_csv(f"{folder}/{num_abbr}/train.csv")
train_data = MedalDatasetTokenizer(train_df, tokenizer, dictionary_file, device = device)

valid_df = pd.read_csv(f"{folder}/{num_abbr}/valid.csv")
valid_data = MedalDatasetTokenizer(valid_df, tokenizer, dictionary_file, device = device)

### Models
# One output class per entry of the expansion dictionary.
output_size = len(train_data.label_ser)

model = Electra(output_size=output_size, size = 'small', device=device)
# model = Bert(output_size, device)

### Load saved weights. The matching model above must be initialized first.
### `save_model` stores a state_dict, so it has to be loaded into the model.
# path = f"{folder}/saves/Finished/BERT_TwoAbbr_Epoch8.pt"
# model.load_state_dict(torch.load(path, map_location=device))

optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
loss_fn = nn.CrossEntropyLoss()

# The loaders iterate over row positions; the datasets tokenize each batch of
# positions on access. A shuffling loader draws a new order on every pass, so
# building it once is enough. Validation order does not affect the metrics.
train_loader = DataLoader(
    range(len(train_data)),
    shuffle=True,
    batch_size=batch_size
)
valid_loader = DataLoader(
    range(len(valid_data)),
    shuffle=False,
    batch_size=batch_size
)

# Make sure the output directory exists before the first log line / checkpoint.
saves_dir = f"{folder}/saves"
os.makedirs(saves_dir, exist_ok=True)

# Train the model
for t in range(epochs):
    print(f"\nEpoch {t+1}\n-------------------------------")

    start = time()
    train_loss, train_accuracy = train_loop(train_data, model, loss_fn, optimizer, train_loader, max = max_batches)
    end = time()
    print(f"Training time: {end-start:>0.1f} sec\n")

    valid_loss, valid_accuracy = valid_loop(valid_data, model, loss_fn, valid_loader, max = max_batches)

    # Append one CSV-style line per epoch: epoch, train loss/acc, valid loss/acc.
    with open(f"{saves_dir}/loss.txt", "a") as file:
        file.write(f"\n{t+1},{train_loss},{train_accuracy},{valid_loss},{valid_accuracy}")

    save_model(model, f"{saves_dir}/{num_abbr}_epoch{t+1}")

Some weights of the model checkpoint at google/electra-small-discriminator were not used when initializing ElectraModel: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|          | 0/271 [00:00<?, ?it/s]


Epoch 1
-------------------------------


  8%|▊         | 21/271 [00:11<02:12,  1.88it/s]


Batch loss: 3.316675


 15%|█▌        | 41/271 [00:21<02:03,  1.86it/s]


Batch loss: 3.209951


 23%|██▎       | 61/271 [00:32<01:57,  1.79it/s]


Batch loss: 3.182309


 30%|██▉       | 81/271 [00:43<01:40,  1.89it/s]


Batch loss: 3.106736


 37%|███▋      | 101/271 [00:53<01:29,  1.89it/s]


Batch loss: 2.986698


 45%|████▍     | 121/271 [01:04<01:20,  1.87it/s]


Batch loss: 3.222370


 52%|█████▏    | 141/271 [01:14<01:07,  1.93it/s]


Batch loss: 3.355489


 59%|█████▉    | 161/271 [01:25<00:57,  1.91it/s]


Batch loss: 3.151188


 67%|██████▋   | 181/271 [01:36<00:48,  1.86it/s]


Batch loss: 3.098004


 74%|███████▍  | 201/271 [01:46<00:37,  1.88it/s]


Batch loss: 2.916601


 82%|████████▏ | 221/271 [01:57<00:26,  1.89it/s]


Batch loss: 2.874860


 89%|████████▉ | 241/271 [02:07<00:15,  1.88it/s]


Batch loss: 2.727791


 96%|█████████▋| 261/271 [02:18<00:05,  1.92it/s]


Batch loss: 2.936265


100%|██████████| 271/271 [02:23<00:00,  1.89it/s]
0it [00:00, ?it/s]

Accuracy: 0.103448 | Average Loss: 3.068583

Training time: 143.3 sec



89it [00:29,  3.02it/s]


Validation| 
Accuracy: 0.218440 | Average loss: 2.748866 



  0%|          | 0/271 [00:00<?, ?it/s]

Model saved


Epoch 2
-------------------------------


  8%|▊         | 21/271 [00:11<02:11,  1.90it/s]


Batch loss: 2.680961


 15%|█▌        | 41/271 [00:21<02:00,  1.91it/s]


Batch loss: 2.751211


 23%|██▎       | 61/271 [00:32<01:47,  1.95it/s]


Batch loss: 2.056321


 30%|██▉       | 81/271 [00:42<01:44,  1.82it/s]


Batch loss: 2.320968


 37%|███▋      | 101/271 [00:53<01:32,  1.84it/s]


Batch loss: 2.173233


 45%|████▍     | 121/271 [01:04<01:19,  1.88it/s]


Batch loss: 2.059298


 52%|█████▏    | 141/271 [01:15<01:09,  1.88it/s]


Batch loss: 2.507837


 59%|█████▉    | 161/271 [01:25<00:57,  1.90it/s]


Batch loss: 1.871620


 67%|██████▋   | 181/271 [01:36<00:46,  1.92it/s]


Batch loss: 1.907808


 74%|███████▍  | 201/271 [01:47<00:38,  1.84it/s]


Batch loss: 1.895721


 82%|████████▏ | 221/271 [01:58<00:26,  1.85it/s]


Batch loss: 1.688257


 89%|████████▉ | 241/271 [02:08<00:15,  1.95it/s]


Batch loss: 2.039112


 96%|█████████▋| 261/271 [02:19<00:05,  1.89it/s]


Batch loss: 1.880659


100%|██████████| 271/271 [02:24<00:00,  1.88it/s]
0it [00:00, ?it/s]

Accuracy: 0.383707 | Average Loss: 2.219148

Training time: 144.5 sec



89it [00:29,  2.97it/s]


Validation| 
Accuracy: 0.510638 | Average loss: 1.710829 



  0%|          | 0/271 [00:00<?, ?it/s]

Model saved


Epoch 3
-------------------------------


  8%|▊         | 21/271 [00:11<02:12,  1.89it/s]


Batch loss: 1.548982


 15%|█▌        | 41/271 [00:22<02:00,  1.91it/s]


Batch loss: 1.221999


 23%|██▎       | 61/271 [00:32<01:50,  1.90it/s]


Batch loss: 1.434161


 30%|██▉       | 81/271 [00:43<01:42,  1.85it/s]


Batch loss: 1.443007


 37%|███▋      | 101/271 [00:54<01:30,  1.88it/s]


Batch loss: 1.252715


 45%|████▍     | 121/271 [01:04<01:20,  1.87it/s]


Batch loss: 1.068679


 52%|█████▏    | 141/271 [01:15<01:08,  1.91it/s]


Batch loss: 1.556013


 59%|█████▉    | 161/271 [01:26<00:59,  1.86it/s]


Batch loss: 1.164331


 67%|██████▋   | 181/271 [01:37<00:50,  1.80it/s]


Batch loss: 1.366306


 74%|███████▍  | 201/271 [01:47<00:37,  1.85it/s]


Batch loss: 0.960295


 82%|████████▏ | 221/271 [01:58<00:26,  1.88it/s]


Batch loss: 1.369870


 89%|████████▉ | 241/271 [02:09<00:15,  1.88it/s]


Batch loss: 1.299167


 96%|█████████▋| 261/271 [02:20<00:05,  1.83it/s]


Batch loss: 1.027079


100%|██████████| 271/271 [02:25<00:00,  1.86it/s]
0it [00:00, ?it/s]

Accuracy: 0.619301 | Average Loss: 1.398892

Training time: 145.4 sec



89it [00:29,  2.98it/s]


Validation| 
Accuracy: 0.648936 | Average loss: 1.180418 



  0%|          | 0/271 [00:00<?, ?it/s]

Model saved


Epoch 4
-------------------------------


  8%|▊         | 21/271 [00:11<02:13,  1.87it/s]


Batch loss: 1.036129


 15%|█▌        | 41/271 [00:22<02:03,  1.86it/s]


Batch loss: 0.738744


 23%|██▎       | 61/271 [00:33<01:53,  1.85it/s]


Batch loss: 1.173003


 30%|██▉       | 81/271 [00:43<01:41,  1.88it/s]


Batch loss: 1.234691


 37%|███▋      | 101/271 [00:54<01:32,  1.84it/s]


Batch loss: 0.879656


 45%|████▍     | 121/271 [01:05<01:19,  1.89it/s]


Batch loss: 1.479029


 52%|█████▏    | 141/271 [01:16<01:11,  1.83it/s]


Batch loss: 1.353399


 59%|█████▉    | 161/271 [01:26<00:58,  1.90it/s]


Batch loss: 0.764790


 67%|██████▋   | 181/271 [01:37<00:48,  1.84it/s]


Batch loss: 0.973625


 74%|███████▍  | 201/271 [01:48<00:36,  1.90it/s]


Batch loss: 0.778133


 82%|████████▏ | 221/271 [01:58<00:25,  1.95it/s]


Batch loss: 0.910890


 89%|████████▉ | 241/271 [02:09<00:16,  1.87it/s]


Batch loss: 0.699733


 96%|█████████▋| 261/271 [02:20<00:05,  1.87it/s]


Batch loss: 0.709679


100%|██████████| 271/271 [02:25<00:00,  1.87it/s]
0it [00:00, ?it/s]

Accuracy: 0.734321 | Average Loss: 0.989171

Training time: 145.1 sec



89it [00:29,  3.03it/s]


Validation| 
Accuracy: 0.724823 | Average loss: 0.920022 



  0%|          | 0/271 [00:00<?, ?it/s]

Model saved


Epoch 5
-------------------------------


  8%|▊         | 21/271 [00:11<02:13,  1.87it/s]


Batch loss: 1.066071


 15%|█▌        | 41/271 [00:22<02:01,  1.90it/s]


Batch loss: 0.900566


 23%|██▎       | 61/271 [00:32<01:48,  1.93it/s]


Batch loss: 0.462323


 30%|██▉       | 81/271 [00:43<01:39,  1.90it/s]


Batch loss: 0.767311


 37%|███▋      | 101/271 [00:53<01:30,  1.88it/s]


Batch loss: 0.882168


 45%|████▍     | 121/271 [01:04<01:18,  1.92it/s]


Batch loss: 0.476189


 52%|█████▏    | 141/271 [01:14<01:10,  1.85it/s]


Batch loss: 0.953483


 59%|█████▉    | 161/271 [01:25<00:57,  1.91it/s]


Batch loss: 1.413320


 67%|██████▋   | 181/271 [01:35<00:47,  1.89it/s]


Batch loss: 0.624808


 74%|███████▍  | 201/271 [01:45<00:36,  1.91it/s]


Batch loss: 0.704657


 82%|████████▏ | 221/271 [01:56<00:25,  1.96it/s]


Batch loss: 0.544443


 89%|████████▉ | 241/271 [02:06<00:15,  1.94it/s]


Batch loss: 0.532923


 96%|█████████▋| 261/271 [02:17<00:05,  1.96it/s]


Batch loss: 0.583559


100%|██████████| 271/271 [02:21<00:00,  1.91it/s]
0it [00:00, ?it/s]

Accuracy: 0.809535 | Average Loss: 0.708362

Training time: 141.9 sec



89it [00:28,  3.13it/s]


Validation| 
Accuracy: 0.785816 | Average loss: 0.699816 



  0%|          | 0/271 [00:00<?, ?it/s]

Model saved


Epoch 6
-------------------------------


  8%|▊         | 21/271 [00:11<02:11,  1.90it/s]


Batch loss: 0.404163


 15%|█▌        | 41/271 [00:21<01:59,  1.93it/s]


Batch loss: 0.495398


 23%|██▎       | 61/271 [00:31<01:49,  1.91it/s]


Batch loss: 0.324185


 30%|██▉       | 81/271 [00:42<01:36,  1.96it/s]


Batch loss: 0.530171


 37%|███▋      | 101/271 [00:52<01:29,  1.90it/s]


Batch loss: 0.477407


 45%|████▍     | 121/271 [01:03<01:19,  1.89it/s]


Batch loss: 0.315491


 52%|█████▏    | 141/271 [01:13<01:06,  1.94it/s]


Batch loss: 0.569764


 59%|█████▉    | 161/271 [01:23<00:57,  1.91it/s]


Batch loss: 0.659653


 67%|██████▋   | 181/271 [01:34<00:46,  1.92it/s]


Batch loss: 0.694194


 74%|███████▍  | 201/271 [01:44<00:36,  1.94it/s]


Batch loss: 0.656626


 82%|████████▏ | 221/271 [01:54<00:26,  1.90it/s]


Batch loss: 0.538707


 89%|████████▉ | 241/271 [02:05<00:15,  1.96it/s]


Batch loss: 0.541560


 96%|█████████▋| 261/271 [02:15<00:05,  1.92it/s]


Batch loss: 0.770484


100%|██████████| 271/271 [02:20<00:00,  1.93it/s]
0it [00:00, ?it/s]

Accuracy: 0.858598 | Average Loss: 0.533286

Training time: 140.5 sec



89it [00:28,  3.16it/s]


Validation| 
Accuracy: 0.809220 | Average loss: 0.603367 



  0%|          | 0/271 [00:00<?, ?it/s]

Model saved


Epoch 7
-------------------------------


  8%|▊         | 21/271 [00:11<02:11,  1.90it/s]


Batch loss: 0.311199


 15%|█▌        | 41/271 [00:21<01:59,  1.92it/s]


Batch loss: 0.715249


 23%|██▎       | 61/271 [00:31<01:45,  1.98it/s]


Batch loss: 0.505921


 30%|██▉       | 81/271 [00:42<01:35,  1.98it/s]


Batch loss: 0.451979


 37%|███▋      | 101/271 [00:52<01:26,  1.96it/s]


Batch loss: 0.439361


 45%|████▍     | 121/271 [01:02<01:17,  1.93it/s]


Batch loss: 0.531023


 52%|█████▏    | 141/271 [01:12<01:08,  1.91it/s]


Batch loss: 0.252437


 59%|█████▉    | 161/271 [01:23<00:55,  2.00it/s]


Batch loss: 0.249036


 67%|██████▋   | 181/271 [01:33<00:46,  1.93it/s]


Batch loss: 0.264207


 74%|███████▍  | 201/271 [01:44<00:36,  1.91it/s]


Batch loss: 0.399537


 82%|████████▏ | 221/271 [01:54<00:25,  1.95it/s]


Batch loss: 0.680194


 89%|████████▉ | 241/271 [02:04<00:15,  1.92it/s]


Batch loss: 0.388533


 96%|█████████▋| 261/271 [02:15<00:05,  1.88it/s]


Batch loss: 0.147733


100%|██████████| 271/271 [02:20<00:00,  1.93it/s]
0it [00:00, ?it/s]

Accuracy: 0.878963 | Average Loss: 0.431386

Training time: 140.1 sec



89it [00:28,  3.13it/s]


Validation| 
Accuracy: 0.820567 | Average loss: 0.565737 



  0%|          | 0/271 [00:00<?, ?it/s]

Model saved


Epoch 8
-------------------------------


  8%|▊         | 21/271 [00:11<02:12,  1.89it/s]


Batch loss: 0.156401


 15%|█▌        | 41/271 [00:21<02:03,  1.86it/s]


Batch loss: 0.220656


 23%|██▎       | 61/271 [00:31<01:45,  1.99it/s]


Batch loss: 0.214086


 30%|██▉       | 81/271 [00:42<01:36,  1.97it/s]


Batch loss: 0.241881


 37%|███▋      | 101/271 [00:52<01:25,  1.98it/s]


Batch loss: 0.278031


 45%|████▍     | 121/271 [01:02<01:14,  2.02it/s]


Batch loss: 0.699552


 52%|█████▏    | 141/271 [01:12<01:08,  1.89it/s]


Batch loss: 0.173785


 59%|█████▉    | 161/271 [01:23<00:56,  1.94it/s]


Batch loss: 0.196016


 67%|██████▋   | 181/271 [01:33<00:46,  1.92it/s]


Batch loss: 0.156062


 74%|███████▍  | 201/271 [01:44<00:35,  1.96it/s]


Batch loss: 0.278502


 82%|████████▏ | 221/271 [01:54<00:25,  1.96it/s]


Batch loss: 0.489513


 89%|████████▉ | 241/271 [02:04<00:15,  1.93it/s]


Batch loss: 0.399361


 96%|█████████▋| 261/271 [02:15<00:05,  1.97it/s]


Batch loss: 0.174789


100%|██████████| 271/271 [02:19<00:00,  1.94it/s]
0it [00:00, ?it/s]

Accuracy: 0.900255 | Average Loss: 0.346762

Training time: 139.8 sec



89it [00:27,  3.20it/s]


Validation| 
Accuracy: 0.825532 | Average loss: 0.564165 



  0%|          | 0/271 [00:00<?, ?it/s]

Model saved


Epoch 9
-------------------------------


  8%|▊         | 21/271 [00:11<02:05,  1.99it/s]


Batch loss: 0.422853


 15%|█▌        | 41/271 [00:21<02:00,  1.91it/s]


Batch loss: 0.470206


 23%|██▎       | 61/271 [00:31<01:45,  1.98it/s]


Batch loss: 0.287862


 30%|██▉       | 81/271 [00:41<01:37,  1.94it/s]


Batch loss: 0.265546


 37%|███▋      | 101/271 [00:52<01:26,  1.96it/s]


Batch loss: 0.240593


 45%|████▍     | 121/271 [01:02<01:16,  1.96it/s]


Batch loss: 0.381315


 52%|█████▏    | 141/271 [01:12<01:07,  1.93it/s]


Batch loss: 0.395015


 59%|█████▉    | 161/271 [01:23<00:57,  1.91it/s]


Batch loss: 0.201274


 67%|██████▋   | 181/271 [01:33<00:45,  1.96it/s]


Batch loss: 0.180784


 74%|███████▍  | 201/271 [01:44<00:37,  1.88it/s]


Batch loss: 0.305480


 82%|████████▏ | 221/271 [01:54<00:26,  1.90it/s]


Batch loss: 0.186168


 89%|████████▉ | 241/271 [02:05<00:15,  1.92it/s]


Batch loss: 0.330500


 96%|█████████▋| 261/271 [02:15<00:05,  1.98it/s]


Batch loss: 0.178136


100%|██████████| 271/271 [02:19<00:00,  1.94it/s]
0it [00:00, ?it/s]

Accuracy: 0.898866 | Average Loss: 0.337770

Training time: 139.7 sec



89it [00:28,  3.10it/s]


Validation| 
Accuracy: 0.825532 | Average loss: 0.535783 



  0%|          | 0/271 [00:00<?, ?it/s]

Model saved


Epoch 10
-------------------------------


  8%|▊         | 21/271 [00:11<02:13,  1.87it/s]


Batch loss: 0.168528


 15%|█▌        | 41/271 [00:21<01:59,  1.93it/s]


Batch loss: 0.318833


 23%|██▎       | 61/271 [00:31<01:48,  1.94it/s]


Batch loss: 0.196145


 30%|██▉       | 81/271 [00:42<01:40,  1.90it/s]


Batch loss: 0.221891


 37%|███▋      | 101/271 [00:52<01:25,  1.98it/s]


Batch loss: 0.478945


 45%|████▍     | 121/271 [01:02<01:15,  1.97it/s]


Batch loss: 0.085633


 52%|█████▏    | 141/271 [01:13<01:08,  1.90it/s]


Batch loss: 0.345623


 59%|█████▉    | 161/271 [01:23<00:57,  1.93it/s]


Batch loss: 0.232022


 67%|██████▋   | 181/271 [01:33<00:47,  1.88it/s]


Batch loss: 0.109503


 74%|███████▍  | 201/271 [01:44<00:36,  1.93it/s]


Batch loss: 0.081754


 82%|████████▏ | 221/271 [01:54<00:26,  1.91it/s]


Batch loss: 0.362622


 89%|████████▉ | 241/271 [02:05<00:15,  1.90it/s]


Batch loss: 0.175513


 96%|█████████▋| 261/271 [02:15<00:05,  1.90it/s]


Batch loss: 0.264297


100%|██████████| 271/271 [02:20<00:00,  1.93it/s]
0it [00:00, ?it/s]

Accuracy: 0.928026 | Average Loss: 0.236551

Training time: 140.2 sec



89it [00:28,  3.15it/s]


Validation| 
Accuracy: 0.830496 | Average loss: 0.529013 



  0%|          | 0/271 [00:00<?, ?it/s]

Model saved


Epoch 11
-------------------------------


  8%|▊         | 21/271 [00:11<02:13,  1.87it/s]


Batch loss: 0.153109


 15%|█▌        | 41/271 [00:21<02:01,  1.90it/s]


Batch loss: 0.163488


 23%|██▎       | 61/271 [00:32<01:46,  1.97it/s]


Batch loss: 0.358695


 30%|██▉       | 81/271 [00:42<01:37,  1.94it/s]


Batch loss: 0.235993


 37%|███▋      | 101/271 [00:52<01:29,  1.89it/s]


Batch loss: 0.218272


 45%|████▍     | 121/271 [01:03<01:15,  1.98it/s]


Batch loss: 0.331826


 52%|█████▏    | 141/271 [01:13<01:07,  1.94it/s]


Batch loss: 0.232839


 59%|█████▉    | 161/271 [01:24<00:57,  1.92it/s]


Batch loss: 0.229420


 67%|██████▋   | 181/271 [01:34<00:45,  1.96it/s]


Batch loss: 0.220101


 74%|███████▍  | 201/271 [01:44<00:36,  1.93it/s]


Batch loss: 0.089465


 82%|████████▏ | 221/271 [01:55<00:26,  1.89it/s]


Batch loss: 0.063082


 89%|████████▉ | 241/271 [02:05<00:15,  1.94it/s]


Batch loss: 0.070616


 96%|█████████▋| 261/271 [02:16<00:05,  1.90it/s]


Batch loss: 0.195583


100%|██████████| 271/271 [02:21<00:00,  1.92it/s]
0it [00:00, ?it/s]

Accuracy: 0.936126 | Average Loss: 0.203626

Training time: 141.0 sec



89it [00:27,  3.21it/s]


Validation| 
Accuracy: 0.843972 | Average loss: 0.507432 



  0%|          | 0/271 [00:00<?, ?it/s]

Model saved


Epoch 12
-------------------------------


  8%|▊         | 21/271 [00:11<02:12,  1.89it/s]


Batch loss: 0.139116


 15%|█▌        | 41/271 [00:21<01:56,  1.97it/s]


Batch loss: 0.231439


 23%|██▎       | 61/271 [00:31<01:50,  1.90it/s]


Batch loss: 0.074023


 30%|██▉       | 81/271 [00:42<01:36,  1.98it/s]


Batch loss: 0.184569


 37%|███▋      | 101/271 [00:52<01:28,  1.92it/s]


Batch loss: 0.047464


 45%|████▍     | 121/271 [01:03<01:17,  1.93it/s]


Batch loss: 0.150521


 52%|█████▏    | 141/271 [01:13<01:07,  1.94it/s]


Batch loss: 0.101123


 59%|█████▉    | 161/271 [01:24<00:56,  1.94it/s]


Batch loss: 0.255401


 67%|██████▋   | 181/271 [01:34<00:47,  1.91it/s]


Batch loss: 0.085913


 74%|███████▍  | 201/271 [01:45<00:37,  1.84it/s]


Batch loss: 0.231738


 82%|████████▏ | 221/271 [01:55<00:25,  1.98it/s]


Batch loss: 0.155118


 89%|████████▉ | 241/271 [02:05<00:15,  1.93it/s]


Batch loss: 0.350716


 96%|█████████▋| 261/271 [02:16<00:05,  1.93it/s]


Batch loss: 0.127959


100%|██████████| 271/271 [02:21<00:00,  1.92it/s]
0it [00:00, ?it/s]

Accuracy: 0.944457 | Average Loss: 0.185640

Training time: 141.2 sec



89it [00:28,  3.16it/s]


Validation| 
Accuracy: 0.833333 | Average loss: 0.578073 



  0%|          | 0/271 [00:00<?, ?it/s]

Model saved


Epoch 13
-------------------------------


  8%|▊         | 21/271 [00:11<02:12,  1.88it/s]


Batch loss: 0.118706


 15%|█▌        | 41/271 [00:21<02:01,  1.89it/s]


Batch loss: 0.236919


 23%|██▎       | 61/271 [00:31<01:46,  1.96it/s]


Batch loss: 0.058333


 30%|██▉       | 81/271 [00:42<01:40,  1.89it/s]


Batch loss: 0.142686


 37%|███▋      | 101/271 [00:52<01:30,  1.88it/s]


Batch loss: 0.087969


 45%|████▍     | 121/271 [01:03<01:21,  1.85it/s]


Batch loss: 0.042907


 52%|█████▏    | 141/271 [01:13<01:08,  1.89it/s]


Batch loss: 0.321643


 59%|█████▉    | 161/271 [01:24<00:58,  1.88it/s]


Batch loss: 0.218697


 67%|██████▋   | 181/271 [01:34<00:45,  1.98it/s]


Batch loss: 0.510103


 74%|███████▍  | 201/271 [01:45<00:35,  1.94it/s]


Batch loss: 0.274774


 82%|████████▏ | 221/271 [01:55<00:25,  1.97it/s]


Batch loss: 0.149375


 89%|████████▉ | 241/271 [02:05<00:15,  1.91it/s]


Batch loss: 0.179435


 96%|█████████▋| 261/271 [02:16<00:05,  1.92it/s]


Batch loss: 0.058986


100%|██████████| 271/271 [02:21<00:00,  1.92it/s]
0it [00:00, ?it/s]

Accuracy: 0.945152 | Average Loss: 0.171151

Training time: 141.1 sec



89it [00:28,  3.08it/s]


Validation| 
Accuracy: 0.851773 | Average loss: 0.509286 



  0%|          | 0/271 [00:00<?, ?it/s]

Model saved


Epoch 14
-------------------------------


  8%|▊         | 21/271 [00:11<02:09,  1.93it/s]


Batch loss: 0.131341


 15%|█▌        | 41/271 [00:21<01:59,  1.92it/s]


Batch loss: 0.080656


 23%|██▎       | 61/271 [00:31<01:48,  1.94it/s]


Batch loss: 0.052505


 30%|██▉       | 81/271 [00:42<01:37,  1.94it/s]


Batch loss: 0.054230


 37%|███▋      | 101/271 [00:52<01:29,  1.90it/s]


Batch loss: 0.409438


 45%|████▍     | 121/271 [01:03<01:19,  1.89it/s]


Batch loss: 0.204820


 52%|█████▏    | 141/271 [01:13<01:09,  1.87it/s]


Batch loss: 0.133093


 59%|█████▉    | 161/271 [01:24<00:57,  1.90it/s]


Batch loss: 0.036316


 67%|██████▋   | 181/271 [01:34<00:47,  1.89it/s]


Batch loss: 0.239916


 74%|███████▍  | 201/271 [01:45<00:37,  1.87it/s]


Batch loss: 0.070382


 82%|████████▏ | 221/271 [01:55<00:26,  1.89it/s]


Batch loss: 0.187570


 89%|████████▉ | 241/271 [02:06<00:15,  1.93it/s]


Batch loss: 0.074272


 96%|█████████▋| 261/271 [02:16<00:05,  1.94it/s]


Batch loss: 0.052778


100%|██████████| 271/271 [02:21<00:00,  1.91it/s]
0it [00:00, ?it/s]

Accuracy: 0.953483 | Average Loss: 0.142216

Training time: 141.6 sec



89it [00:28,  3.10it/s]


Validation| 
Accuracy: 0.850355 | Average loss: 0.525161 



  0%|          | 0/271 [00:00<?, ?it/s]

Model saved


Epoch 15
-------------------------------


  8%|▊         | 21/271 [00:10<02:07,  1.97it/s]


Batch loss: 0.135405


 15%|█▌        | 41/271 [00:21<02:00,  1.91it/s]


Batch loss: 0.058578


 23%|██▎       | 61/271 [00:31<01:48,  1.94it/s]


Batch loss: 0.209884


 30%|██▉       | 81/271 [00:42<01:38,  1.92it/s]


Batch loss: 0.060957


 37%|███▋      | 101/271 [00:52<01:28,  1.92it/s]


Batch loss: 0.058514


 45%|████▍     | 121/271 [01:03<01:18,  1.92it/s]


Batch loss: 0.154392


 52%|█████▏    | 141/271 [01:13<01:07,  1.93it/s]


Batch loss: 0.022605


 59%|█████▉    | 161/271 [01:24<00:59,  1.86it/s]


Batch loss: 0.090222


 67%|██████▋   | 181/271 [01:34<00:46,  1.93it/s]


Batch loss: 0.090156


 74%|███████▍  | 201/271 [01:45<00:35,  1.96it/s]


Batch loss: 0.235534


 82%|████████▏ | 221/271 [01:56<00:27,  1.83it/s]


Batch loss: 0.081376


 89%|████████▉ | 241/271 [02:06<00:15,  1.91it/s]


Batch loss: 0.103751


 96%|█████████▋| 261/271 [02:16<00:05,  1.91it/s]


Batch loss: 0.031581


100%|██████████| 271/271 [02:21<00:00,  1.91it/s]
0it [00:00, ?it/s]

Accuracy: 0.956492 | Average Loss: 0.137436

Training time: 141.8 sec



89it [00:28,  3.09it/s]


Validation| 
Accuracy: 0.848227 | Average loss: 0.558989 

Model saved

