In [1]:
import numpy as np
import torch

from learners import Learner, GEM, AGEM, ER

In [2]:
# Global random seed; passed to torch.manual_seed before the dataset is shuffled.
seed = 42
# Number of consecutive train/test chunks created in the "Split MNIST" cell.
# The experiment cells also use it as the number of repeated runs.
n_tasks = 5

### Download MNIST

In [3]:
# Copyright 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import os
import subprocess

mnist_path = "data/mnist.npz"
# URL from: https://github.com/fchollet/keras/blob/master/keras/datasets/mnist.py
mnist_url = "https://s3.amazonaws.com/img-datasets/mnist.npz"

# Make sure the target directory exists before anything is written into it.
os.makedirs(os.path.dirname(mnist_path), exist_ok=True)

if not os.path.exists(mnist_path):
    # Download to a temporary name and rename only on success, so an
    # interrupted download is never mistaken for a complete archive.
    # Requires `wget` on PATH; check=True raises if the download fails.
    partial_path = mnist_path + ".part"
    subprocess.run(["wget", "-O", partial_path, mnist_url], check=True)
    os.replace(partial_path, mnist_path)

# The context manager closes the archive even if one of the keys is missing.
with np.load(mnist_path) as mnist_npz:
    x_tr = torch.from_numpy(mnist_npz['x_train'])
    y_tr = torch.from_numpy(mnist_npz['y_train']).long()
    x_te = torch.from_numpy(mnist_npz['x_test'])
    y_te = torch.from_numpy(mnist_npz['y_test']).long()

# Cache the tensors so the following cells can load them directly.
torch.save((x_tr, y_tr), 'data/mnist_train.pt')
torch.save((x_te, y_te), 'data/mnist_test.pt')

### Preprocessing and Train/Test Split

In [4]:
# Seed torch up front so the two permutations drawn below are repeatable.
torch.manual_seed(seed)

# Reload the cached tensors written by the download cell.
x_tr, y_tr = torch.load('data/mnist_train.pt')  # 60000 samples
x_te, y_te = torch.load('data/mnist_test.pt')  # 10000 samples

# Flatten every image into one row and divide the pixel values by 255.
x_tr = x_tr.float().flatten(start_dim=1).div(255.0)
x_te = x_te.float().flatten(start_dim=1).div(255.0)

# Labels become flat int64 vectors.
y_tr = y_tr.long().view(-1)
y_te = y_te.long().view(-1)

# Draw one permutation per set (train first, then test) ...
p_tr = torch.randperm(len(x_tr))
p_te = torch.randperm(len(x_te))

# ... and apply it to inputs and labels alike so pairs stay aligned.
x_tr = x_tr[p_tr]
y_tr = y_tr[p_tr]
x_te = x_te[p_te]
y_te = y_te[p_te]

### Split MNIST

In [5]:
# Number of samples each task receives from the train / test set.
tr_task_size = 10000
te_task_size = 2000

tasks_tr, tasks_te = [], []

# Task t gets the t-th consecutive chunk of the (already shuffled) data.
for t in range(n_tasks):
    tr_chunk = slice(t * tr_task_size, (t + 1) * tr_task_size)
    te_chunk = slice(t * te_task_size, (t + 1) * te_task_size)
    tasks_tr.append([x_tr[tr_chunk], y_tr[tr_chunk]])
    tasks_te.append([x_te[te_chunk], y_te[te_chunk]])

# Per-task chunks.
torch.save([tasks_tr, tasks_te], 'data/mnist_splitted.pt')

# The same samples as a single undivided train / test pair.
n_tr_used = tr_task_size * n_tasks
n_te_used = te_task_size * n_tasks
torch.save(
    [
        [x_tr[:n_tr_used], y_tr[:n_tr_used]],
        [x_te[:n_te_used], y_te[:n_te_used]],
    ],
    'data/mnist_all.pt',
)

### Skewed Split: For simulating training on unbalanced datasets

In [6]:
from collections import Counter

# Per-split class fractions consumed by skewed_split.
# Each row corresponds to one split; each column corresponds to one class (0-9).
# A cell is the fraction of that class's samples that goes into that split.
# Here every split takes 60% of two classes and 10% of each remaining class.
class_probs = [
    [0.6, 0.6, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
    [0.1, 0.1, 0.6, 0.6, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
    [0.1, 0.1, 0.1, 0.1, 0.6, 0.6, 0.1, 0.1, 0.1, 0.1],
    [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.6, 0.6, 0.1, 0.1],
    [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.6, 0.6],
]

def skewed_split(X, y, class_probs):
    '''
    Lazily partition ``(X, y)`` into class-imbalanced splits.

    Parameters
    ----------
    X : torch.Tensor
        Samples, indexed along the first dimension.
    y : torch.Tensor
        1-D integer label tensor aligned with ``X``. Classes are assumed to
        be ``0 .. K-1`` where ``K`` is the number of distinct labels in ``y``.
    class_probs : sequence of sequences of float
        ``class_probs[s][c]`` is the fraction of class ``c``'s samples that
        split ``s`` receives. A row must not have more entries than ``K``.

    Yields
    ------
    (torch.Tensor, torch.Tensor)
        ``(X_split, y_split)`` for each row of ``class_probs``, in order.
        Inside a split the samples are grouped by class.

    Notes
    -----
    A sample is handed out at most once. If the fractions of a class add up
    to more than 1, later splits simply receive whatever is left. The random
    selection uses the global torch RNG (``torch.randperm``).
    '''
    count_dict = Counter(y.tolist())  # count_dict[class] = num_of_data_in_class
    n_classes = len(count_dict)

    # squeeze(1) rather than squeeze(): the result must stay 1-D even when a
    # class has exactly one sample, otherwise it cannot be indexed below.
    indices_per_class = [(y == c).nonzero().squeeze(1) for c in range(n_classes)]

    # Random positions INTO indices_per_class[c]; consumed front to back.
    idxs = [torch.randperm(count_dict[c]) for c in range(n_classes)]

    for prob_set in class_probs:
        idxs_to_get = []
        for c, prob in enumerate(prob_set):
            end_idx = int(prob * count_dict[c])
            idxs_to_get.append(indices_per_class[c][idxs[c][:end_idx]])
            # Drop the positions just used so no sample is yielded twice.
            idxs[c] = idxs[c][end_idx:]

        idxs_to_get = torch.cat(idxs_to_get)
        yield X[idxs_to_get], y[idxs_to_get]


# print(Counter(y_te.numpy()))        
# for new_x, new_y in skewed_split(x_te, y_te, class_probs):
#     print(Counter(new_y.numpy()))

### ML Model

In [7]:
import torch.nn as nn

class Classifier(nn.Module):
    """Feed-forward classifier with one hidden layer.

    The forward pass is Linear -> ReLU -> Dropout -> Linear and returns the
    raw (un-normalised) class scores.
    """

    def __init__(self, input_size, hidden_size, drop_prob, output_size):
        super().__init__()

        # Parameter-free layers.
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(drop_prob)

        # Trainable layers.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the logits for a batch ``x`` whose last dim is ``input_size``."""
        hidden = self.fc1(x)
        hidden = self.relu(hidden)
        hidden = self.dropout(hidden)
        return self.fc2(hidden)

In [8]:
# Hyper-parameters
# Model dimensions: length of the flattened input vector and number of output logits.
input_size = 784
output_size = 10

# Width of the hidden layer and the probability handed to nn.Dropout.
hidden_size = 256
drop_prob = 0.8
# Optimisation settings: passes over each training set, Adam learning rate,
# and DataLoader batch size.
num_epochs = 5
learning_rate = 0.001
batch_size = 128

In [9]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Non-continual Baseline

In [10]:
from tqdm import tqdm
import torch.nn.functional as f
from torch.utils.data import TensorDataset, DataLoader

# Seed every RNG in play from the notebook-wide `seed` so the run is repeatable.
np.random.seed(seed)
torch.manual_seed(seed)
if device.type == 'cuda':
    torch.cuda.manual_seed_all(seed)

# Train on the full (shuffled) training set in one go: no task structure here.
train_data = TensorDataset(x_tr, y_tr)
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
test_data = TensorDataset(x_te, y_te)
test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)

model = Classifier(input_size, hidden_size, drop_prob, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# ---- training ----
model.train()
for ep in tqdm(range(num_epochs)):
    for inputs, labels in train_loader:
        # .to(device) is a no-op on CPU, so no device check is needed.
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        out = model(inputs.float())
        loss = criterion(out, labels.long())
        loss.backward()

        optimizer.step()

# ---- evaluation ----
model.eval()
val_loss = 0
corrects = 0
total = 0
# No gradients are needed for evaluation; skipping them saves memory and time.
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        out = model(inputs.float())
        # The predicted class is the largest logit; softmax would not change it.
        preds = out.argmax(dim=-1)

        val_loss += criterion(out, labels.long()).item()
        corrects += (preds == labels).sum().item()
        total += labels.size(0)

# The reported loss is the mean of the per-batch mean losses.
print("Loss: {:.6f}, Acc: {:.6f}".format(val_loss/len(test_loader), (corrects/total)*100))

100%|██████████| 5/5 [00:13<00:00,  2.71s/it]

Loss: 0.129925, Acc: 96.160000





### Continual Baseline

In [11]:
from tqdm import tqdm
import torch.nn.functional as f
from torch.utils.data import TensorDataset, DataLoader

# Seed every RNG in play from the notebook-wide `seed` so the run is repeatable.
np.random.seed(seed)
torch.manual_seed(seed)
if device.type == 'cuda':
    torch.cuda.manual_seed_all(seed)

# Not used in this cell; kept in case other cells read them.
all_trn_f1_mean = np.array([])
all_val_f1_mean = np.array([])

task_perm_final_accs = []

# constant validation data across tasks
# shuffle=True is kept on purpose: iterating a shuffling loader draws from the
# global torch RNG, so changing it would alter the random stream seen by the
# training that follows each evaluation.
test_data = TensorDataset(x_te, y_te)
test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)

# Five skewed tasks over ten classes: task k takes 90% of classes 2k and 2k+1
# and 2.5% of every other class.
a = 0.9
class_probs = [[0.025] * 10 for _ in range(5)]
for row, probs in enumerate(class_probs):
    probs[2 * row], probs[2 * row + 1] = a, a

# Each outer iteration is one independent run: a fresh model trained on the
# tasks in a newly shuffled order.
for t in range(n_tasks):

    # initialize models
    model = Classifier(input_size, hidden_size, drop_prob, output_size).to(device)
    criterion = nn.CrossEntropyLoss()
    learner = Learner(model, criterion, device=device)

    # task loop
    np.random.shuffle(class_probs)  # in-place: the order carries over to the next run
    for T_x, T_y in skewed_split(x_tr, y_tr, class_probs):
        train_data = TensorDataset(T_x, T_y)
        train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)

        learner.prepare(optimizer=torch.optim.Adam, lr=learning_rate)

        model.train()
        for ep in tqdm(range(num_epochs)):
            for inputs, labels in train_loader:
                # .to(device) is a no-op on CPU, so no device check is needed.
                inputs, labels = inputs.to(device), labels.to(device)

                learner.run(inputs, labels)

        # Evaluate on the full test set after every task.
        model.eval()
        val_loss = 0
        corrects = 0
        total = 0
        # No gradients are needed for evaluation; skipping them saves memory and time.
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                out = model(inputs.float())
                # The predicted class is the largest logit; softmax would not change it.
                preds = out.argmax(dim=-1)

                val_loss += criterion(out, labels.long()).item()
                corrects += (preds == labels).sum().item()
                total += labels.size(0)

        print("Loss: {:.6f}, Acc: {:.6f}".format(val_loss/len(test_loader), (corrects/total)*100))

    task_perm_final_accs.append((corrects/total)*100) # save final accuracy in current task permutation

print("Final Accs: ", task_perm_final_accs, " Average Final Acc: ", np.array(task_perm_final_accs).mean())

100%|██████████| 5/5 [00:02<00:00,  1.98it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.870035, Acc: 72.390000


100%|██████████| 5/5 [00:02<00:00,  1.73it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.689178, Acc: 75.490000


100%|██████████| 5/5 [00:02<00:00,  1.88it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.538116, Acc: 82.050000


100%|██████████| 5/5 [00:03<00:00,  1.65it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.370541, Acc: 89.170000


100%|██████████| 5/5 [00:02<00:00,  1.97it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.407775, Acc: 88.600000


100%|██████████| 5/5 [00:02<00:00,  1.76it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.647544, Acc: 82.090000


100%|██████████| 5/5 [00:02<00:00,  1.78it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.723041, Acc: 74.550000


100%|██████████| 5/5 [00:02<00:00,  2.13it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.586379, Acc: 81.410000


100%|██████████| 5/5 [00:02<00:00,  1.80it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.433023, Acc: 86.390000


100%|██████████| 5/5 [00:02<00:00,  1.70it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.398603, Acc: 88.270000


100%|██████████| 5/5 [00:02<00:00,  2.05it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 1.026162, Acc: 62.520000


100%|██████████| 5/5 [00:02<00:00,  1.87it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.541745, Acc: 83.940000


100%|██████████| 5/5 [00:02<00:00,  1.80it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.557365, Acc: 82.540000


100%|██████████| 5/5 [00:02<00:00,  1.78it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.437216, Acc: 87.280000


100%|██████████| 5/5 [00:02<00:00,  1.85it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.466650, Acc: 84.510000


100%|██████████| 5/5 [00:02<00:00,  1.83it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.937706, Acc: 65.550000


100%|██████████| 5/5 [00:03<00:00,  1.67it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.456596, Acc: 86.460000


100%|██████████| 5/5 [00:02<00:00,  1.80it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.520393, Acc: 84.010000


100%|██████████| 5/5 [00:02<00:00,  1.78it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.446332, Acc: 86.510000


100%|██████████| 5/5 [00:02<00:00,  1.88it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.476800, Acc: 84.710000


100%|██████████| 5/5 [00:02<00:00,  1.79it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.966641, Acc: 65.320000


100%|██████████| 5/5 [00:02<00:00,  1.76it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.629363, Acc: 80.830000


100%|██████████| 5/5 [00:02<00:00,  1.71it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.509547, Acc: 85.270000


100%|██████████| 5/5 [00:03<00:00,  1.67it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.401000, Acc: 88.000000


100%|██████████| 5/5 [00:02<00:00,  1.90it/s]

Loss: 0.522265, Acc: 82.730000
Final Accs:  [88.6, 88.27000000000001, 84.50999999999999, 84.71, 82.73]  Average Final Acc:  85.764





### A-GEM

In [12]:
# Episodic-memory settings for the A-GEM run below.
memory_capacity = 10240  # passed to the learner as `memory_capacity`
task_memory_size = 2048  # passed to learner.remember(..., min_save_sz=...) after each task
memory_sample_size = 64  # passed to the learner as `memory_sample_sz`

In [13]:
from tqdm import tqdm
import torch.nn.functional as f
from torch.utils.data import TensorDataset, DataLoader

# Seed every RNG in play from the notebook-wide `seed` so the run is repeatable.
np.random.seed(seed)
torch.manual_seed(seed)
if device.type == 'cuda':
    torch.cuda.manual_seed_all(seed)

# Not used in this cell; kept in case other cells read them.
all_trn_f1_mean = np.array([])
all_val_f1_mean = np.array([])

task_perm_final_accs = []

# constant validation data across tasks
# shuffle=True is kept on purpose: iterating a shuffling loader draws from the
# global torch RNG, so changing it would alter the random stream seen by the
# training that follows each evaluation.
test_data = TensorDataset(x_te, y_te)
test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)

# Five skewed tasks over ten classes: task k takes 90% of classes 2k and 2k+1
# and 2.5% of every other class.
a = 0.9
class_probs = [[0.025] * 10 for _ in range(5)]
for row, probs in enumerate(class_probs):
    probs[2 * row], probs[2 * row + 1] = a, a

# Each outer iteration is one independent run: a fresh model trained on the
# tasks in a newly shuffled order.
for t in range(n_tasks):

    # initialize models
    model = Classifier(input_size, hidden_size, drop_prob, output_size).to(device)
    criterion = nn.CrossEntropyLoss()
    learner = AGEM(model, criterion, device=device,
                   memory_capacity=memory_capacity, memory_sample_sz=memory_sample_size)

    # task loop
    np.random.shuffle(class_probs)  # in-place: the order carries over to the next run
    for T_x, T_y in skewed_split(x_tr, y_tr, class_probs):

        train_data = TensorDataset(T_x, T_y)
        train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)

        learner.prepare(optimizer=torch.optim.Adam, lr=learning_rate)

        model.train()
        for ep in tqdm(range(num_epochs)):
            for inputs, labels in train_loader:
                # .to(device) is a no-op on CPU, so no device check is needed.
                inputs, labels = inputs.to(device), labels.to(device)

                learner.run(inputs, labels)

        # remember a subset
        learner.remember(train_data, min_save_sz=task_memory_size)

        # Evaluate on the full test set after every task.
        model.eval()
        val_loss = 0
        corrects = 0
        total = 0
        # No gradients are needed for evaluation; skipping them saves memory and time.
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                out = model(inputs.float())
                # The predicted class is the largest logit; softmax would not change it.
                preds = out.argmax(dim=-1)

                val_loss += criterion(out, labels.long()).item()
                corrects += (preds == labels).sum().item()
                total += labels.size(0)

        print("Loss: {:.6f}, Acc: {:.6f}".format(val_loss/len(test_loader), (corrects/total)*100))

    task_perm_final_accs.append((corrects/total)*100) # save final accuracy in current task permutation

print("Final Accs: ", task_perm_final_accs, " Average Final Acc: ", np.array(task_perm_final_accs).mean())

100%|██████████| 5/5 [00:02<00:00,  1.81it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.866402, Acc: 72.390000


100%|██████████| 5/5 [00:03<00:00,  1.35it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.684069, Acc: 76.460000


100%|██████████| 5/5 [00:04<00:00,  1.22it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.536516, Acc: 82.120000


100%|██████████| 5/5 [00:04<00:00,  1.25it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.347333, Acc: 89.980000


100%|██████████| 5/5 [00:03<00:00,  1.29it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.366177, Acc: 89.570000


100%|██████████| 5/5 [00:02<00:00,  1.79it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.686351, Acc: 80.190000


100%|██████████| 5/5 [00:04<00:00,  1.19it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.703333, Acc: 75.260000


100%|██████████| 5/5 [00:04<00:00,  1.21it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.510532, Acc: 83.310000


100%|██████████| 5/5 [00:04<00:00,  1.03it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.442791, Acc: 85.820000


100%|██████████| 5/5 [00:04<00:00,  1.11it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.359244, Acc: 89.610000


100%|██████████| 5/5 [00:02<00:00,  1.91it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 1.047842, Acc: 62.300000


100%|██████████| 5/5 [00:04<00:00,  1.11it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.508270, Acc: 85.240000


100%|██████████| 5/5 [00:03<00:00,  1.27it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.507403, Acc: 84.420000


100%|██████████| 5/5 [00:04<00:00,  1.11it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.442829, Acc: 86.990000


100%|██████████| 5/5 [00:04<00:00,  1.18it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.436878, Acc: 85.190000


100%|██████████| 5/5 [00:02<00:00,  2.19it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.991985, Acc: 63.740000


100%|██████████| 5/5 [00:03<00:00,  1.26it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.505864, Acc: 84.990000


100%|██████████| 5/5 [00:03<00:00,  1.31it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.544707, Acc: 83.490000


100%|██████████| 5/5 [00:04<00:00,  1.15it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.418356, Acc: 87.690000


100%|██████████| 5/5 [00:04<00:00,  1.17it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.474081, Acc: 84.230000


100%|██████████| 5/5 [00:02<00:00,  1.77it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 1.046776, Acc: 61.720000


100%|██████████| 5/5 [00:05<00:00,  1.00it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.554103, Acc: 82.430000


100%|██████████| 5/5 [00:05<00:00,  1.08s/it]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.450923, Acc: 86.940000


100%|██████████| 5/5 [00:05<00:00,  1.10s/it]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.385926, Acc: 88.800000


100%|██████████| 5/5 [00:04<00:00,  1.12it/s]

Loss: 0.463445, Acc: 84.170000
Final Accs:  [89.57000000000001, 89.61, 85.19, 84.23, 84.17]  Average Final Acc:  86.554





### Experience Replay

In [None]:
# Replay-memory settings for the Experience Replay run below.
memory_capacity = 10240  # passed to the learner as `memory_capacity`
task_memory_size = 2048  # passed to learner.remember(..., min_save_sz=...) after each task
memory_sample_size = 64  # passed to the learner as `memory_sample_sz`

In [14]:
from tqdm import tqdm
import torch.nn.functional as f
from torch.utils.data import TensorDataset, DataLoader

# Seed every RNG in play from the notebook-wide `seed` so the run is repeatable.
np.random.seed(seed)
torch.manual_seed(seed)
if device.type == 'cuda':
    torch.cuda.manual_seed_all(seed)

# Not used in this cell; kept in case other cells read them.
all_trn_f1_mean = np.array([])
all_val_f1_mean = np.array([])

task_perm_final_accs = []

# constant validation data across tasks
# shuffle=True is kept on purpose: iterating a shuffling loader draws from the
# global torch RNG, so changing it would alter the random stream seen by the
# training that follows each evaluation.
test_data = TensorDataset(x_te, y_te)
test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)

# Five skewed tasks over ten classes: task k takes 90% of classes 2k and 2k+1
# and 2.5% of every other class.
a = 0.9
class_probs = [[0.025] * 10 for _ in range(5)]
for row, probs in enumerate(class_probs):
    probs[2 * row], probs[2 * row + 1] = a, a

# Each outer iteration is one independent run: a fresh model trained on the
# tasks in a newly shuffled order.
for t in range(n_tasks):

    # initialize models
    model = Classifier(input_size, hidden_size, drop_prob, output_size).to(device)
    criterion = nn.CrossEntropyLoss()
    learner = ER(model, criterion, device=device,
                 memory_capacity=memory_capacity, memory_sample_sz=memory_sample_size)

    # task loop
    np.random.shuffle(class_probs)  # in-place: the order carries over to the next run
    for T_x, T_y in skewed_split(x_tr, y_tr, class_probs):

        train_data = TensorDataset(T_x, T_y)
        train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)

        learner.prepare(optimizer=torch.optim.Adam, lr=learning_rate)

        model.train()
        for ep in tqdm(range(num_epochs)):
            for inputs, labels in train_loader:
                # .to(device) is a no-op on CPU, so no device check is needed.
                inputs, labels = inputs.to(device), labels.to(device)

                learner.run(inputs, labels)

        # remember a subset
        learner.remember(train_data, min_save_sz=task_memory_size)

        # Evaluate on the full test set after every task.
        model.eval()
        val_loss = 0
        corrects = 0
        total = 0
        # No gradients are needed for evaluation; skipping them saves memory and time.
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                out = model(inputs.float())
                # The predicted class is the largest logit; softmax would not change it.
                preds = out.argmax(dim=-1)

                val_loss += criterion(out, labels.long()).item()
                corrects += (preds == labels).sum().item()
                total += labels.size(0)

        print("Loss: {:.6f}, Acc: {:.6f}".format(val_loss/len(test_loader), (corrects/total)*100))

    task_perm_final_accs.append((corrects/total)*100) # save final accuracy in current task permutation

print("Final Accs: ", task_perm_final_accs, " Average Final Acc: ", np.array(task_perm_final_accs).mean())

100%|██████████| 5/5 [00:02<00:00,  1.72it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.866402, Acc: 72.390000


100%|██████████| 5/5 [00:03<00:00,  1.32it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.585152, Acc: 79.640000


100%|██████████| 5/5 [00:03<00:00,  1.42it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.320832, Acc: 90.290000


100%|██████████| 5/5 [00:04<00:00,  1.16it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.234624, Acc: 92.890000


100%|██████████| 5/5 [00:04<00:00,  1.22it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.186608, Acc: 94.400000


100%|██████████| 5/5 [00:02<00:00,  1.72it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.702959, Acc: 80.470000


100%|██████████| 5/5 [00:03<00:00,  1.32it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.645184, Acc: 77.040000


100%|██████████| 5/5 [00:03<00:00,  1.36it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.430667, Acc: 86.510000


100%|██████████| 5/5 [00:03<00:00,  1.27it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.280535, Acc: 91.560000


100%|██████████| 5/5 [00:03<00:00,  1.45it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.190151, Acc: 94.450000


100%|██████████| 5/5 [00:02<00:00,  2.03it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 1.072220, Acc: 61.240000


100%|██████████| 5/5 [00:03<00:00,  1.30it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.634849, Acc: 79.110000


100%|██████████| 5/5 [00:03<00:00,  1.32it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.467497, Acc: 85.600000


100%|██████████| 5/5 [00:03<00:00,  1.45it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.293324, Acc: 90.950000


100%|██████████| 5/5 [00:03<00:00,  1.36it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.239104, Acc: 92.240000


100%|██████████| 5/5 [00:02<00:00,  1.83it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.994789, Acc: 65.180000


100%|██████████| 5/5 [00:04<00:00,  1.18it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.504372, Acc: 84.120000


100%|██████████| 5/5 [00:04<00:00,  1.18it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.405203, Acc: 87.360000


100%|██████████| 5/5 [00:03<00:00,  1.30it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.315597, Acc: 90.660000


100%|██████████| 5/5 [00:04<00:00,  1.20it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.216350, Acc: 93.240000


100%|██████████| 5/5 [00:02<00:00,  1.81it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.997628, Acc: 63.310000


100%|██████████| 5/5 [00:04<00:00,  1.24it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.544294, Acc: 81.980000


100%|██████████| 5/5 [00:04<00:00,  1.22it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.402807, Acc: 87.980000


100%|██████████| 5/5 [00:04<00:00,  1.18it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

Loss: 0.279255, Acc: 91.770000


100%|██████████| 5/5 [00:03<00:00,  1.32it/s]

Loss: 0.228038, Acc: 92.710000
Final Accs:  [94.39999999999999, 94.45, 92.24, 93.24, 92.71000000000001]  Average Final Acc:  93.40799999999999



