
**Install requirements**

In [0]:
#!pip3 install 'torch==1.3.1'
#!pip3 install 'torchvision==0.4.2'
#!pip3 install 'Pillow-SIMD'
#!pip3 install 'tqdm'



**Import libraries**

In [0]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms 
from torchvision.models import alexnet
from torchvision.models import vgg11
from torchvision.models import resnet18


import torch.utils.data as data
import torchvision.datasets

from PIL import Image
from tqdm import tqdm

import matplotlib.pyplot as plt


**Set Arguments**

In [0]:
DEVICE = 'cuda' # 'cuda' or 'cpu'

NUM_CLASSES = 101 # 101 + 1: There is am extra Background class that should be removed 

BATCH_SIZE =256    # Higher batch sizes allows for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, learning rate should change by the same factor to have comparable results

LR = 1e-3           # The initial Learning Rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 30     # Total number of training epochs (iterations over dataset)
STEP_SIZE = 20       # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 10

**Define Data Preprocessing**

In [0]:
# Define transforms for training phase

train_transform = transforms.Compose([transforms.Resize(256),      # Resizes short size of the PIL image to 256
                                       #transforms.RandomHorizontalFlip(),  
                                       #transforms.RandomRotation(10),  
                                       #transforms.RandomGrayscale(0.5),
                                       #transforms.ColorJitter(),
                                      
                                       transforms.CenterCrop(224),  # Crops a central square patch of the image
                                                                   # 224 because torchvision's AlexNet needs a 224x224 input!
                                                                   # Remember this when applying different transformations, otherwise you get an error
                                      transforms.ToTensor(), # Turn PIL Image to torch.Tensor
                                      #transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) # Normalizes tensor with mean and standard deviation
                                      transforms.Normalize ((0.485, 0.456, 0.406),(0.229, 0.224, 0.225))#Using mean and std of imageNet
])
# Define transforms for the evaluation phase
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      #transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
                                      transforms.Normalize((0.485, 0.456, 0.406),(0.229, 0.224, 0.225))#Using mean and std of imageNet                                   
])


def pil_loader(path):
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')



class Caltech():
    def __init__(self, root, split, transform=None, target_transform=None):
        #super(Caltech, self).__init__(root, transform=transform, target_transform=target_transform)
        self.transform = transform
        self.root = root
        self.split = split # This defines the split you are going to use
                           # (split files are called 'train.txt' and 'test.txt')

        #Apro il file per lo split dei dati,rimuovo da queste BACKGROUND_Google,applico split seguendo il file di split
        #mi salvo in lines i path relativi di  tutte le immagini e in target_map il mapping tra target scritto e target numerico comodo per modelli
       

        target_lines = [l.split("/")[0] for l in self.split]
        targets = []
        for i in range(len(target_lines)):
            if i == 0 or target_lines[i - 1] != target_lines[i]:
                targets.append(target_lines[i])
        self.map_target = {targets[i] : i  for i in range(len(targets))}




        #Using RAM to go faster

        self.image = []
        self.label = []



        '''
        - Here you should implement the logic for reading the splits files and accessing elements
        - If the RAM size allows it, it is faster to store all data in memory
        - PyTorch Dataset classes use indexes to read elements
        - You should provide a way for the __getitem__ method to access the image-label pair
          through the index
        - Labels should start from 0, so for Caltech you will have lables 0...100 (excluding the background class) 
        '''

    def __getitem__(self, index):
        '''
        __getitem__ should access an element through its index
        Args:
            index (int): Index

        Returns:
            tuple: (sample, target) where target is class_index of the target class.
        '''
        # Provide a way to access image and label via index
        # Image should be a PIL Image
        # label can be int

        image, label = pil_loader(self.root+"/"+self.split[index]),self.map_target[self.split[index].split("/")[0]]
        # Applies preprocessing when accessing the image
        if self.transform is not None:
            image = self.transform(image)
        return image, label

    def __len__(self):
        '''
        The __len__ method returns the length of the dataset
        It is mandatory, as this is used by several other components
        '''
        length = len(self.split)
        return length

**Prepare Dataset**

In [47]:
# Clone github repository with data
if not os.path.isdir('./Homework2-Caltech101'):
  !git clone https://github.com/MachineLearning2020/Homework2-Caltech101.git

DATA_DIR = 'Homework2-Caltech101/101_ObjectCategories'
f1 = open('Homework2-Caltech101/train.txt','r')
f2 = open('Homework2-Caltech101/test.txt','r')

train_split = f1.readlines()
test_split = f2.readlines()

train_split = [l.replace('\n', '') for l in train_split if l.startswith("BACKGROUND_Google") == False]
test_split = [l.replace('\n', '') for l in test_split if l.startswith("BACKGROUND_Google") == False]


#Use this code to class balance
validation_split = []
train2_split = []
j = 0
for i in range(len(train_split)):
  j = (j + 1) % 2
  if i != 0 and train_split[i].split("/")[0] != train_split[i - 1].split("/")[0]:
    j = 0
  if j > 0:
    train2_split.append(train_split[i])
  else:
    validation_split.append(train_split[i])

#train_split = [train_split[i] for i in range(len(train_split)) if i % 3 > 0]
#validation_split = [train_split[i] for i in range(len(train_split)) if i % 3 == 0]


# Prepare Pytorch train/test Datasets
train_dataset = Caltech(DATA_DIR,train2_split,train_transform)
validation_dataset = Caltech(DATA_DIR,validation_split,eval_transform)
test_dataset = Caltech(DATA_DIR,test_split,eval_transform)






# Check dataset sizes
print('Train Dataset: {}'.format(len(train_dataset)))
print('Validation Dataset: {}'.format(len(validation_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))

Train Dataset: 2868
Validation Dataset: 2916
Test Dataset: 2893


**Prepare Dataloaders**

In [0]:
# Dataloaders iterate over pytorch datasets and transparently provide useful functions (e.g. parallelization and shuffling)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
validation_dataloader = DataLoader(validation_dataset,batch_size=BATCH_SIZE,shuffle=False,num_workers=4)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

**Prepare Network**

In [0]:
#net = alexnet() 
net = alexnet(pretrained=True) # Loading AlexNet model
#net =  vgg11(pretrained = True)
#net = resnet18(pretrained = True)


# AlexNet has 1000 output neurons, corresponding to the 1000 ImageNet's classes
# We need 101 outputs for Caltech-101
net.classifier[6] = nn.Linear(4096, NUM_CLASSES) # nn.Linear in pytorch is a fully connected layer
                                                 # The convolutional layer is nn.Conv2d
#net.fc = nn.Linear(512 * torchvision.models.resnet.BasicBlock.expansion,NUM_CLASSES)     #change num_classes for resnet                                        


# We just changed the last layer of AlexNet with a new fully connected layer with 101 outputs
# It is mandatory to study torchvision.models.alexnet source code

**Prepare Training**

In [0]:
# Define loss function
criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy

# Choose parameters to optimize
# To access a different set of parameters, you have to access submodules of AlexNet
# (nn.Module objects, like AlexNet, implement the Composite Pattern)
# e.g.: parameters of the fully connected layers: net.classifier.parameters()
# e.g.: parameters of the convolutional layers: look at alexnet's source code ;) 

parameters_to_optimize = net.parameters() # In this case we optimize over all the parameters of AlexNet
#parameters_to_optimize = net.classifier.parameters()#freeze conv layer
#parameters_to_optimize = net.features.parameters()#freeze fully layer 

# Define optimizer
# An optimizer updates the weights based on loss
# We use SGD with momentum
optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

# Define scheduler
# A scheduler dynamically changes learning rate
# The most common schedule is the step(-down), which multiplies learning rate by gamma every STEP_SIZE epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

**Train**

In [51]:
# By default, everything is loaded to cpu
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
bestNet = net
maxAccuracy = 0
losses = []
accuracies = []
train_accuracies = []


cudnn.benchmark # Calling this optimizes runtime

current_step = 0
# Start iterating over the epochs


for epoch in range(NUM_EPOCHS):
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_lr()))

  # Iterate over the dataset
  for images, labels in train_dataloader:
    # Bring data over the device of choice
    
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    net.train() # Sets module in training mode

    # PyTorch, by default, accumulates gradients after each backward pass
    # We need to manually set the gradients to zero before starting a new iteration
    optimizer.zero_grad() # Zero-ing the gradients

    

    # Forward pass to the network
    outputs = net(images)

    # Compute loss based on output and ground truth
    loss = criterion(outputs, labels)
    


    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    # Compute gradients for each layer and update weights
    loss.backward()  # backward pass: computes gradients
    optimizer.step() # update weights based on accumulated gradients

    current_step += 1
  


  #validation phase
  net.train(False)
  running_corrects = 0
  for images, labels in tqdm(validation_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    outputs = net(images)
    _, preds = torch.max(outputs.data, 1)

    running_corrects += torch.sum(preds == labels.data).data.item()
  
  accuracy = running_corrects / float(len(validation_dataset))
  if(maxAccuracy < accuracy):
    
    maxAccuracy = accuracy
    bestNet = net
    print('Best accuracy {}\n'.format(maxAccuracy))
  else:
    print('accuracy {}\n'.format(accuracy))
  
  #train accuracy
  running_corrects = 0
  for images, labels in tqdm(train_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    outputs = net(images)
    _, preds = torch.max(outputs.data, 1)

    running_corrects += torch.sum(preds == labels.data).data.item()
  
  train_accuracy = running_corrects / float(len(train_dataset))

  train_accuracies.append(train_accuracy)
  accuracies.append(accuracy)
  losses.append(loss.item())


  # Step the scheduler
  scheduler.step() 


print("Loss = {}".format(losses))
print("Validation_accuracy = {} ".format(accuracies))
print("Train accuracy = {} ".format(train_accuracies))


Starting epoch 1/30, LR = [0.01]
Step 0, Loss 4.866646766662598



  0%|          | 0/12 [00:00<?, ?it/s][A

Step 10, Loss 1.7154978513717651



  8%|▊         | 1/12 [00:04<00:46,  4.26s/it][A
 17%|█▋        | 2/12 [00:04<00:30,  3.05s/it][A
 25%|██▌       | 3/12 [00:04<00:19,  2.19s/it][A
 33%|███▎      | 4/12 [00:04<00:12,  1.59s/it][A
 42%|████▏     | 5/12 [00:08<00:15,  2.19s/it][A
 50%|█████     | 6/12 [00:08<00:09,  1.58s/it][A
 58%|█████▊    | 7/12 [00:08<00:05,  1.16s/it][A
 67%|██████▋   | 8/12 [00:08<00:03,  1.16it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.03it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.41it/s][A
100%|██████████| 12/12 [00:10<00:00,  1.14it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

Best accuracy 0.6532921810699589




  9%|▉         | 1/11 [00:03<00:39,  3.98s/it][A
 18%|█▊        | 2/11 [00:04<00:25,  2.86s/it][A
 27%|██▋       | 3/11 [00:04<00:16,  2.07s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.52s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.85s/it][A
 55%|█████▍    | 6/11 [00:07<00:06,  1.37s/it][A
 64%|██████▎   | 7/11 [00:07<00:04,  1.01s/it][A
 73%|███████▎  | 8/11 [00:07<00:02,  1.30it/s][A
 82%|████████▏ | 9/11 [00:09<00:02,  1.11s/it][A
100%|██████████| 11/11 [00:10<00:00,  1.25it/s][A
[A

Starting epoch 2/30, LR = [0.01]
Step 20, Loss 0.6231852173805237



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:52,  4.81s/it][A
 17%|█▋        | 2/12 [00:05<00:34,  3.44s/it][A
 25%|██▌       | 3/12 [00:05<00:22,  2.48s/it][A
 33%|███▎      | 4/12 [00:05<00:14,  1.80s/it][A
 42%|████▏     | 5/12 [00:09<00:16,  2.33s/it][A
 50%|█████     | 6/12 [00:09<00:10,  1.69s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.23s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.09it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.05it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.44it/s][A
100%|██████████| 12/12 [00:10<00:00,  1.09it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

Best accuracy 0.8103566529492455




  9%|▉         | 1/11 [00:03<00:39,  3.94s/it][A
 18%|█▊        | 2/11 [00:04<00:25,  2.83s/it][A
 27%|██▋       | 3/11 [00:04<00:16,  2.04s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.50s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.93s/it][A
 55%|█████▍    | 6/11 [00:07<00:07,  1.44s/it][A
 64%|██████▎   | 7/11 [00:08<00:04,  1.07s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.25it/s][A
 82%|████████▏ | 9/11 [00:09<00:02,  1.06s/it][A
 91%|█████████ | 10/11 [00:10<00:00,  1.21it/s][A
100%|██████████| 11/11 [00:10<00:00,  1.06it/s][A

Starting epoch 3/30, LR = [0.01]
Step 30, Loss 0.3404132127761841



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:48,  4.43s/it][A
 17%|█▋        | 2/12 [00:04<00:31,  3.17s/it][A
 25%|██▌       | 3/12 [00:04<00:20,  2.29s/it][A
 33%|███▎      | 4/12 [00:05<00:13,  1.68s/it][A
 42%|████▏     | 5/12 [00:08<00:15,  2.19s/it][A
 50%|█████     | 6/12 [00:08<00:09,  1.57s/it][A
 58%|█████▊    | 7/12 [00:08<00:05,  1.15s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.17it/s][A
 75%|███████▌  | 9/12 [00:10<00:03,  1.03s/it][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.34it/s][A
100%|██████████| 12/12 [00:10<00:00,  1.12it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

Best accuracy 0.8261316872427984




  9%|▉         | 1/11 [00:04<00:41,  4.20s/it][A
 18%|█▊        | 2/11 [00:04<00:27,  3.01s/it][A
 27%|██▋       | 3/11 [00:04<00:17,  2.18s/it][A
 36%|███▋      | 4/11 [00:04<00:11,  1.58s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.97s/it][A
 55%|█████▍    | 6/11 [00:07<00:07,  1.44s/it][A
 64%|██████▎   | 7/11 [00:08<00:04,  1.06s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.26it/s][A
 82%|████████▏ | 9/11 [00:10<00:02,  1.14s/it][A
100%|██████████| 11/11 [00:10<00:00,  1.21it/s][A
[A

Starting epoch 4/30, LR = [0.01]
Step 40, Loss 0.1603250801563263



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:50,  4.55s/it][A
 17%|█▋        | 2/12 [00:04<00:32,  3.25s/it][A
 25%|██▌       | 3/12 [00:05<00:21,  2.35s/it][A
 33%|███▎      | 4/12 [00:05<00:13,  1.72s/it][A
 42%|████▏     | 5/12 [00:09<00:16,  2.40s/it][A
 50%|█████     | 6/12 [00:09<00:10,  1.73s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.26s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.06it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.07it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.47it/s][A
100%|██████████| 12/12 [00:11<00:00,  1.09it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

Best accuracy 0.8401920438957476




  9%|▉         | 1/11 [00:04<00:41,  4.12s/it][A
 18%|█▊        | 2/11 [00:04<00:26,  2.95s/it][A
 27%|██▋       | 3/11 [00:04<00:17,  2.13s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.56s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.97s/it][A
 55%|█████▍    | 6/11 [00:07<00:07,  1.43s/it][A
 64%|██████▎   | 7/11 [00:08<00:04,  1.06s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.26it/s][A
 82%|████████▏ | 9/11 [00:10<00:02,  1.11s/it][A
100%|██████████| 11/11 [00:10<00:00,  1.24it/s][A
[A

Starting epoch 5/30, LR = [0.01]
Step 50, Loss 0.1260709911584854



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:51,  4.69s/it][A
 17%|█▋        | 2/12 [00:04<00:33,  3.35s/it][A
 25%|██▌       | 3/12 [00:05<00:21,  2.41s/it][A
 33%|███▎      | 4/12 [00:05<00:13,  1.75s/it][A
 42%|████▏     | 5/12 [00:08<00:16,  2.29s/it][A
 50%|█████     | 6/12 [00:09<00:09,  1.65s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.21s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.11it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.07it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.47it/s][A
100%|██████████| 12/12 [00:10<00:00,  1.12it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

Best accuracy 0.8453360768175583




  9%|▉         | 1/11 [00:04<00:40,  4.09s/it][A
 18%|█▊        | 2/11 [00:04<00:26,  2.91s/it][A
 27%|██▋       | 3/11 [00:04<00:16,  2.11s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.55s/it][A
 45%|████▌     | 5/11 [00:07<00:12,  2.01s/it][A
 55%|█████▍    | 6/11 [00:07<00:07,  1.46s/it][A
 64%|██████▎   | 7/11 [00:08<00:04,  1.07s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.24it/s][A
 82%|████████▏ | 9/11 [00:10<00:02,  1.13s/it][A
100%|██████████| 11/11 [00:10<00:00,  1.23it/s][A
[A

Starting epoch 6/30, LR = [0.01]
Step 60, Loss 0.06620554625988007



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:51,  4.68s/it][A
 17%|█▋        | 2/12 [00:04<00:33,  3.33s/it][A
 25%|██▌       | 3/12 [00:05<00:21,  2.40s/it][A
 33%|███▎      | 4/12 [00:05<00:13,  1.74s/it][A
 42%|████▏     | 5/12 [00:08<00:16,  2.33s/it][A
 50%|█████     | 6/12 [00:09<00:10,  1.68s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.23s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.09it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.02it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.40it/s][A
100%|██████████| 12/12 [00:10<00:00,  1.09it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

accuracy 0.8446502057613169




  9%|▉         | 1/11 [00:04<00:40,  4.05s/it][A
 18%|█▊        | 2/11 [00:04<00:26,  2.91s/it][A
 27%|██▋       | 3/11 [00:04<00:16,  2.11s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.55s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.98s/it][A
 55%|█████▍    | 6/11 [00:07<00:07,  1.44s/it][A
 64%|██████▎   | 7/11 [00:08<00:04,  1.06s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.26it/s][A
 82%|████████▏ | 9/11 [00:10<00:02,  1.18s/it][A
100%|██████████| 11/11 [00:10<00:00,  1.17it/s][A
[A

Starting epoch 7/30, LR = [0.01]
Step 70, Loss 0.05574095994234085



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:52,  4.76s/it][A
 17%|█▋        | 2/12 [00:04<00:33,  3.39s/it][A
 25%|██▌       | 3/12 [00:05<00:21,  2.43s/it][A
 33%|███▎      | 4/12 [00:05<00:14,  1.77s/it][A
 42%|████▏     | 5/12 [00:09<00:16,  2.42s/it][A
 50%|█████     | 6/12 [00:09<00:10,  1.75s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.27s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.08it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.13it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.55it/s][A
100%|██████████| 12/12 [00:10<00:00,  1.10it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

Best accuracy 0.845679012345679




  9%|▉         | 1/11 [00:03<00:39,  3.98s/it][A
 18%|█▊        | 2/11 [00:04<00:25,  2.84s/it][A
 27%|██▋       | 3/11 [00:04<00:16,  2.06s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.51s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.96s/it][A
 55%|█████▍    | 6/11 [00:07<00:07,  1.43s/it][A
 64%|██████▎   | 7/11 [00:08<00:04,  1.05s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.26it/s][A
 82%|████████▏ | 9/11 [00:10<00:02,  1.17s/it][A
100%|██████████| 11/11 [00:10<00:00,  1.18it/s][A
[A

Starting epoch 8/30, LR = [0.01]
Step 80, Loss 0.02418368309736252



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:53,  4.84s/it][A
 17%|█▋        | 2/12 [00:05<00:34,  3.46s/it][A
 25%|██▌       | 3/12 [00:05<00:22,  2.49s/it][A
 33%|███▎      | 4/12 [00:05<00:14,  1.81s/it][A
 42%|████▏     | 5/12 [00:09<00:16,  2.41s/it][A
 50%|█████     | 6/12 [00:09<00:10,  1.74s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.27s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.08it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.03it/s][A
 92%|█████████▏| 11/12 [00:11<00:00,  1.42it/s][A
100%|██████████| 12/12 [00:11<00:00,  1.07it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

accuracy 0.840877914951989




  9%|▉         | 1/11 [00:04<00:41,  4.16s/it][A
 18%|█▊        | 2/11 [00:04<00:26,  2.98s/it][A
 27%|██▋       | 3/11 [00:04<00:17,  2.16s/it][A
 36%|███▋      | 4/11 [00:04<00:11,  1.59s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.91s/it][A
 55%|█████▍    | 6/11 [00:07<00:07,  1.40s/it][A
 64%|██████▎   | 7/11 [00:07<00:04,  1.04s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.28it/s][A
 82%|████████▏ | 9/11 [00:10<00:02,  1.13s/it][A
 91%|█████████ | 10/11 [00:10<00:00,  1.21it/s][A
100%|██████████| 11/11 [00:10<00:00,  1.58it/s][A
[A

Starting epoch 9/30, LR = [0.01]
Step 90, Loss 0.02551000751554966



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:52,  4.74s/it][A
 17%|█▋        | 2/12 [00:04<00:33,  3.38s/it][A
 25%|██▌       | 3/12 [00:05<00:21,  2.42s/it][A
 33%|███▎      | 4/12 [00:05<00:14,  1.76s/it][A
 42%|████▏     | 5/12 [00:09<00:16,  2.35s/it][A
 50%|█████     | 6/12 [00:09<00:10,  1.70s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.25s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.08it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.04it/s][A
 83%|████████▎ | 10/12 [00:10<00:01,  1.42it/s][A
100%|██████████| 12/12 [00:10<00:00,  1.95it/s][A
[A
  0%|          | 0/11 [00:00<?, ?it/s][A

accuracy 0.8412208504801097




  9%|▉         | 1/11 [00:03<00:39,  3.98s/it][A
 18%|█▊        | 2/11 [00:04<00:25,  2.86s/it][A
 27%|██▋       | 3/11 [00:04<00:16,  2.07s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.52s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.97s/it][A
 55%|█████▍    | 6/11 [00:07<00:07,  1.43s/it][A
 64%|██████▎   | 7/11 [00:08<00:04,  1.05s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.27it/s][A
 82%|████████▏ | 9/11 [00:10<00:02,  1.17s/it][A
100%|██████████| 11/11 [00:10<00:00,  1.18it/s][A
[A

Starting epoch 10/30, LR = [0.01]
Step 100, Loss 0.02783270925283432



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:49,  4.53s/it][A
 17%|█▋        | 2/12 [00:04<00:32,  3.22s/it][A
 25%|██▌       | 3/12 [00:04<00:20,  2.31s/it][A
 33%|███▎      | 4/12 [00:05<00:13,  1.67s/it][A
 42%|████▏     | 5/12 [00:09<00:16,  2.36s/it][A
 50%|█████     | 6/12 [00:09<00:10,  1.71s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.25s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.08it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.04it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.43it/s][A
100%|██████████| 12/12 [00:10<00:00,  1.10it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

Best accuracy 0.848079561042524




  9%|▉         | 1/11 [00:04<00:40,  4.04s/it][A
 18%|█▊        | 2/11 [00:04<00:26,  2.89s/it][A
 27%|██▋       | 3/11 [00:04<00:16,  2.09s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.52s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.91s/it][A
 55%|█████▍    | 6/11 [00:07<00:07,  1.41s/it][A
 64%|██████▎   | 7/11 [00:07<00:04,  1.04s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.28it/s][A
 82%|████████▏ | 9/11 [00:10<00:02,  1.20s/it][A
100%|██████████| 11/11 [00:10<00:00,  1.15it/s][A
[A

Starting epoch 11/30, LR = [0.01]
Step 110, Loss 0.025963682681322098



  0%|          | 0/12 [00:00<?, ?it/s][A

Step 120, Loss 0.027139194309711456



  8%|▊         | 1/12 [00:04<00:49,  4.46s/it][A
 17%|█▋        | 2/12 [00:04<00:31,  3.19s/it][A
 25%|██▌       | 3/12 [00:04<00:20,  2.30s/it][A
 33%|███▎      | 4/12 [00:05<00:13,  1.66s/it][A
 42%|████▏     | 5/12 [00:08<00:16,  2.31s/it][A
 50%|█████     | 6/12 [00:09<00:09,  1.67s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.22s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.10it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.00it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.38it/s][A
100%|██████████| 12/12 [00:10<00:00,  1.09it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

accuracy 0.8477366255144033




  9%|▉         | 1/11 [00:03<00:39,  3.98s/it][A
 18%|█▊        | 2/11 [00:04<00:25,  2.84s/it][A
 27%|██▋       | 3/11 [00:04<00:16,  2.07s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.52s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.96s/it][A
 55%|█████▍    | 6/11 [00:07<00:07,  1.43s/it][A
 64%|██████▎   | 7/11 [00:08<00:04,  1.05s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.27it/s][A
 82%|████████▏ | 9/11 [00:10<00:02,  1.17s/it][A
 91%|█████████ | 10/11 [00:10<00:00,  1.18it/s][A
100%|██████████| 11/11 [00:10<00:00,  1.05it/s][A

Starting epoch 12/30, LR = [0.01]
Step 130, Loss 0.025785792618989944



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:48,  4.42s/it][A
 17%|█▋        | 2/12 [00:04<00:31,  3.17s/it][A
 25%|██▌       | 3/12 [00:04<00:20,  2.27s/it][A
 33%|███▎      | 4/12 [00:05<00:13,  1.66s/it][A
 42%|████▏     | 5/12 [00:09<00:17,  2.47s/it][A
 50%|█████     | 6/12 [00:09<00:10,  1.79s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.32s/it][A
 67%|██████▋   | 8/12 [00:10<00:03,  1.02it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.09it/s][A
 92%|█████████▏| 11/12 [00:11<00:00,  1.49it/s][A
100%|██████████| 12/12 [00:11<00:00,  1.07it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

accuracy 0.8463648834019204




  9%|▉         | 1/11 [00:04<00:40,  4.01s/it][A
 18%|█▊        | 2/11 [00:04<00:25,  2.87s/it][A
 27%|██▋       | 3/11 [00:04<00:16,  2.08s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.53s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.93s/it][A
 55%|█████▍    | 6/11 [00:07<00:07,  1.43s/it][A
 64%|██████▎   | 7/11 [00:08<00:04,  1.05s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.27it/s][A
 82%|████████▏ | 9/11 [00:10<00:02,  1.18s/it][A
100%|██████████| 11/11 [00:10<00:00,  1.17it/s][A
[A

Starting epoch 13/30, LR = [0.01]
Step 140, Loss 0.0183846615254879



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:52,  4.81s/it][A
 17%|█▋        | 2/12 [00:05<00:34,  3.44s/it][A
 25%|██▌       | 3/12 [00:05<00:22,  2.46s/it][A
 33%|███▎      | 4/12 [00:05<00:14,  1.80s/it][A
 42%|████▏     | 5/12 [00:09<00:16,  2.40s/it][A
 50%|█████     | 6/12 [00:09<00:10,  1.74s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.27s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.05it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.02it/s][A
 92%|█████████▏| 11/12 [00:11<00:00,  1.40it/s][A
100%|██████████| 12/12 [00:11<00:00,  1.07it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

accuracy 0.8463648834019204




  9%|▉         | 1/11 [00:04<00:42,  4.28s/it][A
 18%|█▊        | 2/11 [00:04<00:27,  3.06s/it][A
 27%|██▋       | 3/11 [00:04<00:17,  2.20s/it][A
 36%|███▋      | 4/11 [00:04<00:11,  1.60s/it][A
 45%|████▌     | 5/11 [00:07<00:12,  2.03s/it][A
 55%|█████▍    | 6/11 [00:08<00:07,  1.48s/it][A
 64%|██████▎   | 7/11 [00:08<00:04,  1.09s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.23it/s][A
 82%|████████▏ | 9/11 [00:10<00:02,  1.12s/it][A
 91%|█████████ | 10/11 [00:10<00:00,  1.22it/s][A
100%|██████████| 11/11 [00:10<00:00,  1.04it/s][A

Starting epoch 14/30, LR = [0.01]
Step 150, Loss 0.021410446614027023



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:51,  4.70s/it][A
 17%|█▋        | 2/12 [00:04<00:33,  3.36s/it][A
 25%|██▌       | 3/12 [00:05<00:21,  2.41s/it][A
 33%|███▎      | 4/12 [00:05<00:14,  1.76s/it][A
 42%|████▏     | 5/12 [00:09<00:17,  2.44s/it][A
 50%|█████     | 6/12 [00:09<00:10,  1.76s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.28s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.06it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.07it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.46it/s][A
100%|██████████| 12/12 [00:11<00:00,  1.08it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

Best accuracy 0.8494513031550068




  9%|▉         | 1/11 [00:03<00:39,  3.95s/it][A
 18%|█▊        | 2/11 [00:04<00:25,  2.84s/it][A
 27%|██▋       | 3/11 [00:04<00:16,  2.07s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.50s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.86s/it][A
 55%|█████▍    | 6/11 [00:07<00:06,  1.36s/it][A
 64%|██████▎   | 7/11 [00:07<00:04,  1.08s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.24it/s][A
 82%|████████▏ | 9/11 [00:09<00:02,  1.05s/it][A
 91%|█████████ | 10/11 [00:09<00:00,  1.29it/s][A
100%|██████████| 11/11 [00:10<00:00,  1.57it/s][A
[A

Starting epoch 15/30, LR = [0.01]
Step 160, Loss 0.0118942279368639



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:52,  4.79s/it][A
 17%|█▋        | 2/12 [00:04<00:34,  3.41s/it][A
 25%|██▌       | 3/12 [00:05<00:22,  2.46s/it][A
 33%|███▎      | 4/12 [00:05<00:14,  1.80s/it][A
 42%|████▏     | 5/12 [00:09<00:16,  2.42s/it][A
 50%|█████     | 6/12 [00:09<00:10,  1.74s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.27s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.06it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.07it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.47it/s][A
100%|██████████| 12/12 [00:11<00:00,  1.08it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

accuracy 0.8477366255144033




  9%|▉         | 1/11 [00:04<00:41,  4.13s/it][A
 18%|█▊        | 2/11 [00:04<00:26,  2.96s/it][A
 27%|██▋       | 3/11 [00:04<00:17,  2.15s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.56s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.99s/it][A
 55%|█████▍    | 6/11 [00:07<00:07,  1.44s/it][A
 64%|██████▎   | 7/11 [00:08<00:04,  1.06s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.26it/s][A
 82%|████████▏ | 9/11 [00:10<00:02,  1.12s/it][A
100%|██████████| 11/11 [00:10<00:00,  1.23it/s][A
[A

Starting epoch 16/30, LR = [0.01]
Step 170, Loss 0.019450560212135315



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:53,  4.82s/it][A
 17%|█▋        | 2/12 [00:05<00:34,  3.45s/it][A
 25%|██▌       | 3/12 [00:05<00:22,  2.48s/it][A
 33%|███▎      | 4/12 [00:05<00:14,  1.79s/it][A
 42%|████▏     | 5/12 [00:09<00:17,  2.45s/it][A
 50%|█████     | 6/12 [00:09<00:10,  1.76s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.29s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.05it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.09it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.49it/s][A
100%|██████████| 12/12 [00:11<00:00,  1.08it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

accuracy 0.8460219478737997




  9%|▉         | 1/11 [00:03<00:38,  3.90s/it][A
 18%|█▊        | 2/11 [00:04<00:25,  2.80s/it][A
 27%|██▋       | 3/11 [00:04<00:16,  2.03s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.48s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.94s/it][A
 55%|█████▍    | 6/11 [00:07<00:07,  1.44s/it][A
 64%|██████▎   | 7/11 [00:08<00:04,  1.06s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.26it/s][A
 82%|████████▏ | 9/11 [00:10<00:02,  1.12s/it][A
 91%|█████████ | 10/11 [00:10<00:00,  1.18it/s][A
100%|██████████| 11/11 [00:10<00:00,  1.05it/s][A

Starting epoch 17/30, LR = [0.01]
Step 180, Loss 0.017065927386283875



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:51,  4.64s/it][A
 17%|█▋        | 2/12 [00:04<00:33,  3.31s/it][A
 25%|██▌       | 3/12 [00:05<00:21,  2.38s/it][A
 33%|███▎      | 4/12 [00:05<00:13,  1.72s/it][A
 42%|████▏     | 5/12 [00:08<00:16,  2.32s/it][A
 50%|█████     | 6/12 [00:09<00:10,  1.68s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.23s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.10it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.01it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.39it/s][A
100%|██████████| 12/12 [00:10<00:00,  1.09it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

accuracy 0.8487654320987654




  9%|▉         | 1/11 [00:04<00:40,  4.04s/it][A
 18%|█▊        | 2/11 [00:04<00:26,  2.90s/it][A
 27%|██▋       | 3/11 [00:04<00:16,  2.10s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.54s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.92s/it][A
 55%|█████▍    | 6/11 [00:07<00:06,  1.40s/it][A
 64%|██████▎   | 7/11 [00:07<00:04,  1.03s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.29it/s][A
 82%|████████▏ | 9/11 [00:10<00:02,  1.12s/it][A
 91%|█████████ | 10/11 [00:10<00:00,  1.21it/s][A
100%|██████████| 11/11 [00:10<00:00,  1.07it/s][A

Starting epoch 18/30, LR = [0.01]
Step 190, Loss 0.010159406810998917



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:51,  4.68s/it][A
 17%|█▋        | 2/12 [00:04<00:33,  3.35s/it][A
 25%|██▌       | 3/12 [00:05<00:21,  2.41s/it][A
 33%|███▎      | 4/12 [00:05<00:14,  1.75s/it][A
 42%|████▏     | 5/12 [00:09<00:16,  2.39s/it][A
 50%|█████     | 6/12 [00:09<00:10,  1.73s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.27s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.06it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.07it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.47it/s][A
100%|██████████| 12/12 [00:11<00:00,  1.09it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

accuracy 0.8491083676268861




  9%|▉         | 1/11 [00:04<00:41,  4.13s/it][A
 18%|█▊        | 2/11 [00:04<00:27,  3.01s/it][A
 27%|██▋       | 3/11 [00:04<00:17,  2.16s/it][A
 36%|███▋      | 4/11 [00:04<00:11,  1.57s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.94s/it][A
 55%|█████▍    | 6/11 [00:08<00:07,  1.47s/it][A
 64%|██████▎   | 7/11 [00:08<00:04,  1.08s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.24it/s][A
 82%|████████▏ | 9/11 [00:10<00:02,  1.04s/it][A
 91%|█████████ | 10/11 [00:10<00:00,  1.20it/s][A
100%|██████████| 11/11 [00:10<00:00,  1.05it/s][A

Starting epoch 19/30, LR = [0.01]
Step 200, Loss 0.010770853608846664



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:51,  4.71s/it][A
 17%|█▋        | 2/12 [00:04<00:33,  3.37s/it][A
 25%|██▌       | 3/12 [00:05<00:21,  2.43s/it][A
 33%|███▎      | 4/12 [00:05<00:14,  1.76s/it][A
 42%|████▏     | 5/12 [00:09<00:16,  2.39s/it][A
 50%|█████     | 6/12 [00:09<00:10,  1.72s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.26s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.07it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.07it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.47it/s][A
100%|██████████| 12/12 [00:10<00:00,  1.09it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

Best accuracy 0.8511659807956105




  9%|▉         | 1/11 [00:03<00:39,  3.93s/it][A
 18%|█▊        | 2/11 [00:04<00:25,  2.83s/it][A
 27%|██▋       | 3/11 [00:04<00:16,  2.06s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.51s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.95s/it][A
 55%|█████▍    | 6/11 [00:07<00:07,  1.42s/it][A
 64%|██████▎   | 7/11 [00:08<00:04,  1.05s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.27it/s][A
 82%|████████▏ | 9/11 [00:10<00:02,  1.16s/it][A
100%|██████████| 11/11 [00:10<00:00,  1.19it/s][A
[A

Starting epoch 20/30, LR = [0.01]
Step 210, Loss 0.006424985826015472



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:51,  4.68s/it][A
 17%|█▋        | 2/12 [00:04<00:33,  3.35s/it][A
 25%|██▌       | 3/12 [00:05<00:21,  2.42s/it][A
 33%|███▎      | 4/12 [00:05<00:14,  1.76s/it][A
 42%|████▏     | 5/12 [00:09<00:16,  2.36s/it][A
 50%|█████     | 6/12 [00:09<00:10,  1.70s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.24s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.08it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.03it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.42it/s][A
100%|██████████| 12/12 [00:11<00:00,  1.08it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

accuracy 0.8511659807956105




  9%|▉         | 1/11 [00:04<00:40,  4.01s/it][A
 18%|█▊        | 2/11 [00:04<00:25,  2.87s/it][A
 27%|██▋       | 3/11 [00:04<00:16,  2.08s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.51s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.93s/it][A
 55%|█████▍    | 6/11 [00:07<00:07,  1.41s/it][A
 64%|██████▎   | 7/11 [00:07<00:04,  1.04s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.28it/s][A
 82%|████████▏ | 9/11 [00:10<00:02,  1.16s/it][A
 91%|█████████ | 10/11 [00:10<00:00,  1.19it/s][A
100%|██████████| 11/11 [00:10<00:00,  1.05it/s][A

Starting epoch 21/30, LR = [0.001]
Step 220, Loss 0.005101710557937622



  0%|          | 0/12 [00:00<?, ?it/s][A

Step 230, Loss 0.037313226610422134



  8%|▊         | 1/12 [00:04<00:48,  4.41s/it][A
 17%|█▋        | 2/12 [00:04<00:31,  3.15s/it][A
 25%|██▌       | 3/12 [00:04<00:20,  2.28s/it][A
 33%|███▎      | 4/12 [00:05<00:13,  1.66s/it][A
 42%|████▏     | 5/12 [00:08<00:15,  2.27s/it][A
 50%|█████     | 6/12 [00:08<00:09,  1.65s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.21s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.11it/s][A
 75%|███████▌  | 9/12 [00:10<00:03,  1.02s/it][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.35it/s][A
100%|██████████| 12/12 [00:10<00:00,  1.10it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

accuracy 0.8511659807956105




  9%|▉         | 1/11 [00:03<00:39,  3.99s/it][A
 18%|█▊        | 2/11 [00:04<00:25,  2.86s/it][A
 27%|██▋       | 3/11 [00:04<00:16,  2.07s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.52s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.96s/it][A
 55%|█████▍    | 6/11 [00:07<00:07,  1.43s/it][A
 64%|██████▎   | 7/11 [00:08<00:04,  1.05s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.26it/s][A
 82%|████████▏ | 9/11 [00:10<00:02,  1.12s/it][A
100%|██████████| 11/11 [00:10<00:00,  1.23it/s][A
[A

Starting epoch 22/30, LR = [0.001]
Step 240, Loss 0.006269175559282303



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:51,  4.73s/it][A
 17%|█▋        | 2/12 [00:04<00:33,  3.38s/it][A
 25%|██▌       | 3/12 [00:05<00:21,  2.42s/it][A
 33%|███▎      | 4/12 [00:05<00:14,  1.77s/it][A
 42%|████▏     | 5/12 [00:09<00:16,  2.40s/it][A
 50%|█████     | 6/12 [00:09<00:10,  1.73s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.27s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.06it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.07it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.47it/s][A
100%|██████████| 12/12 [00:11<00:00,  1.09it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

accuracy 0.8511659807956105




  9%|▉         | 1/11 [00:04<00:42,  4.24s/it][A
 18%|█▊        | 2/11 [00:04<00:27,  3.02s/it][A
 27%|██▋       | 3/11 [00:04<00:17,  2.19s/it][A
 36%|███▋      | 4/11 [00:04<00:11,  1.60s/it][A
 45%|████▌     | 5/11 [00:07<00:12,  2.02s/it][A
 55%|█████▍    | 6/11 [00:08<00:07,  1.47s/it][A
 64%|██████▎   | 7/11 [00:08<00:04,  1.08s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.24it/s][A
 82%|████████▏ | 9/11 [00:10<00:02,  1.12s/it][A
100%|██████████| 11/11 [00:10<00:00,  1.23it/s][A
[A

Starting epoch 23/30, LR = [0.001]
Step 250, Loss 0.017256498336791992



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:53,  4.89s/it][A
 17%|█▋        | 2/12 [00:05<00:34,  3.49s/it][A
 25%|██▌       | 3/12 [00:05<00:22,  2.50s/it][A
 33%|███▎      | 4/12 [00:05<00:14,  1.82s/it][A
 42%|████▏     | 5/12 [00:09<00:17,  2.46s/it][A
 50%|█████     | 6/12 [00:09<00:10,  1.78s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.30s/it][A
 67%|██████▋   | 8/12 [00:10<00:03,  1.04it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.07it/s][A
 92%|█████████▏| 11/12 [00:11<00:00,  1.46it/s][A
100%|██████████| 12/12 [00:11<00:00,  1.07it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

accuracy 0.8508230452674898




  9%|▉         | 1/11 [00:04<00:43,  4.31s/it][A
 18%|█▊        | 2/11 [00:04<00:27,  3.08s/it][A
 27%|██▋       | 3/11 [00:04<00:17,  2.22s/it][A
 36%|███▋      | 4/11 [00:04<00:11,  1.62s/it][A
 45%|████▌     | 5/11 [00:07<00:12,  2.03s/it][A
 55%|█████▍    | 6/11 [00:08<00:07,  1.47s/it][A
 64%|██████▎   | 7/11 [00:08<00:04,  1.08s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.23it/s][A
 82%|████████▏ | 9/11 [00:10<00:02,  1.08s/it][A
 91%|█████████ | 10/11 [00:10<00:00,  1.27it/s][A
100%|██████████| 11/11 [00:10<00:00,  1.69it/s][A
[A

Starting epoch 24/30, LR = [0.001]
Step 260, Loss 0.015126504004001617



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:05<00:56,  5.17s/it][A
 17%|█▋        | 2/12 [00:05<00:36,  3.67s/it][A
 25%|██▌       | 3/12 [00:05<00:23,  2.63s/it][A
 33%|███▎      | 4/12 [00:05<00:15,  1.92s/it][A
 42%|████▏     | 5/12 [00:09<00:16,  2.37s/it][A
 50%|█████     | 6/12 [00:09<00:10,  1.71s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.25s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.08it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.06it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.46it/s][A
100%|██████████| 12/12 [00:11<00:00,  1.09it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

Best accuracy 0.8521947873799726




  9%|▉         | 1/11 [00:04<00:40,  4.08s/it][A
 18%|█▊        | 2/11 [00:04<00:26,  2.92s/it][A
 27%|██▋       | 3/11 [00:04<00:16,  2.11s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.55s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.96s/it][A
 55%|█████▍    | 6/11 [00:07<00:07,  1.42s/it][A
 64%|██████▎   | 7/11 [00:08<00:04,  1.05s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.27it/s][A
 82%|████████▏ | 9/11 [00:10<00:02,  1.15s/it][A
 91%|█████████ | 10/11 [00:10<00:00,  1.17it/s][A
100%|██████████| 11/11 [00:10<00:00,  1.04it/s][A

Starting epoch 25/30, LR = [0.001]
Step 270, Loss 0.004493106156587601



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:53,  4.83s/it][A
 17%|█▋        | 2/12 [00:05<00:34,  3.44s/it][A
 25%|██▌       | 3/12 [00:05<00:22,  2.48s/it][A
 33%|███▎      | 4/12 [00:05<00:14,  1.80s/it][A
 42%|████▏     | 5/12 [00:09<00:16,  2.33s/it][A
 50%|█████     | 6/12 [00:09<00:10,  1.69s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.24s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.08it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.01it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.39it/s][A
100%|██████████| 12/12 [00:11<00:00,  1.08it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

accuracy 0.8511659807956105




  9%|▉         | 1/11 [00:03<00:39,  3.97s/it][A
 18%|█▊        | 2/11 [00:04<00:25,  2.85s/it][A
 27%|██▋       | 3/11 [00:04<00:16,  2.06s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.51s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.96s/it][A
 55%|█████▍    | 6/11 [00:07<00:07,  1.43s/it][A
 64%|██████▎   | 7/11 [00:08<00:04,  1.05s/it][A
 73%|███████▎  | 8/11 [00:08<00:02,  1.26it/s][A
 82%|████████▏ | 9/11 [00:09<00:02,  1.08s/it][A
100%|██████████| 11/11 [00:10<00:00,  1.28it/s][A
[A

Starting epoch 26/30, LR = [0.001]
Step 280, Loss 0.009347908198833466



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:52,  4.82s/it][A
 17%|█▋        | 2/12 [00:05<00:34,  3.44s/it][A
 25%|██▌       | 3/12 [00:05<00:22,  2.47s/it][A
 33%|███▎      | 4/12 [00:05<00:14,  1.80s/it][A
 42%|████▏     | 5/12 [00:08<00:16,  2.29s/it][A
 50%|█████     | 6/12 [00:09<00:09,  1.65s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.21s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.11it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.05it/s][A
 83%|████████▎ | 10/12 [00:10<00:01,  1.44it/s][A
100%|██████████| 12/12 [00:10<00:00,  1.97it/s][A
[A
  0%|          | 0/11 [00:00<?, ?it/s][A

accuracy 0.850480109739369




  9%|▉         | 1/11 [00:04<00:40,  4.02s/it][A
 18%|█▊        | 2/11 [00:04<00:25,  2.88s/it][A
 27%|██▋       | 3/11 [00:04<00:16,  2.07s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.52s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.90s/it][A
 55%|█████▍    | 6/11 [00:07<00:06,  1.38s/it][A
 64%|██████▎   | 7/11 [00:07<00:04,  1.02s/it][A
 73%|███████▎  | 8/11 [00:07<00:02,  1.30it/s][A
 82%|████████▏ | 9/11 [00:09<00:02,  1.04s/it][A
100%|██████████| 11/11 [00:09<00:00,  1.33it/s][A
[A

Starting epoch 27/30, LR = [0.001]
Step 290, Loss 0.01466970145702362



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:50,  4.56s/it][A
 17%|█▋        | 2/12 [00:04<00:32,  3.26s/it][A
 25%|██▌       | 3/12 [00:05<00:21,  2.35s/it][A
 33%|███▎      | 4/12 [00:05<00:13,  1.71s/it][A
 42%|████▏     | 5/12 [00:08<00:16,  2.29s/it][A
 50%|█████     | 6/12 [00:09<00:09,  1.66s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.21s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.10it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.11it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.52it/s][A
100%|██████████| 12/12 [00:10<00:00,  1.13it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

accuracy 0.8508230452674898




  9%|▉         | 1/11 [00:03<00:38,  3.85s/it][A
 18%|█▊        | 2/11 [00:04<00:24,  2.76s/it][A
 27%|██▋       | 3/11 [00:04<00:16,  2.01s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.48s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.84s/it][A
 55%|█████▍    | 6/11 [00:07<00:06,  1.34s/it][A
 64%|██████▎   | 7/11 [00:07<00:03,  1.02it/s][A
 73%|███████▎  | 8/11 [00:07<00:02,  1.36it/s][A
 82%|████████▏ | 9/11 [00:09<00:02,  1.11s/it][A
100%|██████████| 11/11 [00:09<00:00,  1.25it/s][A
[A

Starting epoch 28/30, LR = [0.001]
Step 300, Loss 0.016494762152433395



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:51,  4.67s/it][A
 17%|█▋        | 2/12 [00:04<00:33,  3.32s/it][A
 25%|██▌       | 3/12 [00:05<00:21,  2.39s/it][A
 33%|███▎      | 4/12 [00:05<00:13,  1.74s/it][A
 42%|████▏     | 5/12 [00:09<00:16,  2.38s/it][A
 50%|█████     | 6/12 [00:09<00:10,  1.72s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.26s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.09it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.13it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.55it/s][A
100%|██████████| 12/12 [00:10<00:00,  1.11it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

accuracy 0.8515089163237312




  9%|▉         | 1/11 [00:03<00:37,  3.78s/it][A
 18%|█▊        | 2/11 [00:04<00:24,  2.74s/it][A
 27%|██▋       | 3/11 [00:04<00:15,  1.99s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.47s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.84s/it][A
 55%|█████▍    | 6/11 [00:07<00:06,  1.38s/it][A
 64%|██████▎   | 7/11 [00:07<00:04,  1.02s/it][A
 73%|███████▎  | 8/11 [00:07<00:02,  1.31it/s][A
 82%|████████▏ | 9/11 [00:09<00:02,  1.13s/it][A
100%|██████████| 11/11 [00:10<00:00,  1.22it/s][A
[A

Starting epoch 29/30, LR = [0.001]
Step 310, Loss 0.00493708997964859



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:49,  4.54s/it][A
 17%|█▋        | 2/12 [00:04<00:32,  3.25s/it][A
 25%|██▌       | 3/12 [00:05<00:21,  2.34s/it][A
 33%|███▎      | 4/12 [00:05<00:13,  1.71s/it][A
 42%|████▏     | 5/12 [00:08<00:15,  2.25s/it][A
 50%|█████     | 6/12 [00:08<00:09,  1.63s/it][A
 58%|█████▊    | 7/12 [00:09<00:05,  1.19s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.13it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.06it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.46it/s][A
100%|██████████| 12/12 [00:10<00:00,  1.13it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

accuracy 0.8511659807956105




  9%|▉         | 1/11 [00:03<00:39,  3.91s/it][A
 18%|█▊        | 2/11 [00:04<00:25,  2.80s/it][A
 27%|██▋       | 3/11 [00:04<00:16,  2.02s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.48s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.88s/it][A
 55%|█████▍    | 6/11 [00:07<00:06,  1.37s/it][A
 64%|██████▎   | 7/11 [00:07<00:04,  1.01s/it][A
 73%|███████▎  | 8/11 [00:07<00:02,  1.31it/s][A
 82%|████████▏ | 9/11 [00:09<00:02,  1.13s/it][A
 91%|█████████ | 10/11 [00:10<00:00,  1.20it/s][A
100%|██████████| 11/11 [00:10<00:00,  1.08it/s][A

Starting epoch 30/30, LR = [0.001]
Step 320, Loss 0.011034000664949417



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:48,  4.40s/it][A
 17%|█▋        | 2/12 [00:04<00:31,  3.14s/it][A
 25%|██▌       | 3/12 [00:04<00:20,  2.26s/it][A
 33%|███▎      | 4/12 [00:05<00:13,  1.64s/it][A
 42%|████▏     | 5/12 [00:08<00:15,  2.23s/it][A
 50%|█████     | 6/12 [00:08<00:09,  1.60s/it][A
 58%|█████▊    | 7/12 [00:08<00:05,  1.17s/it][A
 67%|██████▋   | 8/12 [00:09<00:03,  1.14it/s][A
 75%|███████▌  | 9/12 [00:10<00:03,  1.00s/it][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.38it/s][A
100%|██████████| 12/12 [00:10<00:00,  1.12it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A

accuracy 0.8515089163237312




  9%|▉         | 1/11 [00:03<00:38,  3.86s/it][A
 18%|█▊        | 2/11 [00:04<00:24,  2.77s/it][A
 27%|██▋       | 3/11 [00:04<00:16,  2.01s/it][A
 36%|███▋      | 4/11 [00:04<00:10,  1.48s/it][A
 45%|████▌     | 5/11 [00:07<00:11,  1.85s/it][A
 55%|█████▍    | 6/11 [00:07<00:06,  1.35s/it][A
 64%|██████▎   | 7/11 [00:07<00:04,  1.00s/it][A
 73%|███████▎  | 8/11 [00:07<00:02,  1.32it/s][A
 82%|████████▏ | 9/11 [00:09<00:02,  1.04s/it][A
 91%|█████████ | 10/11 [00:09<00:00,  1.31it/s][A
100%|██████████| 11/11 [00:09<00:00,  1.12it/s][A

Loss = [1.7154978513717651, 0.5789200067520142, 0.1964610368013382, 0.13044054806232452, 0.06928829848766327, 0.0631697028875351, 0.03482929244637489, 0.08127230405807495, 0.016721095889806747, 0.03681374713778496, 0.027139194309711456, 0.02133290469646454, 0.010023381561040878, 0.015165083110332489, 0.030835475772619247, 0.011760149151086807, 0.03178276866674423, 0.010552074760198593, 0.006648547947406769, 0.02298865094780922, 0.037313226610422134, 0.008815854787826538, 0.005598347634077072, 0.0069585032761096954, 0.012674562633037567, 0.01183924451470375, 0.00593055784702301, 0.00665036216378212, 0.014200147241353989, 0.008813586086034775]
Validation_accuracy = [0.6532921810699589, 0.8103566529492455, 0.8261316872427984, 0.8401920438957476, 0.8453360768175583, 0.8446502057613169, 0.845679012345679, 0.840877914951989, 0.8412208504801097, 0.848079561042524, 0.8477366255144033, 0.8463648834019204, 0.8463648834019204, 0.8494513031550068, 0.8477366255144033, 0.8460219478737997, 0.84876543

**Test**

In [52]:
net = bestNet
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects = 0
for images, labels in tqdm(test_dataloader):
  images = images.to(DEVICE)
  labels = labels.to(DEVICE)

  # Forward Pass
  outputs = net(images)

  # Get predictions
  _, preds = torch.max(outputs.data, 1)

  # Update Corrects
  running_corrects += torch.sum(preds == labels.data).data.item()

# Calculate Accuracy
accuracy = running_corrects / float(len(test_dataset))

print('Test Accuracy: {}'.format(accuracy))


  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:04<00:48,  4.40s/it][A
 17%|█▋        | 2/12 [00:04<00:31,  3.15s/it][A
 25%|██▌       | 3/12 [00:04<00:20,  2.27s/it][A
 33%|███▎      | 4/12 [00:05<00:13,  1.67s/it][A
 42%|████▏     | 5/12 [00:08<00:15,  2.16s/it][A
 50%|█████     | 6/12 [00:08<00:09,  1.56s/it][A
 58%|█████▊    | 7/12 [00:08<00:05,  1.15s/it][A
 67%|██████▋   | 8/12 [00:08<00:03,  1.16it/s][A
 75%|███████▌  | 9/12 [00:10<00:02,  1.08it/s][A
 92%|█████████▏| 11/12 [00:10<00:00,  1.49it/s][A
100%|██████████| 12/12 [00:10<00:00,  1.16it/s][A

Test Accuracy: 0.8572416176978914
