<a href="https://colab.research.google.com/github/freguti/Homework-ML/blob/Homework_2/Assignment2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


**Install requirements**

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip3 install 'torch==1.3.1'
!pip3 install 'torchvision==0.4.2'
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm'
!pip3 install --upgrade 'pillow'

Requirement already up-to-date: pillow in /usr/local/lib/python3.6/dist-packages (6.2.1)


**Import libraries**

In [0]:
import os
import logging
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn
import numpy as np
import torchvision
from torchvision import transforms
from torchvision.models import alexnet

from PIL import Image
from tqdm import tqdm

**Set Arguments**

In [0]:
DEVICE = 'cuda' # 'cuda' or 'cpu'

NUM_CLASSES = 101 # 101 + 1: There is am extra Background class that should be removed 

BATCH_SIZE = 256     # Higher batch sizes allows for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, learning rate should change by the same factor to have comparable results

LR = 0.05            # The initial Learning Rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 50      # Total number of training epochs (iterations over dataset)
STEP_SIZE = 20       # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 10

**Define Data Preprocessing**

In [0]:
# Define transforms for training phase
train_transform = transforms.Compose([transforms.Resize(256),      # Resizes short size of the PIL image to 256
                                      transforms.CenterCrop(224),  # Crops a central square patch of the image
                                                                   # 224 because torchvision's AlexNet needs a 224x224 input!
                                                                   # Remember this when applying different transformations, otherwise you get an error
                                      transforms.ToTensor(), # Turn PIL Image to torch.Tensor
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) # Normalizes tensor with mean and standard deviation
])
# Define transforms for the evaluation phase
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))                                    
])

In [0]:

from torchvision.datasets import VisionDataset

from PIL import Image

import os
import os.path
import sys

TRAIN_PATH = 'Homework2-Caltech101/train.txt'
TEST_PATH = 'Homework2-Caltech101/test.txt'
def pil_loader(path):
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')


class Caltech(VisionDataset):
    def __init__(self, root, split='train', transform=None, target_transform=None):
        super(Caltech, self).__init__(root, transform=transform, target_transform=target_transform)
        self.split = split # This defines the split you are going to use
                           # (split files are called 'train.txt' and 'test.txt')
        self.classes,self.cl_idx = self.read_classes()
        self.dataset = self.read()
        return

    def read_classes(self):
      classes = []
      f = open(TEST_PATH)
      for l in f.readlines():
        if(l.strip() != '\n' and l.strip() != '' and l.split('/')[0] != 'BACKGROUND_Google'):
          if(not l.split('/')[0] in classes):
            classes.append(l.split('/')[0])
      cl_idx = {classes[i]: i for i in range(len(classes))}
      return classes,cl_idx


    def read(self):
      fine_dataset = []
      #index_class= []
      fine_dataset.clear()
      if(self.split == 'train'):
        f = open(TRAIN_PATH)
      elif(self.split == 'test'):
        f = open(TEST_PATH)
      else:
        return fine_dataset
      for l in f.readlines():
        if(l.strip() != '\n' and l.strip() != ''):
          index_class = l.split('/')[0]
          if(index_class in self.cl_idx and index_class is not None):
            fine_dataset.append((pil_loader(DATA_DIR + '/'+l.split("\n")[0]),self.cl_idx[index_class])) 
      print('len dataset{}'.format(len(fine_dataset)))
      return fine_dataset#,index_class
    def __getitem__(self, index):
        '''
        __getitem__ should access an element through its index
        Args:
            index (int): Index

        Returns:
            tuple: (sample, target) where target is class_index of the target class.
        '''
        image,label = self.dataset[index]

        # Applies preprocessing when accessing the image
        if self.transform is not None:
            image = self.transform(image)
        return image, label

    def __len__(self):
        '''
        The __len__ method returns the length of the dataset
        It is mandatory, as this is used by several other components
        '''
        length = len(self.dataset) # Provide a way to get the length (number of elements) of the dataset
        return length


**Prepare Dataset**

In [7]:
# Clone github repository with data
if not os.path.isdir('./Homework2-Caltech101'):
  !git clone https://github.com/MachineLearning2020/Homework2-Caltech101.git

DATA_DIR = 'Homework2-Caltech101/101_ObjectCategories'

train_dataset = Caltech(DATA_DIR,'train',train_transform);

test_dataset = Caltech(DATA_DIR,'test',train_transform);

print('Train Dataset: {}'.format(len(train_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))


Cloning into 'Homework2-Caltech101'...
remote: Enumerating objects: 9256, done.[K
remote: Total 9256 (delta 0), reused 0 (delta 0), pack-reused 9256[K
Receiving objects: 100% (9256/9256), 129.48 MiB | 43.90 MiB/s, done.
Resolving deltas: 100% (4/4), done.
Checking out files: 100% (9149/9149), done.
len dataset5784
len dataset2893
Train Dataset: 5784
Test Dataset: 2893


In [8]:
train = [idx for idx in range(len(train_dataset)) if idx % 2==0]
val=[i+1 for i in train]

train_dataset=Subset(train_dataset,val)
val_dataset=Subset(train_dataset,train)
print('Training split: {}'.format(len(train_dataset)))
print('Validation split: {}'.format(len(val_dataset)))


Training split: 2892
Validation split: 2892


**Prepare Dataloaders**

In [0]:
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
validation_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

**Prepare Network**

In [0]:
net = alexnet() # Loading AlexNet model

# AlexNet has 1000 output neurons, corresponding to the 1000 ImageNet's classes
# We need 101 outputs for Caltech-101
net.classifier[6] = nn.Linear(4096, NUM_CLASSES) # nn.Linear in pytorch is a fully connected layer
                                                 # The convolutional layer is nn.Conv2d

# We just changed the last layer of AlexNet with a new fully connected layer with 101 outputs
# It is mandatory to study torchvision.models.alexnet source code

**Prepare Training**

In [0]:
# Define loss function
criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy

# Choose parameters to optimize
# To access a different set of parameters, you have to access submodules of AlexNet
# (nn.Module objects, like AlexNet, implement the Composite Pattern)
# e.g.: parameters of the fully connected layers: net.classifier.parameters()
# e.g.: parameters of the convolutional layers: look at alexnet's source code ;) 
parameters_to_optimize = net.parameters() # In this case we optimize over all the parameters of AlexNet

# Define optimizer
# An optimizer updates the weights based on loss
# We use SGD with momentum
optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

# Define scheduler
# A scheduler dynamically changes learning rate
# The most common schedule is the step(-down), which multiplies learning rate by gamma every STEP_SIZE epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

**UTILITIES**

In [0]:
import copy
def evaluate(model,test_dataset,test_dataloder):
  model = model.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
  cudnn.benchmark # Calling this optimizes runtime
  model.train(False) # Set Network to evaluation mode
  accuracy=0
  running_corrects = 0
  for images, labels in test_dataloader:
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)
    # Forward Pass
    outputs = model(images)
    # Get predictions
    _, preds = torch.max(outputs.data, 1)
    # Update Corrects
    running_corrects += torch.sum(preds == labels.data).data.item()
  # Calculate Accuracy
  accuracy = running_corrects / float(len(test_dataset))
  print('\t[Validation Accuracy]: {}'.format(accuracy))

  
  return accuracy

def train_and_validate(n_epoch,model,train_dataloader,val_dataset,val_dataloader,optimizer,criterion,scheduler):
  '''
  train the model and validate it on each epoch, 
  return the model with best score on validation set
  '''
  model = model.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
  current_step = 0
  best_model={'accuracy':0}
  accuracies=[]
  loss_=[]
  # Start iterating over the epochs
  
  for epoch in range(n_epoch):
    medium_loss=0
    count_step=0
    print('Starting epoch {}/{}, LR = {}'.format(epoch+1, n_epoch, scheduler.get_lr()))
    # Iterate over the dataset
    for images, labels in train_dataloader:
      # Bring data over the device of choice
      images = images.to(DEVICE)
      labels = labels.to(DEVICE)
      model.train() # Sets module in training mode
      # PyTorch, by default, accumulates gradients after each backward pass
      # We need to manually set the gradients to zero before starting a new iteration
      optimizer.zero_grad() # Zero-ing the gradients
      # Forward pass to the network
      outputs = model(images)
      # Compute loss based on output and ground truth
      loss = criterion(outputs, labels)

      medium_loss+=loss.item()
      count_step+=1
      # Compute gradients for each layer and update weights
      loss.backward()  # backward pass: computes gradients
      optimizer.step() # update weights based on accumulated gradients
      current_step += 1
    medium_loss=medium_loss/count_step
    loss_.append(medium_loss)
    print('--------------------------------------------') # evaluate epoch's model on val set and evaluate the medium loss
    print('\t[Medium loss of epoch {}]: {}'.format(epoch+1,medium_loss))
    accuracy=evaluate(model,val_dataset,val_dataloader)
    accuracies.append(accuracy)
    if(accuracy > best_model['accuracy']):
      print('\t\t\t\t[BEST MODEL found!]')
      best_model['accuracy']=copy.deepcopy(accuracy)
      best_model['model']=copy.deepcopy(model)
    print('--------------------------------------------')
    scheduler.step()
  best_model['accuracies']=accuracies

  return best_model

**TRAIN**

In [0]:
best_model=train_and_validate(50,net,train_dataloader,val_dataset,validation_dataloader,optimizer,criterion,scheduler)

**Test**

In [0]:
net = best_model['model']
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

accuracy=evaluate(net,test_dataset,test_dataloader)

**TRANSFER LEARNING**


In [0]:
#change the normalization, ImageNet mean and standard deviation
normalize = transforms.Normalize(mean= [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])
LRs = [0.01,0.001,0.0001]
EPOCHs = [50,70,20]
STEP_SIZEs = [20,30,5]
GAMMAs = [0.1,0.05,0.2]
BATCH_SIZEs = [256,256,26]
for lr,epoch,step,gamma,index_for in zip(LRs,EPOCHs,STEP_SIZEs,GAMMAs,[0,1,2]):
  trained_net = alexnet(pretrained=True)
  train_transform = transforms.Compose([transforms.Resize(256),      # Resizes short size of the PIL image to 256
                                        transforms.CenterCrop(224),  # Crops a central square patch of the image
                                                                    # 224 because torchvision's AlexNet needs a 224x224 input!
                                                                    # Remember this when applying different transformations, otherwise you get an error
                                        transforms.ToTensor(), # Turn PIL Image to torch.Tensor
                                        normalize])
  # Define transforms for the evaluation phase
  eval_transform = transforms.Compose([transforms.Resize(256),
                                        transforms.CenterCrop(224),
                                        transforms.ToTensor(),
                                        normalize])   
  train_dataset = Caltech(DATA_DIR,'train',train_transform);

  test_dataset = Caltech(DATA_DIR,'test',eval_transform);

  train = [idx for idx in range(len(train_dataset)) if idx % 2==0]
  val=[i+1 for i in train]

  train_dataset=Subset(train_dataset,val)
  val_dataset=Subset(train_dataset,train)
  print('Training split: {}'.format(len(train_dataset)))
  print('Validation split: {}'.format(len(val_dataset)))

  trained_net.classifier[6] = nn.Linear(4096, NUM_CLASSES) # nn.Linear in pytorch is a fully connected layer
                                                  # The convolutional layer is nn.Conv2d

  t_criterion = nn.CrossEntropyLoss()


  train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
  test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
  validation_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

  parameters_to_optimize = trained_net.parameters()
  optimizer = optim.SGD(parameters_to_optimize, lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step, gamma=gamma)
  best_model[index_for]=train_and_validate(epoch,trained_net,train_dataloader,val_dataset,validation_dataloader,optimizer,criterion,scheduler)
  
  print("LR = {}, Number of epochs = {}, Step size = {}, Gamma = {}\n Best accuracy = {}".format(lr,epoch,step,gamma,max(best_model[index_for]["accuracies"])))

'''
 LR = 0.01, Number of epochs = 50, Step size = 20, Gamma = 0.1
 Best accuracy = 0.8509681881051175

 LR = 0.001, Number of epochs = 70, Step size = 30, Gamma = 0.05
 Best accuracy = 0.83298755186722

 LR = 0.0001, Number of epochs = 20, Step size = 5, Gamma = 0.2
 Best accuracy = 0.3558091286307054
 '''

In [0]:
max_acc = 0
for index_for in [0,1,2]:
  print("LR = {}, Number of epochs = {}, Step size = {}, Gamma = {}\nBest accuracy = {}\n".format(LRs[index_for],EPOCHs[index_for],STEP_SIZEs[index_for],GAMMAs[index_for],max(best_model[index_for]["accuracies"])))
  accuracy=evaluate(best_model[index_for]["model"],test_dataset,test_dataloader)
  if(accuracy > max_acc):
    max_acc = accuracy
    max_lr = LRs[index_for]
    max_epoch = EPOCHs[index_for]
    max_step = STEP_SIZEs[index_for]
    max_gamma = GAMMAs[index_for]
# best params: LR = 0.005, Number of epochs = 50, Step size = 20, Gamma = 0.1
# Best accuracy = 0.8509681881051175
# Test accuracy = 0.8506740407881093


In [0]:
max_lr = 0.01
max_epoch = 50
max_step = 20
max_gamma = 0.1

In [14]:
#fully connected layer net.classifier.parameters()
#convolutional layer net.features.parameters()
normalize = transforms.Normalize(mean= [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])
fully_net = alexnet(pretrained=True)
conv_net = alexnet(pretrained=True)
train_transform = transforms.Compose([transforms.Resize(256),      # Resizes short size of the PIL image to 256
                                      transforms.CenterCrop(224),  # Crops a central square patch of the image
                                                                  # 224 because torchvision's AlexNet needs a 224x224 input!
                                                                  # Remember this when applying different transformations, otherwise you get an error
                                      transforms.ToTensor(), # Turn PIL Image to torch.Tensor
                                      normalize])
# Define transforms for the evaluation phase
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      normalize])   
train_dataset = Caltech(DATA_DIR,'train',train_transform);

test_dataset = Caltech(DATA_DIR,'test',eval_transform);

train = [idx for idx in range(len(train_dataset)) if idx % 2==0]
val=[i+1 for i in train]

train_dataset=Subset(train_dataset,val)
val_dataset=Subset(train_dataset,train)
print('Training split: {}'.format(len(train_dataset)))
print('Validation split: {}'.format(len(val_dataset)))

fully_net.classifier[6] = nn.Linear(4096, NUM_CLASSES) 
conv_net.classifier[6] = nn.Linear(4096, NUM_CLASSES) 
t_criterion = nn.CrossEntropyLoss()


train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
validation_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
parameters_to_optimize = fully_net.classifier.parameters()
parameters_to_optimize_conv = conv_net.features.parameters()

for param,i,net in zip([parameters_to_optimize_conv,parameters_to_optimize],["convolutional layer","fully connected layer"],[conv_net,fully_net]):
  optimizer = optim.SGD(param, lr=max_lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=max_step, gamma=max_gamma)
  best_model=train_and_validate(max_epoch,net,train_dataloader,val_dataset,validation_dataloader,optimizer,criterion,scheduler)
  print("type = {} ,LR = {}, Number of epochs = {}, Step size = {}, Gamma = {}\n Best accuracy = {}\n".format(i,max_lr,max_epoch,max_step,max_gamma,max(best_model["accuracies"])))
  accuracy = evaluate(best_model["model"],test_dataset,test_dataloader)
  print("test accuracy = {}\n".format(accuracy))
# CONVOLUTIONAL: Best accuracy = 0.5615491009681881 test = 0.5613549948150709

# FULLY CONNECTED: Best accuracy = 0.8592669432918395 test = 0.8589699274109921


len dataset5784
len dataset2893
Training split: 2892
Validation split: 2892
Starting epoch 1/50, LR = [0.01]
--------------------------------------------
	[Medium loss of epoch 1]: 4.576128699562767
	[Validation Accuracy]: 0.165283540802213
				[BEST MODEL found!]
--------------------------------------------
Starting epoch 2/50, LR = [0.01]
--------------------------------------------
	[Medium loss of epoch 2]: 3.9858145713806152
	[Validation Accuracy]: 0.23686030428769017
				[BEST MODEL found!]
--------------------------------------------
Starting epoch 3/50, LR = [0.01]
--------------------------------------------
	[Medium loss of epoch 3]: 3.5384666702964087
	[Validation Accuracy]: 0.28734439834024894
				[BEST MODEL found!]
--------------------------------------------
Starting epoch 4/50, LR = [0.01]
--------------------------------------------
	[Medium loss of epoch 4]: 3.35447604005987
	[Validation Accuracy]: 0.3112033195020747
				[BEST MODEL found!]
---------------------------

In [21]:
normalize = transforms.Normalize(mean= [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])
train_tr1 = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224), 
                                      transforms.RandomHorizontalFlip(1),  
                                      transforms.ToTensor(),
                                      normalize ])

test_tr1 = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.RandomHorizontalFlip(1),
                                      transforms.ToTensor(),
                                      normalize])  

train_tr2 = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224), 
                                      transforms.RandomVerticalFlip(1),  
                                      transforms.ToTensor(),
                                      normalize ])

test_tr2 = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.RandomVerticalFlip(1),
                                      transforms.ToTensor(),
                                      normalize])   

train_tr3 = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224), 
                                      transforms.RandomPerspective(0.5,1,3),  
                                      transforms.ToTensor(),
                                      normalize ])

test_tr3 = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.RandomPerspective(0.5,1,3),
                                      transforms.ToTensor(),
                                      normalize])    

train_dataset = Caltech(DATA_DIR,'train',train_transform);
test_dataset = Caltech(DATA_DIR,'test',eval_transform);
train_dataset += Caltech(DATA_DIR,'train',train_tr1);
test_dataset += Caltech(DATA_DIR,'test',test_tr1);
train_dataset += Caltech(DATA_DIR,'train',train_tr2);
test_dataset += Caltech(DATA_DIR,'test',test_tr2);
train_dataset += Caltech(DATA_DIR,'train',train_tr3);
test_dataset += Caltech(DATA_DIR,'test',test_tr3);
print(len(test_dataset))
print(len(train_dataset))




len dataset5784
len dataset2893
len dataset5784
len dataset2893
len dataset5784
len dataset2893
len dataset5784
len dataset2893
11572
23136


In [22]:
train = [idx for idx in range(len(train_dataset)) if idx % 2==0]
val=[i+1 for i in train]

train_dataset=Subset(train_dataset,val)
val_dataset=Subset(train_dataset,train)
print('Training split: {}'.format(len(train_dataset)))
print('Validation split: {}'.format(len(val_dataset)))

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
validation_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

net = alexnet(pretrained=True)

net.classifier[6] = nn.Linear(4096, NUM_CLASSES) 
t_criterion = nn.CrossEntropyLoss()

parameters_to_optimize = net.parameters()

optimizer = optim.SGD(parameters_to_optimize, lr=max_lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=max_step, gamma=max_gamma)
best_model=train_and_validate(max_epoch,net,train_dataloader,val_dataset,validation_dataloader,optimizer,criterion,scheduler)



# loss: 0.0034390117559168075 train:  0.8150933609958506 test: 0.814898029726927

Training split: 11568
Validation split: 11568
Starting epoch 1/50, LR = [0.01]
--------------------------------------------
	[Medium loss of epoch 1]: 1.6204469468858507
	[Validation Accuracy]: 0.7519017980636238
				[BEST MODEL found!]
--------------------------------------------
Starting epoch 2/50, LR = [0.01]
--------------------------------------------
	[Medium loss of epoch 2]: 0.3730251845386293
	[Validation Accuracy]: 0.7636583679114799
				[BEST MODEL found!]
--------------------------------------------
Starting epoch 3/50, LR = [0.01]
--------------------------------------------
	[Medium loss of epoch 3]: 0.20565384891298083
	[Validation Accuracy]: 0.7815525587828492
				[BEST MODEL found!]
--------------------------------------------
Starting epoch 4/50, LR = [0.01]
--------------------------------------------
	[Medium loss of epoch 4]: 0.11086799982521268
	[Validation Accuracy]: 0.7799965421853389
--------------------------------------------
Starting epoch 5/50, LR = [0.01]

NameError: ignored

In [23]:
accuracy = evaluate(best_model["model"],test_dataset,test_dataloader)
print("test accuracy = {}\n".format(accuracy))

	[Validation Accuracy]: 0.814898029726927
test accuracy = 0.814898029726927

