In [154]:
# from zipfile import ZipFile
# filename = "cifar10.zip"

# with ZipFile(filename, 'r') as zip:
#   zip.extractall()
#   print('Done')

#Packages:
import torch
import torchvision
import numpy as np
from os import path as ospath
from skimage import io
import skimage.transform
import torch.nn as nn
import os
import pandas as pd
from tabulate import tabulate
from tqdm import tqdm_notebook

Done


Part A: Building a Custom Data Loader 

In [136]:
class CifarDataset(torch.utils.data.Dataset):
  def __init__(self, root_dir, mobile_net = False):
    """Initializes a dataset containing images and labels."""
    super().__init__()
    self.img_list = []
    self.label_dict = {
        "airplane": 0,
        "automobile": 1,
        "bird": 2,
        "cat": 3,
        "deer": 4,
        "dog": 5,
        "frog": 6,
        "horse": 7,
        "ship": 8,
        "truck": 9
    }
    self.root_dir = root_dir
    self.mobile_net = mobile_net
    #pre-load images (data)
    # for k,v in self.label_dict.items():
    #   for file in os.listdir(os.path.join(self.root_dir, k)):
    #     self.img_list.append([os.path.join(self.root_dir, k, file), self.label_dict[k]])
    # #   file = os.listdir(os.path.join(self.root_dir, k))
    # # print(sorted(self.label_dict))
    for folder in os.listdir(path=root_dir):
      for file in os.listdir(path=root_dir+'/'+folder):
        # print(root_dir+'/'+folder+'/'+file)
        if file.endswith('.png'):
          image = io.imread(root_dir+'/'+folder+'/'+file)
          self.img_list.append((image, self.label_dict[folder]))
        # image = io.imread(root_dir+'/'+folder+'/'+file)
        # self.img_list.append((image, folder)) 
    #     # raise NotImplementedError
    self.img_list = np.array(self.img_list)
  def __len__(self):
    """Returns the size of the dataset."""
    return len(self.img_list)

  def __getitem__(self, index):
    """Returns the index-th data item of the dataset."""

    if self.mobile_net is True:
      return (skimage.transform.resize(self.img_list[index][0], (224, 224)),self.img_list[index][1])
    else:
      return (self.img_list[index][0],self.img_list[index][1])

Part B: Training a neural network in PyTorch (25 points)

In [192]:
#3-layer MLP model
class MultilayerPerceptron(nn.Module):
  def __init__(self, input_size, hidden_size, num_classes, dropout = 0.2):
    super().__init__()
    self.fc1 = nn.Linear(input_size, hidden_size)
    self.tanh = nn.Tanh()
    self.dropout = nn.Dropout(p = dropout)
    self.fc2 = nn.Linear(hidden_size, num_classes)
    self.input_size = input_size
  def forward(self, x):
    out = self.fc1(x.reshape(-1, self.input_size))
    out = self.tanh(out)
    out = self.fc2(out)
    return out
  
def training(training_path, device, model, criterion, optimizer, num_epochs, BATCH_SIZE, mobile_net = False):
  train_dataset = CifarDataset(TRAIN_DIRECTORY_PATH)
  train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                batch_size=BATCH_SIZE,
                                                shuffle=True)
  min_loss = 10000000.0
  total_epochs = tqdm_notebook(range(num_epochs))

  model.train() #set the model into training mode
  
  for epch in total_epochs:
    for i, (images, labels) in enumerate(train_dataloader):
      #Move tensors to the configured device
      images = images.to(device)
      if mobile_net is True:
        images = images.permute(0, 3, 1, 2)
      labels = labels.to(device)

      #forward pass
      outputs = model(images.float())
      labels = labels.type(torch.LongTensor)
      loss = criterion(outputs, labels)

      #backward and optimize
      loss.backward()
      optimizer.step()
      optimizer.zero_grad()

      if loss.item() <= min_loss:
        min_loss = loss.item()
      if (i+1) % 10 == 0:
        total_epochs.set_description(
            'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epch + 1, NUM_EPOCHS, i + 1, len(train_dataloader), loss.item())
        )
  print('Min Loss was: {} % '.format(min_loss))
def evaluation(test_path, device, model, criterion, BATCH_SIZE, mobile_net = False):
  test_dataset = CifarDataset(TEST_DIRECTORY_PATH)
  test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                                batch_size=BATCH_SIZE,
                                                shuffle=True)
  model.eval()
  with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in tqdm_notebook(test_dataloader):
      images = images.to(device)
      if mobile_net is True:
        images = images.permute(0, 3, 1, 2)
      labels = labels.to(device)
      outputs = model(images.float())
      _, predicted = torch.max(outputs.data, 1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()

  print('Accuracy of the network on the test images: {} %'.format(100 * correct/total))
#parameters
TRAIN_DIRECTORY_PATH = 'cifar10/cifar10_train'
TEST_DIRECTORY_PATH = 'cifar10/cifar10_test'
NUM_CLASSES = 10
NUM_EPOCHS = 20
INPUT_SIZE = 32*32*3
device = 'cpu'

DROP_OUT = 0.5
LEARNING_RATE = 0.00005
HIDDEN_SIZE = 500
BATCH_SIZE = 100
WEIGHT_DECAY = 0.0001

model = MultilayerPerceptron(INPUT_SIZE, HIDDEN_SIZE, NUM_CLASSES).to(device)
criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.Adamax(model.parameters(), lr = LEARNING_RATE, weight_decay = WEIGHT_DECAY) 
optimizer = torch.optim.RMSprop(model.parameters(), lr = LEARNING_RATE, weight_decay = WEIGHT_DECAY) 

########
def part2():
  training(TRAIN_DIRECTORY_PATH, device, model, criterion, optimizer, NUM_EPOCHS, BATCH_SIZE)
  evaluation(TEST_DIRECTORY_PATH, device, model, criterion, BATCH_SIZE)

part2()

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Min Loss was: 1.9320440292358398 % 


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0), HTML(value='')))


Accuracy of the network on the test images: 25.68 %


Results:

In [156]:
# Here, I tinkered around with 3 different learning rates:
n = [10]*3
lr = [0.0001, 0.00001, 0.00005]
d_out = [0.2]*3
h_size = [500]*3
optim = ['Adamax']*3
w_decay = [0.5]*3
b_size = [100]*3
l = [1.8089, 1.6517, 1.6826]
a = [32.58, 36.92, 36.38]
pd.set_option("display.max_rows", None, "display.max_columns", None)
df = pd.DataFrame({'Num Epochs': n, 'Learning Rate': lr, 'Drop Out': d_out, 'Hidden Size': h_size, 'Optimizer': optim, 'Weight Decay': w_decay, 'Batch Size': b_size, 'Loss': l, 'Accuracy': a})
print(tabulate(df, headers='keys', tablefmt='psql'))

+----+--------------+-----------------+------------+---------------+-------------+----------------+--------------+--------+------------+
|    |   Num Epochs |   Learning Rate |   Drop Out |   Hidden Size | Optimizer   |   Weight Decay |   Batch Size |   Loss |   Accuracy |
|----+--------------+-----------------+------------+---------------+-------------+----------------+--------------+--------+------------|
|  0 |           10 |          0.0001 |        0.2 |           500 | Adamax      |            0.5 |          100 | 1.8089 |      32.58 |
|  1 |           10 |          1e-05  |        0.2 |           500 | Adamax      |            0.5 |          100 | 1.6517 |      36.92 |
|  2 |           10 |          5e-05  |        0.2 |           500 | Adamax      |            0.5 |          100 | 1.6826 |      36.38 |
+----+--------------+-----------------+------------+---------------+-------------+----------------+--------------+--------+------------+


In [162]:
# Now I fix the Learning Rate as the ones that yielded the best accuracy and lowest loss, and tinker around with Drop Out
n = [10]*3
lr = [0.00005]*3
d_out = [0.5, 0.6, 0.7]
h_size = [500]*3
optim = ['Adamax']*3
w_decay = [0.5]*3
b_size = [100]*3
l = [1.6588, 1.6885, 1.6704]
a = [36.12, 35.89, 35.12]
pd.set_option("display.max_rows", None, "display.max_columns", None)
df = pd.DataFrame({'Num Epochs': n, 'Learning Rate': lr, 'Drop Out': d_out, 'Hidden Size': h_size, 'Optimizer': optim, 'Weight Decay': w_decay, 'Batch Size': b_size, 'Loss': l, 'Accuracy': a})
print(tabulate(df, headers='keys', tablefmt='psql'))

+----+--------------+-----------------+------------+---------------+-------------+----------------+--------------+--------+------------+
|    |   Num Epochs |   Learning Rate |   Drop Out |   Hidden Size | Optimizer   |   Weight Decay |   Batch Size |   Loss |   Accuracy |
|----+--------------+-----------------+------------+---------------+-------------+----------------+--------------+--------+------------|
|  0 |           10 |           5e-05 |        0.5 |           500 | Adamax      |            0.5 |          100 | 1.6588 |      36.12 |
|  1 |           10 |           5e-05 |        0.6 |           500 | Adamax      |            0.5 |          100 | 1.6885 |      35.89 |
|  2 |           10 |           5e-05 |        0.7 |           500 | Adamax      |            0.5 |          100 | 1.6704 |      35.12 |
+----+--------------+-----------------+------------+---------------+-------------+----------------+--------------+--------+------------+


In [167]:
# Now I fix the Learning Rate and Drop Out as the ones that yielded the best accuracy and lowest loss, and tinker around with Weight Decay
n = [10]*3
lr = [0.00005]*3
d_out = [0.5]*3
h_size = [500]*3
optim = ['Adamax']*3
w_decay = [0.001, 0.0001, 0.00001]
b_size = [100]*3
l = [1.5759, 1.5669, 1.5727]
a = [36.83, 36.87, 36.61]
pd.set_option("display.max_rows", None, "display.max_columns", None)
df = pd.DataFrame({'Num Epochs': n, 'Learning Rate': lr, 'Drop Out': d_out, 'Hidden Size': h_size, 'Optimizer': optim, 'Weight Decay': w_decay, 'Batch Size': b_size, 'Loss': l, 'Accuracy': a})
print(tabulate(df, headers='keys', tablefmt='psql'))

+----+--------------+-----------------+------------+---------------+-------------+----------------+--------------+--------+------------+
|    |   Num Epochs |   Learning Rate |   Drop Out |   Hidden Size | Optimizer   |   Weight Decay |   Batch Size |   Loss |   Accuracy |
|----+--------------+-----------------+------------+---------------+-------------+----------------+--------------+--------+------------|
|  0 |           10 |           5e-05 |        0.5 |           500 | Adamax      |         0.001  |          100 | 1.5759 |      36.83 |
|  1 |           10 |           5e-05 |        0.5 |           500 | Adamax      |         0.0001 |          100 | 1.5669 |      36.87 |
|  2 |           10 |           5e-05 |        0.5 |           500 | Adamax      |         1e-05  |          100 | 1.5727 |      36.61 |
+----+--------------+-----------------+------------+---------------+-------------+----------------+--------------+--------+------------+


In [182]:
# Now I fix the Learning Rate, Drop Out and Weight Decay as the ones that yielded the best accuracy and lowest loss, and tinker around with the Number of Epochs
n = [10, 15, 20]
lr = [0.00005]*3
d_out = [0.5]*3
h_size = [400, 500, 600]
optim = ['Adamax']*3
w_decay = [0.0001]*3
b_size = [100]*3
l = [1.5364, 1.5097, 1.5279]
a = [36.85, 37.5, 37.43]
pd.set_option("display.max_rows", None, "display.max_columns", None)
df = pd.DataFrame({'Num Epochs': n, 'Learning Rate': lr, 'Drop Out': d_out, 'Hidden Size': h_size, 'Optimizer': optim, 'Weight Decay': w_decay, 'Batch Size': b_size, 'Loss': l, 'Accuracy': a})
print(tabulate(df, headers='keys', tablefmt='psql'))

+----+--------------+-----------------+------------+---------------+-------------+----------------+--------------+--------+------------+
|    |   Num Epochs |   Learning Rate |   Drop Out |   Hidden Size | Optimizer   |   Weight Decay |   Batch Size |   Loss |   Accuracy |
|----+--------------+-----------------+------------+---------------+-------------+----------------+--------------+--------+------------|
|  0 |           10 |           5e-05 |        0.5 |           400 | Adamax      |         0.0001 |          100 | 1.5364 |      36.85 |
|  1 |           15 |           5e-05 |        0.5 |           500 | Adamax      |         0.0001 |          100 | 1.5097 |      37.5  |
|  2 |           20 |           5e-05 |        0.5 |           600 | Adamax      |         0.0001 |          100 | 1.5279 |      37.43 |
+----+--------------+-----------------+------------+---------------+-------------+----------------+--------------+--------+------------+


In [179]:
# Now I fix the Learning Rate, Drop Out, Weight Decay and Number of Epochs as the one that yielded the best accuracy and lowest loss, and tinker around with the Hidden Size
n = [15]*3
lr = [0.00005]*3
d_out = [0.5]*3
h_size = [400, 500, 600]
optim = ['Adamax']*3
w_decay = [0.0001]*3
b_size = [100]*3
l = [1.5425, 1.5155, 1.5413]
a = [36.13, 37.78, 37.96]
pd.set_option("display.max_rows", None, "display.max_columns", None)
df = pd.DataFrame({'Num Epochs': n, 'Learning Rate': lr, 'Drop Out': d_out, 'Hidden Size': h_size, 'Optimizer': optim, 'Weight Decay': w_decay, 'Batch Size': b_size, 'Loss': l, 'Accuracy': a})
print(tabulate(df, headers='keys', tablefmt='psql'))

+----+--------------+-----------------+------------+---------------+-------------+----------------+--------------+--------+------------+
|    |   Num Epochs |   Learning Rate |   Drop Out |   Hidden Size | Optimizer   |   Weight Decay |   Batch Size |   Loss |   Accuracy |
|----+--------------+-----------------+------------+---------------+-------------+----------------+--------------+--------+------------|
|  0 |           15 |           5e-05 |        0.5 |           400 | Adamax      |         0.0001 |          100 | 1.5425 |      36.13 |
|  1 |           15 |           5e-05 |        0.5 |           500 | Adamax      |         0.0001 |          100 | 1.5155 |      37.78 |
|  2 |           15 |           5e-05 |        0.5 |           600 | Adamax      |         0.0001 |          100 | 1.5413 |      37.96 |
+----+--------------+-----------------+------------+---------------+-------------+----------------+--------------+--------+------------+


In [184]:
# Now I fix the Learning Rate, Drop Out, Weight Decay, Number of Epochs and Hidden Size as the ones that yielded the best accuracy and lowest loss, and tinker around with the Batch Size
n = [15]*3
lr = [0.00005]*3
d_out = [0.5]*3
h_size = [500]*3
optim = ['Adamax']*3
w_decay = [0.0001]*3
b_size = [32, 64, 128]
l = [1.3237,1.4838, 1.5711]
a = [37.35,38.12, 38.01]
pd.set_option("display.max_rows", None, "display.max_columns", None)
df = pd.DataFrame({'Num Epochs': n, 'Learning Rate': lr, 'Drop Out': d_out, 'Hidden Size': h_size, 'Optimizer': optim, 'Weight Decay': w_decay, 'Batch Size': b_size, 'Loss': l, 'Accuracy': a})
print(tabulate(df, headers='keys', tablefmt='psql'))

+----+--------------+-----------------+------------+---------------+-------------+----------------+--------------+--------+------------+
|    |   Num Epochs |   Learning Rate |   Drop Out |   Hidden Size | Optimizer   |   Weight Decay |   Batch Size |   Loss |   Accuracy |
|----+--------------+-----------------+------------+---------------+-------------+----------------+--------------+--------+------------|
|  0 |           15 |           5e-05 |        0.5 |           500 | Adamax      |         0.0001 |           32 | 1.3237 |      37.35 |
|  1 |           15 |           5e-05 |        0.5 |           500 | Adamax      |         0.0001 |           64 | 1.4838 |      38.12 |
|  2 |           15 |           5e-05 |        0.5 |           500 | Adamax      |         0.0001 |          128 | 1.5711 |      38.01 |
+----+--------------+-----------------+------------+---------------+-------------+----------------+--------------+--------+------------+


In [None]:
# Now I fix the Learning Rate, Drop Out, Weight Decay, Number of Epochs, Hidden Size and Batch Size as the ones that yielded the best accuracy and lowest loss, and tinker around with the Optimizers
n = [15]*3
lr = [0.00005]*3
d_out = [0.5]*3
h_size = [500]*3
optim = ['Adamax', 'SGD', 'RMSProp']
w_decay = [0.0001]*3
b_size = [64]*3
l = [1.3833,1.5253,1.6852]
a = [36.83,35.79,28.76]
pd.set_option("display.max_rows", None, "display.max_columns", None)
df = pd.DataFrame({'Num Epochs': n, 'Learning Rate': lr, 'Drop Out': d_out, 'Hidden Size': h_size, 'Optimizer': optim, 'Weight Decay': w_decay, 'Batch Size': b_size, 'Loss': l, 'Accuracy': a})
print(tabulate(df, headers='keys', tablefmt='psql'))

Final Remarks: 

The method in which I selected the "optimal" hyperparameters is in no way a good way. A better way to choose the optimal hyperparameters would be to use cross-validation, perhaps using a 5-fold or 10-fold and determine which combinations of hyperparameters yield the lowest error. 

Part C: Transfer Learning

In [193]:
EXTRACT_FEATURES = True #true means we fine tune, false means we don't.
def part3():
  # (1) freezing all the MobileNetV2 layers (feature extraction) and only train the final classification layer; 
  # (2) finetuning all MobileNetV2 layers together with the final classification layer.
  # Simple flag will do the trick!
  # Load MobileNetV2 model
  mobile_model = torch.hub.load('pytorch/vision', 'mobilenet_v2', pretrained=True)

  if EXTRACT_FEATURES is True:
    for param in mobile_model.parameters():
      param.requires_grad = False
  numftrs = mobile_model.classifier[1].in_features
  mobile_model.classifier[1] = torch.nn.Linear(in_features = numftrs, out_features=NUM_CLASSES)
  mobile_model = mobile_model.to(device)
  mobile_criterion = nn.CrossEntropyLoss()
  
  params = mobile_model.parameters()
  if EXTRACT_FEATURES is True:
    params = []
    for n,p in mobile_model.named_parameters():
      if p.requires_grad:
        params.append(p)
  optimizer = torch.optim.SGD(params, LEARNING_RATE, momentum = 0.9)
  training(TRAIN_DIRECTORY_PATH, device, mobile_model, mobile_criterion, optimizer, NUM_EPOCHS, BATCH_SIZE, True)
  evaluation(TEST_DIRECTORY_PATH, device, mobile_model, BATCH_SIZE, True)
part3()

Using cache found in /root/.cache/torch/hub/pytorch_vision_master
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

KeyboardInterrupt: ignored

Results:

In [194]:
# I now choose the optimal parameters to run for Freezing
t = ['Freezing', 'Fine Tuning']
n = [20]*2
lr = [0.00005]*2
d_out = [0.5]*2
h_size = [500]*2
optim = ['Adamax']*2
w_decay = [0.0001]*2
b_size = [100]*2
l = [1.23, 0.67]
a = [53.1, 78.4]
pd.set_option("display.max_rows", None, "display.max_columns", None)
df = pd.DataFrame({'Types of Transfer Learning': t, 'Num Epochs': n, 'Learning Rate': lr, 'Drop Out': d_out, 'Hidden Size': h_size, 'Optimizer': optim, 'Weight Decay': w_decay, 'Batch Size': b_size, 'Loss': l, 'Accuracy': a})
print(tabulate(df, headers='keys', tablefmt='psql'))

+----+------------------------------+--------------+-----------------+------------+---------------+-------------+----------------+--------------+--------+------------+
|    | Types of Transfer Learning   |   Num Epochs |   Learning Rate |   Drop Out |   Hidden Size | Optimizer   |   Weight Decay |   Batch Size |   Loss |   Accuracy |
|----+------------------------------+--------------+-----------------+------------+---------------+-------------+----------------+--------------+--------+------------|
|  0 | Freezing                     |           20 |           5e-05 |        0.5 |           500 | Adamax      |         0.0001 |          100 |   1.23 |       62.1 |
|  1 | Fine Tuning                  |           20 |           5e-05 |        0.5 |           500 | Adamax      |         0.0001 |          100 |   0.67 |       83.4 |
+----+------------------------------+--------------+-----------------+------------+---------------+-------------+----------------+--------------+--------+------

Remarks:

Overall, this assignment was very enjoyable. In comparison to the part (b), the models in (c) took much longer, and for took much longer. In order to yield better results, I would use cross-validation to select the optimal hyperparameters. The most frustrating part of this assignment was knowing that if something looked off, that I would need to train all over again, I guess in a way, this is a good indication of telling me how gruesome it is to train neural networks with a large amount of data. Getting to the end result where we train is rewarding, but if you're impatient, waiting for the neural network to finish training, may be annoying. Overall, I learned a lot, and thoroughly enjoyed the assignment!