# Mount Google Drive

In [1]:
# # load and mount google drive
# from google.colab import drive
# drive.mount('/content/gdrive')

# Set Data Paths

In [2]:
training_set_pickle_path = "./Train.pkl"
training_labels_path = "./TrainLabels.csv"
test_set_pickle_path = "./Test.pkl"

# Navigate To CNN_MNIST

In [3]:
# # cd into my director where the files are at
# %cd '/content/gdrive/My Drive/ECSE_551_Machine_Learning/CNN_MNIST'
# # list what is in the current directory
%ls 

CNN_Fashion_MNIST_Project.ipynb  Test.pkl
ExampleSubmissionRandom.csv      Train.pkl
LoadData.ipynb                   TrainLabels.csv


# Import Libraries

In [4]:
# # to see what packages are available in the current server's python
# # and to see which python we are using
# %%script bash 
# python --version
# pip install ray
%pip install -U skorch
# # pip install torch==1.6.0 torchvision==0.7.0
# pip list
%pip list

Requirement already up-to-date: skorch in /usr/local/anaconda3/lib/python3.8/site-packages (0.9.0)
Note: you may need to restart the kernel to use updated packages.
Package                            Version
---------------------------------- -------------------
alabaster                          0.7.12
anaconda-client                    1.7.2
anaconda-navigator                 1.9.12
anaconda-project                   0.8.3
applaunchservices                  0.2.1
appnope                            0.1.0
appscript                          1.1.1
argh                               0.26.2
asn1crypto                         1.3.0
astroid                            2.4.2
astropy                            4.0.1.post1
atomicwrites                       1.4.0
attrs                              19.3.0
autopep8                           1.5.3
Babel                              2.8.0
backcall                           0.2.0
backports.functools-lru-cache      1.6.1
backports.shutil-get-terminal-

Note: you may need to restart the kernel to use updated packages.


In [5]:
# importing all relevant libraies
import pickle
import torchvision.models as models
import time
# import cv2 as cv
import pdb
import matplotlib.pyplot as plt
import numpy as np
import random as rand
from torch.autograd import Variable
from skorch import NeuralNetClassifier
from sklearn.model_selection import GridSearchCV
import math as ma
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from PIL import Image
import PIL
import torch
# from ray import tune
# from ray.tune import CLIReporter
# from ray.tune.schedulers import ASHAScheduler
print(f"Import successful!")
print(f"Pytorch version: {torch.__version__}")

Import successful!
Pytorch version: 1.6.0


# Check GPU

In [6]:
# Check device
USE_CUDA = 0
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if torch.cuda.is_available():
  USE_CUDA = 1
  print(f"Nvidia Cuda/GPU is available!")

# gpu_info = !nvidia-smi
# gpu_info = '\n'.join(gpu_info)
# if gpu_info.find('failed') >= 0:
#   print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
#   print('and then re-execute this cell.')
# else:
#   print(gpu_info)

# Torchvision: Image Preprocessing Pipeline

In [7]:
# Transforms are common image transformations. They can be chained together using Compose.
# Here we normalize images img=(img-0.5)/0.5
# These values normalize the image tensors to be between -1 and 1
# Adding Image augmentation to training set to increase accuracy of CNN 
mean = 0.5
std = 0.5
# transforms.RandomRotation(10, resample=PIL.Image.BILINEAR)
ImageTransforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([mean], [std])
])

# ImageTransforms = transforms.Compose([
#     transforms.ToTensor(),
#     transforms.Normalize((0.1307,), (0.3081,))
# ])

# Torchvision: Training Dataset & Dataloader 

Target Transform Function

In [8]:
target_tranform = lambda a: a - 5

Getting Length of Training Set

In [9]:
training_set_length = 60000
print(f"Length of training set is: {training_set_length}")

Length of training set is: 60000


Creates Train & Validation indices

In [10]:
# Returning training and Validation indices 
def createTrainValIndices(training_set_length, p_train):

  # Creating indices for our original training set 
  indices = np.linspace(0, training_set_length-1, num=training_set_length, dtype=int)
  print(f"Original in order indices: \n{indices}")
  rand.shuffle(indices)
  print(f"Shuffled indices: \n{indices}")

  # How to split the training and validation set
  train_end_index = ma.floor(training_set_length*p_train)
  print(f"Training index start: {0}, Training index end: {train_end_index}")
  val_end_index = training_set_length
  print(f"Validation index start: {train_end_index}, Validation index end: {val_end_index}")

  # Slicing our original indices to form training and test indices
  training_indices = indices[0:train_end_index]
  val_indices = indices[train_end_index:val_end_index]

  return training_indices, val_indices

Our Custom Dataset Class

In [11]:
# img_file: the pickle file containing the images
# label_file: the .csv file containing the labels
# transform: We use it for normalizing images (see above)
# idx: This is a binary vector that is useful for creating training and validation set.
# It return only samples where idx is True

class MyDataSet(Dataset):

  # MyDataSet constructor which stores the pickled training images and its labels
  def __init__(self, img_file, label_file, transform=None, idx = None, target_transform=None):
    self.data = pickle.load( open(img_file, 'rb' ), encoding='bytes')
    self.targets = np.genfromtxt(label_file, delimiter=',', skip_header=1)[:,1:]
    if idx is not None:
      self.targets = self.targets[idx]
      self.data = self.data[idx]
    self.transform = transform
    self.target_transform = target_transform

  # returns the size of our data set
  def __len__(self):
    return len(self.targets)

  # returns a specific image in the data set by index
  def __getitem__(self, index):
    img, target = self.data[index], int(self.targets[index])
    img = Image.fromarray(img.astype('uint8'), mode='L')
    if self.transform is not None:
      img = self.transform(img)
    return img, target

# Visualizing Data

In [12]:
# Create training dataset
batch_size = 100
training_indices, val_indices = createTrainValIndices(training_set_length, p_train=0.8)
training_set = MyDataSet(training_set_pickle_path, training_labels_path, 
                         transform=ImageTransforms, idx=training_indices, target_transform=target_tranform)
trainingSetDataLoader = DataLoader(training_set, batch_size=batch_size, shuffle=False)

Original in order indices: 
[    0     1     2 ... 59997 59998 59999]
Shuffled indices: 
[31208 31258 26755 ... 15679 52646 48411]
Training index start: 0, Training index end: 48000
Validation index start: 48000, Validation index end: 60000


In [13]:
# # Read a batch of data and their labels and display them
# # Note that since data are transformed, they are between [-1,1]
# imgs, labels = (next(iter(trainingSetDataLoader)))
# imgs = np.squeeze(imgs)
# for i in range(0,batch_size):
#     plt.figure(figsize=(7,7))
#     plt.imshow(imgs[i].cpu().numpy(),cmap='gray', vmin=-1, vmax=1) #.transpose()
#     plt.show()

# Test Set Loader

In [14]:
# function to load our test set and apply the same image transformation pipeline
# that we had done to our training batches
def loadTestSet(test_file_path, transform=None):
  with open(str(test_file_path), 'rb') as tf:
    test_set = pickle.load(tf, encoding='bytes')
    for i in range(0, len(test_set)):
      test_set[i] = Image.fromarray(test_set[i].astype('uint8'), mode='L')

  # applying the same transformations that was done to our images for the training set
  test_set = transform(test_set)
  test_set = test_set.reshape((test_set.shape[1],test_set.shape[2], test_set.shape[0]))
  return test_set

In [15]:
test_set = loadTestSet(test_set_pickle_path, ImageTransforms)
print(test_set.shape)
print(test_set[1])

torch.Size([10000, 64, 128])
tensor([[-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        ...,
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.]])


# Convolution Neural Network Classes

Activation Functions

In [16]:
# @titledictionary holding a few common activation functions used in CNNs
act_func_dict = {
    'Relu': nn.ReLU(True), # defacto standard in deep learning these days
    'Sig': nn.Sigmoid(), # may provide vanishing gradient problems in deep NNs
    'Tanh': nn.Tanh(), # may provide vanishing gradient problems in deep NNs
    'LeakyRelu': nn.LeakyReLU(), # slightly better than ReLU as it solves the problem of "dead neurons" in the network
    'ELU': nn.ELU(),
}

Modified VGG CNN with Dropout Regularization & Batch Normalization

In [17]:
# Our various VGG architecture specifications for each layers input/output sizes
# M = MaxPool layer
VGG_types = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M',512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256,'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64,'M', 128, 128,'M', 256, 256, 256, 256,'M', 512, 512, 512, 512,'M',512,512,512,512,'M'],
}

# For our fully connected layers
VGG16_Structure_Lin = [4096, 9]

# We will base our CNN on the VGG Nets 
# CNN is composed of two types of layers: 
# 1) Cov and Pooling - Feature Extraction 
# 2) Fully Connected Linear - Classification 
class Fashion_VGG_CNN(nn.Module):

  # Constructor
  # 9 way classification problem so num classes is 9
  # in_channel is 1 becasue our images are gray scale
  def __init__(self, in_channels = 1, num_classes = 9, dropout=0.15, vgg_type = "VGG16", act_func = act_func_dict['Relu']):
    super(Fashion_VGG_CNN, self).__init__()
    self.in_channels = in_channels
    self.conv_layers = self._create_cov_layers(self, VGG_types[vgg_type], act_func)
    # This fcs represents the last 3 linear fully connected layers of the VGG16
    self.fcs = nn.Sequential(
        # so the input to the first linear fully connected layer would be
        # H = 64/(2**5), W = 128/(2**5) then H*W*512
        nn.Linear(in_features=(512*2*4), out_features=VGG16_Structure_Lin[0]),
        act_func,
        nn.Dropout(p=dropout),
        nn.Linear(in_features=VGG16_Structure_Lin[0], out_features=VGG16_Structure_Lin[0]),
        act_func,
        nn.Dropout(p=dropout),
        nn.Linear(in_features=VGG16_Structure_Lin[0], out_features = num_classes)
        )
    
  # Feed forwarding our images to find outputs 
  def forward(self, x):

    # Sending each image through all of our convolution layers
    x = self.conv_layers(x)
    # After the last max pool layer we need to flatten image into a linear vector
    x = x.view(x.size(0), -1)
    # Now send the flattened vector into the last 3 fully connected linear layers
    x = self.fcs(x)
    m = nn.Softmax(dim=1)
    return m(x)

  # Creates the convolution layers for us for this CNN
  @staticmethod
  def _create_cov_layers(self, myArchitecture, act_func):
    # image input channels for us its only 1 since it's gray scale
    in_channels = self.in_channels
    # define a list to hold the layers of the CNN
    layers = []
    # looping through the architecture to define our layers
    for x in myArchitecture:
      # if it is convolution layer 
      if (type(x) == int):
        out_channels = x
        layers += [nn.Conv2d(in_channels=in_channels, out_channels=out_channels, 
                             kernel_size=(3,3), stride=(1,1), padding=(1,1)),
                   nn.BatchNorm2d(x),
                   act_func]
        in_channels = x
      # if it is a max pooling layer
      elif (x == 'M'):
        layers += [nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))]
    # Now return the block containing all these layers stacked one after another sequentially
    return nn.Sequential(*layers)
  
  # Weight initializations
  def reg_init_weights(self, m):
    '''
        regular model implementation of weight initialization
    '''
    if (type(m) == nn.Conv2d or type(m) == nn.Linear):
      nn.init.kaiming_normal_(m.weight)
      m.bias.data.fill_(0.01)

In [18]:
# Test code
model = Fashion_VGG_CNN(in_channels=1, num_classes=9, vgg_type= "VGG11")
x = torch.randn(1,1,64,128)
print(model(x))
print(model(x).shape)

tensor([[0.1386, 0.1047, 0.1258, 0.1044, 0.0958, 0.0961, 0.1154, 0.1222, 0.0971]],
       grad_fn=<SoftmaxBackward>)
torch.Size([1, 9])


Simple Fashion CNN with Dropout Regularization & Batch Normalization

In [19]:
# define a dictionary holding diff CNN dropout rates for each layer 
CNN_Dropout_rates = {
    "CNN_1": [0.15],
    "CNN_2": [0.15, 0.15],
    "CNN_3": [0.15, 0.2, 0.3],
    "CNN_4": [0.15, 0.2, 0.3, 0.4]
}

# define a dictionary holding diff CNN configs
CNN_Configs = {
    "CNN_1": [32, 'M'],
    "CNN_2": [32, 'M', 64, 'M'],
    "CNN_3": [32, 'M', 64, 'M', 128, 'M'],
    "CNN_4": [32, 'M', 64, 'M', 128, 'M', 256, 'M'],
}


class Fashion_Simple_CNN(nn.Module):

  # Constructor
  # 9 way classification problem so num classes is 9
  # in_channel is 1 becasue our images are gray scale
  def __init__(self, in_channel = 1, num_classes = 9, cnn_type = "CNN_3", kernel_size = (3,3), 
               act_func = act_func_dict['Relu'], use_dropout_reg = True, use_batch_norm = True):
    # super class constructor
    super(Fashion_Simple_CNN, self).__init__() 
    # class variables 
    self.in_channel = in_channel
    self.use_dropout_reg = use_dropout_reg
    self.use_batch_norm = use_batch_norm
    self.kernel_size = kernel_size
    self.conv_layers = self._create_cov_layers(self, CNN_Configs[cnn_type], CNN_Dropout_rates[cnn_type], kernel_size, act_func)

    # This fcs represents the last 2 linear fully connected layers of this simple CNN
    if (cnn_type == "CNN_3"):
      self.fcs = nn.Sequential(
          # so the input to the first linear fully connected layer would be
          # H = 64/(2**3), W = 128/(2**3) then H*W*128
          nn.Linear(in_features=(128*8*16), out_features=1024),
          nn.Dropout(p=0.25),
          nn.Linear(in_features=1024, out_features=num_classes),
          )
    elif (cnn_type == "CNN_4"):
      self.fcs = nn.Sequential(
          # so the input to the first linear fully connected layer would be
          # H = 64/(2**3), W = 128/(2**3) then H*W*256
          nn.Linear(in_features=(256*4*8), out_features=1024),
          nn.Dropout(p=0.25),
          nn.Linear(in_features=1024, out_features=num_classes),
          )

    elif (cnn_type == "CNN_2"):
      self.fcs = nn.Sequential(
          # so the input to the first linear fully connected layer would be
          # H = 64/(2**2), W = 128/(2**2) then H*W*64
          nn.Linear(in_features=(64*16*32), out_features=1024),
          nn.Dropout(p=0.15),
          nn.Linear(in_features=1024, out_features=num_classes)
          )

    elif (cnn_type == "CNN_1"):
      self.fcs = nn.Sequential(
          # so the input to the first linear fully connected layer would be
          # H = 64/(2**1), W = 128/(2**1) then H*W*32
          nn.Linear(in_features=(32*32*64), out_features=1024),
          nn.Dropout(p=0.25),
          nn.Linear(in_features=1024, out_features=num_classes)
          )


  # feed forward our image data to compute y
  def forward(self, x):
    # Sending each image through all of our convolution layers
    x = self.conv_layers(x)
    # After the last max pool layer we need to flatten image into a linear vector
    x = x.view(x.size(0), -1)
    # Now send the flattened vector into the last 2 fully connected linear layers
    # print(x.shape)
    x = self.fcs(x)
    # We should put an appropriate activation for the output layer.
    m = nn.Softmax(dim=1)
    return m(x)
  
  # Creates the convolution layers for us for this CNN
  @staticmethod
  def _create_cov_layers(self, conv_architecture, conv_dropout_rates, kernel_size, act_func):
    
    # To keep track of drop out rates used at each conv layer
    index = 0
    # Define in channel value
    in_channel = self.in_channel
    # List to hold our layers
    layers = []

    # Loop to go through our CNN output size specification
    for x in conv_architecture:

      # If it's a conv layer
      if (type(x) == int):
        out_channels = x
        if (self.use_batch_norm):
          layers += [nn.Conv2d(in_channels=in_channel, out_channels=out_channels, 
                             kernel_size=kernel_size, stride=(1,1), padding=(1,1)),
                     nn.BatchNorm2d(x), act_func]
        else:
          layers += [nn.Conv2d(in_channels=in_channel, out_channels=out_channels, 
                             kernel_size=kernel_size, stride=(1,1), padding=(1,1)), act_func]
        in_channel = x
      # if it is a max pooling layer
      elif (x == 'M'):
        if (self.use_dropout_reg):
          layers += [nn.MaxPool2d(kernel_size=(2,2), stride=(2,2)), nn.Dropout(p=conv_dropout_rates[index])]
          index += 1
        else:
          layers += [nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))]

    # Now return the block containing all these layers stacked one after another sequentially
    return nn.Sequential(*layers)

  # Weight initializations
  def reg_init_weights(self, m):
    '''
        regular model implementation of weight initialization
    '''
    if (type(m) == nn.Conv2d or type(m) == nn.Linear):
      nn.init.kaiming_normal_(m.weight)
      m.bias.data.fill_(0.01)

Modified CNN From Tutorial

In [20]:
class CNN_Tutorial(nn.Module):
    # This part defines the layers
    def __init__(self, in_channel = 1, num_classes = 9, first_kernel = 3, sec_kernel = 3,  
                           act_func = act_func_dict['Relu'], p_dropout=0.15):
        super(CNN_Tutorial, self).__init__()
        
        # Calculate the input size of the first linear layer
        first_kernel = first_kernel
        cov1_param1 = int((64-first_kernel+1)/2)
        cov1_param2 = int((128-first_kernel+1)/2)
        sec_kernel = sec_kernel
        cov2_param1 = int((cov1_param1-sec_kernel+1)/2)
        cov2_param2 = int((cov1_param2-sec_kernel+1)/2)
        fc1_input_size = 20 * cov2_param1 * cov2_param2
        print(f"The input size of my first linear layer is: {fc1_input_size}")

        # conv layer 1
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channel, out_channels=10, kernel_size=first_kernel),
            nn.BatchNorm2d(10),
            act_func,
            nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))
        )

        # conv layer 2
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=20, kernel_size=sec_kernel),
            nn.BatchNorm2d(20),
            act_func,
            nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))
        )

        # fully connected layers
        self.fcs = nn.Sequential(
            nn.Linear(fc1_input_size, 600),
            act_func,
            nn.Dropout(p=p_dropout),
            nn.Linear(600, 120),
            act_func,
            nn.Linear(120, num_classes)
        )

    # And this part defines the way they are connected to each other
    # forward pass
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        x = self.fcs(x)
        return F.log_softmax(x)

    # Weight initializations
    def reg_init_weights(self, m):
      '''
          regular model implementation of weight initialization
      '''
      if (type(m) == nn.Conv2d or type(m) == nn.Linear):
          nn.init.kaiming_normal_(m.weight)
          m.bias.data.fill_(0.01)

CNN with Weights Initialization

In [21]:
class BasicCNN(nn.Module):

    def __init__(self, channel_sizes, layers, batch_norm, dropout, num_classes):
        super(BasicCNN, self).__init__()
        modules = []
        for block_idx in range(0, len(channel_sizes) -1):
            modules.append(nn.Conv2d(channel_sizes[block_idx], channel_sizes[block_idx+1], 3, padding=1, bias=False))
            if batch_norm:
                modules.append(nn.BatchNorm2d(channel_sizes[block_idx+1]))
            modules.append(nn.ReLU(True))
            if dropout is not None:
                modules.append(nn.Dropout2d(dropout, inplace=False))
            if layers > 1:
                for layer in range(layers - 1):
                    modules.append(nn.Conv2d(channel_sizes[block_idx+1], channel_sizes[block_idx+1], 3, padding=1, bias=False))
                    if batch_norm:
                        modules.append(nn.BatchNorm2d(channel_sizes[block_idx+1]))
                    modules.append(nn.ReLU(True))
                    if dropout is not None:
                        modules.append(nn.Dropout2d(dropout, inplace=False))
            
            if block_idx + 1  != len(channel_sizes) - 1:
                modules.append(nn.MaxPool2d(2,2))
        
        self.cnn_core = nn.Sequential(*modules)
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.linear = nn.Linear(channel_sizes[-1], num_classes)
            
    def forward(self, x):
        x = self.cnn_core(x)
        x = self.gap(x)
        x = x.view(x.size(0), -1)
        x = self.linear(x)
        m = nn.Softmax(dim=1)
        return m(x)
    
    # Weight initializations
    def reg_init_weights(self, m):
      '''
          regular model implementation of weight initialization
      '''
      if (type(m) == nn.Conv3d or type(m) == nn.Linear):
          nn.init.kaiming_normal_(m.weight)
          m.bias.data.fill_(0.01)

  

# Loss Functions & Optimizers

Dictionary of Loss Functions

In [22]:
# function returning our desired loss function
def selectLoss(key):
  loss_func_dict = {
      "CEL": nn.CrossEntropyLoss(),
      "KLDL": nn.KLDivLoss(),
      "NLL": nn.NLLLoss(),
      "MSE": nn.MSELoss(),
  }
  return loss_func_dict.get(key,"Invalid loss function!") 
  

Dictionary of Optimizers

In [23]:
# function returns our desired optimizer
def selectOptimizer(key, model, lr, momentum):
  optimizers_dict = {
      "SGD": optim.SGD(model.parameters(), lr=lr, momentum=momentum),
      "Adam": optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-08),
      "RMS": optim.RMSprop(model.parameters(), lr=lr, momentum=momentum),
      "AdaG": optim.Adagrad(model.parameters(), lr=lr),
      "AdaD": optim.Adadelta(model.parameters(), lr=lr)
  }
  return optimizers_dict.get(key,"Invalid optimizer!")


# Create Models

Simple CNN with Weights Initialization

In [24]:
# Basic_CNN = BasicCNN(channel_sizes=cnn_config, layers = 1, batch_norm=True, dropout=0.15, num_classes=9)
# Basic_CNN.apply(Basic_CNN.reg_init_weights)
# print(Basic_CNN)       

VGG Net

In [25]:
vgg_CNN = Fashion_VGG_CNN(in_channels=1, num_classes=9, vgg_type="VGG11", act_func = act_func_dict['Relu'])
# print(vgg_CNN)

Simple CNN

In [26]:
simple_CNN = Fashion_Simple_CNN(in_channel = 1, num_classes = 9, cnn_type = "CNN_3", kernel_size = (3,3), 
                           act_func = act_func_dict['Relu'], use_dropout_reg = True, use_batch_norm = True)
# print(simple_CNN)

# Hyper-parameters & Tunning parameters

In [48]:
# Define all the hyperparameters
model_name = "./VGG19_model.tar"
vgg_type = "VGG19"
cnn_config = [1,64,128,256]
p_train = 0.80
step_size = 3 # every number of epochs decrease the learning rate 
gamma = 0.1 # decrease leanring rate by gamma /10 every number of epochs or so
lr = 1e-5
batch_size = 128
print_results_every = 20
epoch_start = 1
num_epochs = 200
momentum = 0.5
loss_key = "CEL"
optimizer_key = "Adam"
act_func = act_func_dict['Relu']
kernel_size = (3,3)
stride = (1,1)
first_kernel = 5
sec_kernel = 5
p_dropout = 0.5
num_layers = 1
use_batch_norm = True
small_train_size = 500
small_val_size = 100
training_set_length = 60000

# TorchVision Models

In [28]:
resnet18 = models.resnet18(pretrained=False)
vgg11_bn = models.vgg11_bn(pretrained=False)
vgg13_bn = models.vgg13_bn(pretrained=False)
vgg16_bn = models.vgg16_bn(pretrained=False)
vgg19_bn = models.vgg19_bn(pretrained=False)

# CNN Training & Model Evaluation Functions

In [62]:
# function used to evaluate our validation accuracy on during training 
def evaluate(model, validation_loader):
  correct = 0
  total = 0
  model.eval()
  with torch.no_grad():
    for val_data, val_labels in validation_loader:
      val_data, val_labels = val_data.to(DEVICE), val_labels.to(DEVICE)
      val_outputs = model(val_data)
      _,predicted = torch.max(val_outputs.data, 1)
      total += val_labels.size(0)
      correct += (predicted == val_labels).sum()
    current_val_acc = ((float(correct)/float(total))*100)
  return current_val_acc, correct, total

In [57]:
# create small validaiton and training indices
def createSmallTrainValIndicies(small_train_size, small_val_size, training_set_length):
  # Creating indices for our original training set 
  indices = np.linspace(0, training_set_length-1, num=training_set_length, dtype=int)
  # print(f"Original in order indices: \n{indices}")
  rand.shuffle(indices)
  # print(f"Shuffled indices: \n{indices}")
  half_index = ma.floor(len(indices)/2)
  small_train_indices = indices[0:small_train_size]
  small_val_indices = indices[half_index:half_index + small_val_size]
  return small_train_indices, small_val_indices

In [63]:
# Training function
def trainCNN(epoch, model, optimizer, loss_function, 
             validation_loader, train_loader, 
             b, train_losses, train_counter, model_name):
  
  # Defining the running accuracy of training
  train_correct = 0
  train_total = 0
  for batch_id, (batch_data, batch_labels) in enumerate(train_loader):
    # our model is now in training phase 
    # ensure's our model will use batch norm layers and dropout layers for training
    model.train()

    # Initializing grad to 0 to ensure there is no mixing of graidents among batches
    optimizer.zero_grad()

    # move a batch of images and it's labels into GPU
    batch_data = batch_data.to(DEVICE)
    batch_labels = batch_labels.to(DEVICE)
    # print(torch.unique(batch_labels))

    # Forward pass
    outputs = model(batch_data)

    # Adding to the running sum of accuracy
    _,pred = torch.max(outputs.data, 1)
    train_correct += (pred == batch_labels).sum()
    train_total += batch_data.size(0)
    
    # Calculate loss
    # print(torch.unique(batch_labels))
    current_loss = loss_function(outputs, batch_labels)

    # Propagate error backwards
    current_loss.backward()

    # Optimize our model parameters and update scheduler
    optimizer.step()

    # Printing out our training and validation progress every so often
    if (batch_id % b == 0):
      
      # finding out the training accuracy every however many batches 
      current_train_acc = ((float(train_correct)/float(train_total))*100)

      # finding validation accuracy
      current_val_acc, val_correct, val_total = evaluate(model, validation_loader)

      # printing our results 
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
          epoch, batch_id * len(batch_data), len(train_loader.dataset),
          100. * batch_id / len(train_loader), current_loss.item()))
      print(f"Train Epoch: {epoch} Validation_Accuracy: {current_val_acc}%, Ratio: {val_correct}/{val_total}")
      print(f"Train Epoch: {epoch} Training_Accuracy: {current_train_acc}%, Ratio: {train_correct}/{train_total}\n")


      train_losses.append(current_loss.item())
      train_counter.append((batch_id*64) + ((epoch-1)*len(train_loader.dataset)))
      
      # saving our model, optimizer, and training counter/losses lists
      torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'train_loss': train_losses,
            'train_counter': train_counter
            }, str(model_name), _use_new_zipfile_serialization=False)


# CNN Pipeline


Mini-batches for Train & Validation

In [32]:
# create small train and val indices for slicing our data set
small_train_indices, small_val_indices = createSmallTrainValIndicies(small_train_size, small_val_size, training_set_length)
print(f"Train size: {len(small_train_indices)}, Validation size: {len(small_val_indices)}")

# Create small train and small val dataset
small_dataset = MyDataSet(training_set_pickle_path, training_labels_path, 
                         transform=ImageTransforms, idx=small_train_indices, target_transform=target_tranform)
small_dataset.targets = small_dataset.target_transform(small_dataset.targets)
small_valset = MyDataSet(training_set_pickle_path, training_labels_path, 
                         transform=ImageTransforms, idx=small_val_indices, target_transform=target_tranform)
small_valset.targets = small_valset.target_transform(small_valset.targets)

# Create small train and validation dataloaaders
small_train_loader = DataLoader(small_dataset, batch_size=batch_size, shuffle=True)
small_val_loader = DataLoader(small_valset, batch_size=batch_size, shuffle=True)

Train size: 500, Validation size: 100


Full Training & Validation Sets

In [33]:
# Creating training and val indices so our data set class can chop them up appropriately 
training_indices, val_indices = createTrainValIndices(training_set_length, p_train=0.8)

# Create training dataset 
training_set = MyDataSet(training_set_pickle_path, training_labels_path, 
                         transform=ImageTransforms, idx=training_indices, target_transform=target_tranform)
training_set.targets = training_set.target_transform(training_set.targets).astype(int)
print(training_set.targets[0:10,:])
print(f"My training set shape is: {training_set.data.shape}")
print(f"My training set labels shape is: {training_set.targets.shape}")

# Create validation dataset 
validation_set = MyDataSet(training_set_pickle_path, training_labels_path, 
                           transform=ImageTransforms, idx=val_indices, target_transform=target_tranform)
validation_set.targets = validation_set.target_transform(validation_set.targets)
validation_set.targets = validation_set.targets.astype(int)
print(validation_set.targets[0:10,:])
print(f"My validation set shape is: {validation_set.data.shape}")
print(f"My validation set labels shape is: {validation_set.targets.shape}")

Original in order indices: 
[    0     1     2 ... 59997 59998 59999]
Shuffled indices: 
[58464 51282 53809 ... 58853 47203 12277]
Training index start: 0, Training index end: 48000
Validation index start: 48000, Validation index end: 60000
[[8]
 [3]
 [6]
 [0]
 [5]
 [8]
 [2]
 [6]
 [6]
 [2]]
My training set shape is: (48000, 64, 128)
My training set labels shape is: (48000, 1)
[[0]
 [5]
 [7]
 [5]
 [4]
 [7]
 [0]
 [8]
 [3]
 [0]]
My validation set shape is: (12000, 64, 128)
My validation set labels shape is: (12000, 1)


Create Training & Validation Loaders

In [34]:
# Create training and validation loaders
trainingSetDataLoader = DataLoader(training_set, batch_size=batch_size, shuffle=True)
validationSetDataLoader = DataLoader(validation_set, batch_size=batch_size, shuffle=True)

Create different CNN Models

In [35]:
# Tutorial CNN
CNN_Tutorial = CNN_Tutorial(in_channel=1, num_classes=9, 
                            first_kernel=first_kernel, 
                            sec_kernel=sec_kernel, act_func=act_func,p_dropout=p_dropout)
CNN_Tutorial.apply(CNN_Tutorial.reg_init_weights)
# print(CNN_Tutorial)

The input size of my first linear layer is: 7540


CNN_Tutorial(
  (conv1): Sequential(
    (0): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
    (1): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
    (1): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (fcs): Sequential(
    (0): Linear(in_features=7540, out_features=600, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=600, out_features=120, bias=True)
    (4): ReLU(inplace=True)
    (5): Linear(in_features=120, out_features=9, bias=True)
  )
)

In [36]:
# Basic CNN
cnn_basic = BasicCNN(channel_sizes=cnn_config, layers = 1, batch_norm=True, dropout=0, num_classes=9)
cnn_basic.apply(cnn_basic.reg_init_weights)
# print(cnn_basic)      

BasicCNN(
  (cnn_core): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Dropout2d(p=0, inplace=False)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (6): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): ReLU(inplace=True)
    (8): Dropout2d(p=0, inplace=False)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (11): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): Dropout2d(p=0, inplace=False)
  )
  (gap): AdaptiveAvgPool2d(output_size=1)
  (linear): L

In [37]:
simple_CNN = Fashion_Simple_CNN(in_channel = 1, num_classes = 9, cnn_type = "CNN_2", kernel_size = (3,3), 
                           act_func = act_func_dict['Relu'], use_dropout_reg = True, use_batch_norm = True)
simple_CNN.apply(simple_CNN.reg_init_weights)

Fashion_Simple_CNN(
  (conv_layers): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (4): Dropout(p=0.15, inplace=False)
    (5): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): ReLU(inplace=True)
    (8): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (9): Dropout(p=0.15, inplace=False)
  )
  (fcs): Sequential(
    (0): Linear(in_features=32768, out_features=1024, bias=True)
    (1): Dropout(p=0.15, inplace=False)
    (2): Linear(in_features=1024, out_features=9, bias=True)
  )
)

In [38]:
VGG_CNN = Fashion_VGG_CNN(in_channels=1, num_classes=9, dropout=p_dropout, vgg_type=vgg_type, act_func = act_func)
VGG_CNN.apply(VGG_CNN.reg_init_weights)
# print(VGG_CNN)

Fashion_VGG_CNN(
  (conv_layers): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mo

Set-up Loss Function and Optimizers

In [64]:
# create a net
model = vgg_CNN
print(model)

# moving our CNN model into GPU memory
if USE_CUDA:
  model = model.to(DEVICE)

# create loss function
loss_function = selectLoss(key=loss_key)

# create optimizer and step scheduler
optimizer = selectOptimizer(key=optimizer_key, model=model, lr=lr, momentum=momentum)
# step_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = step_size, gamma=gamma, verbose=True)


Fashion_VGG_CNN(
  (conv_layers): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(i

In [50]:
# define lists to hold our training loss and iterations
train_losses = []
train_counter = []

# GridSearchCV with Skorch For CNN

In [41]:
# reshaping data 
ts = training_set.data.reshape(-1, 1, 64, 128).astype('float32')
ts_labels = training_set.targets.reshape((training_set.targets.shape[0],)).astype('int64')
vs = validation_set.data.reshape(-1, 1, 64, 128).astype('float32')
vs_labels = validation_set.targets.reshape((validation_set.targets.shape[0],)).astype('int64')
print(f"Training set shape: {ts.shape}, Validation set shape: {vs.shape}")
print(f"Training labels shape: {ts_labels.shape}, Validation labels shape: {vs_labels.shape}")

Training set shape: (48000, 1, 64, 128), Validation set shape: (12000, 1, 64, 128)
Training labels shape: (48000,), Validation labels shape: (12000,)


In [42]:
# fixed random seed and cuda random seed 
torch.manual_seed(0)
torch.cuda.manual_seed_all(0)

# in_channels = 1, num_classes = 9, dropout=0.15, vgg_type = "VGG16", act_func = act_func_dict['Relu']

# wrapping my own cnn class in skorch cnn
cnn = NeuralNetClassifier(
    module = Fashion_VGG_CNN,
    module__in_channels = 1,
    module__num_classes = 9,
    module__dropout = p_dropout,
    module__vgg_type = vgg_type,
    module__act_func = act_func,
    max_epochs=5,
    lr=1e-5,
    optimizer=torch.optim.Adam,
    device=DEVICE,
    criterion=torch.nn.CrossEntropyLoss,
    batch_size=batch_size
)

In [43]:
# # training with skorch
# cnn.fit(ts, ts_labels);

In [44]:
# full data set of my training
full_training_set = MyDataSet(training_set_pickle_path, training_labels_path, 
                         transform=ImageTransforms, idx=training_indices, target_transform=target_tranform)

full_training_set.targets = full_training_set.target_transform(full_training_set.targets)
gs_ts = full_training_set.data.reshape(-1, 1, 64, 128).astype('float32')
gs_tl = full_training_set.targets.reshape((full_training_set.targets.shape[0],)).astype('int64')
print(f"Training set shape: {gs_ts.shape}, Training Labels shape: {gs_tl.shape}")

Training set shape: (48000, 1, 64, 128), Training Labels shape: (48000,)


In [45]:
# # doing gridsearch with skortch 
# params = {
#     'lr': [1e-5, 5e-7],
#     'max_epochs': [5, 10],
#     'module__dropout': [0.25, 0.5],
# }
# gs = GridSearchCV(cnn, params, refit=False, cv=3, scoring='accuracy', verbose=True)
# gs.fit(gs_ts, gs_tl)
# print(gs.best_score_, gs.best_params_)

# Reload Model From Checkpoint

In [65]:
# if load is true meaning we are starting from where we left off after we stopped training
load = True
if load:
  if USE_CUDA == 0:
    # load checkpoint dictionary into CPU
    checkpoint = torch.load(str(model_name), map_location=torch.device('cpu'))
    epoch_start = checkpoint['epoch']
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    train_losses = checkpoint['train_loss']
    train_counter = checkpoint['train_counter']
  else:
    # load checkpoint dictionary into GPU
    checkpoint = torch.load(model_name, map_location=DEVICE)
    epoch_start = checkpoint['epoch']
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    train_losses = checkpoint['train_loss']
    train_counter = checkpoint['train_counter']
  print(f"Reloaded from checkpoint successfully!")

Reloaded from checkpoint successfully!


# Train Model

In [66]:
# training converges at around 6 epochs for the tutorial cnn
# training for a number of epochs
last_epoch_num = 0
for epoch in range(epoch_start, num_epochs):
  # if (epoch % step_size == 0):
  #   lr = lr * 0.1
  #   print(f"Learning rate set to: {lr}")
  #   optimizer = selectOptimizer(key=optimizer_key, model=model, lr=lr, momentum=momentum)
  start = time.time()
  trainCNN(epoch, model, optimizer, loss_function, validationSetDataLoader, trainingSetDataLoader, 
           b=print_results_every, train_losses=train_losses, train_counter=train_counter, model_name=model_name)
  # if (epoch % step_size == 0):
  #   step_lr_scheduler.step()
  end = time.time()
  print(f"\nRun time per epoch: {end - start} (s) = {(end - start)/60} (mins)\n")
  last_epoch_num = epoch

Train Epoch: 1 Validation_Accuracy: 51.4%, Ratio: 6168/12000
Train Epoch: 1 Training_Accuracy: 41.40625%, Ratio: 53/128

Train Epoch: 1 Validation_Accuracy: 51.65%, Ratio: 6198/12000
Train Epoch: 1 Training_Accuracy: 52.269345238095234%, Ratio: 1405/2688

Train Epoch: 1 Validation_Accuracy: 52.59166666666667%, Ratio: 6311/12000
Train Epoch: 1 Training_Accuracy: 53.105945121951216%, Ratio: 2787/5248

Train Epoch: 1 Validation_Accuracy: 52.94166666666666%, Ratio: 6353/12000
Train Epoch: 1 Training_Accuracy: 53.41956967213115%, Ratio: 4171/7808

Train Epoch: 1 Validation_Accuracy: 52.708333333333336%, Ratio: 6325/12000
Train Epoch: 1 Training_Accuracy: 53.607253086419746%, Ratio: 5558/10368

Train Epoch: 1 Validation_Accuracy: 53.583333333333336%, Ratio: 6430/12000
Train Epoch: 1 Training_Accuracy: 54.1769801980198%, Ratio: 7004/12928

Train Epoch: 1 Validation_Accuracy: 53.39166666666667%, Ratio: 6407/12000
Train Epoch: 1 Training_Accuracy: 54.132231404958674%, Ratio: 8384/15488

Train E

KeyboardInterrupt: 

# Save Model

In [None]:
# saving our models fianlly at the very end after training 
torch.save({
            'epoch': last_epoch_num,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'train_loss': train_losses,
            'train_counter': train_counter
            }, str(model_name))

# Final CNN Training & Prediction on Test Set

In [None]:
# Define final training loss and counter lists for graphing
final_train_loss = []
final_train_counter = []

In [None]:
# Training function
def trainfinalCNN(epoch, model, optimizer, loss_function, train_loader, b, train_losses, train_counter):
  for batch_id, (batch_data, batch_labels) in enumerate(train_loader):
    # our model is now in training phase 
    # ensure's our model will use batch norm layers and dropout layers for training
    model.train()

    # Initializing grad to 0 to ensure there is no mixing of graidents among batches
    optimizer.zero_grad()

    # move a batch of images and it's labels into GPU
    batch_data = batch_data.to(DEVICE)
    batch_labels = batch_labels.to(DEVICE)
    # print(torch.unique(batch_labels))

    # Forward pass
    outputs = model(batch_data)
    
    # Calculate loss
    # print(torch.unique(batch_labels))
    current_loss = loss_function(outputs, batch_labels)

    # Propagate error backwards
    current_loss.backward()

    # Optimize our model parameters and update scheduler
    optimizer.step()

    # Printing out our training and validation progress every so often
    if (batch_id % b == 0):

      # finding training set accuracy 
      train_correct = 0
      train_total = 0
      _,pred = torch.max(outputs.data, 1)
      train_correct += (pred == batch_labels).sum()
      train_total += batch_data.size(0)
      current_train_acc = ((train_correct/train_total)*100)

      # printing our results 
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
          epoch, batch_id * len(batch_data), len(train_loader.dataset),
          100. * batch_id / len(train_loader), current_loss.item()))
      print(f"Train Epoch: {epoch} Training_Accuracy: {current_train_acc}%\n")

      train_losses.append(current_loss.item())
      train_counter.append((batch_id*64) + ((epoch-1)*len(train_loader.dataset)))
      torch.save(model.state_dict(), './model.pth')
      torch.save(optimizer.state_dict(), './optimizer.pth')


In [None]:
#TODO: need to finish the predict function
# to predict on test set 
def predictOnTestSet(testSetLoader, model):
  predictedLabels = []
  model.eval()
  with torch.no_grad():
    for test_batch in testSetLoader:
      test_batch = test_batch.to(DEVICE)
      outputs = model(test_batch)
      # -> assign label for each sample x 
      # note: output shape: (B, Classes) and during prediction we need to add 5 back
      
   
  return 
