# Mount Google Drive

In [None]:
# Mount Google Drive inside the Colab VM at /content/gdrive so that files stored
# in Drive can be reached through the local file system by the cells below.
from google.colab import drive
drive.mount('/content/gdrive')

# Set Data Paths

In [None]:
# Dataset artefacts. The paths are relative, so they resolve against the current
# working directory at the time the files are opened.
training_set_pickle_path, training_labels_path, test_set_pickle_path = (
    "./Train.pkl",
    "./TrainLabels.csv",
    "./Test.pkl",
)

# Navigate To CNN_MNIST

In [None]:
# Change into the Drive directory that holds the project files; the relative
# dataset paths defined earlier are resolved against this directory.
%cd '/content/gdrive/My Drive/ECSE_551_Machine_Learning/CNN_MNIST'
# List the directory contents as a quick check that the data files are present.
%ls 

# Import Libraries

In [None]:
%%script bash
# NOTE: a cell magic is only recognised on the very first line of a cell, so
# nothing (not even a comment) may precede `%%script bash`.
# Report which Python the runtime uses, install the dependencies this notebook
# relies on (torch/torchvision pinned), then list every installed package.
python --version
pip install -U skorch
pip install torch==1.6.0 torchvision==0.7.0
pip list

In [None]:
# importing all relevant libraies
import pickle
import torchvision.models as models
import time
import matplotlib.pyplot as plt
import numpy as np
import random as rand
from torch.autograd import Variable
# from skorch import NeuralNetClassifier
from sklearn.model_selection import GridSearchCV
import math as ma
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from PIL import Image
import PIL
import torch
import pandas as  pd 
print(f"Import successful!")
print(f"Pytorch version: {torch.__version__}")

# Check GPU

In [None]:
# Pick the compute device once for the whole notebook.
# USE_CUDA is an integer flag (0/1) that later cells test before moving the model;
# DEVICE is the torch.device that batches and checkpoints are mapped to.
USE_CUDA = 0
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(torch.cuda.is_available())
if torch.cuda.is_available():
  USE_CUDA = 1
  print(f"Nvidia Cuda/GPU is available!")

# `!cmd` is IPython shell capture: the command output comes back as a list of lines.
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
# The output containing the word 'failed' is taken as "no GPU attached to this runtime".
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

# Torchvision: Image Preprocessing Pipeline

In [None]:
# Torchvision preprocessing pipeline. Compose runs the transforms in list order.
#   1. RandomRotation - training-time augmentation (random angle in [-10, 10] degrees).
#      It is applied first, while the sample is still a PIL image: the pinned
#      torchvision 0.7.0 rotates PIL images, and the corners exposed by the
#      rotation are filled with 0, which is only the background value *before*
#      normalisation shifts the range.
#   2. ToTensor       - PIL image -> float tensor scaled to [0, 1].
#   3. Normalize      - img = (img - 0.5) / 0.5, mapping pixel values to [-1, 1].
mean = 0.5
std = 0.5
ImageTransforms = transforms.Compose([
    transforms.RandomRotation(10, resample=PIL.Image.BILINEAR),
    transforms.ToTensor(),
    transforms.Normalize([mean], [std]),
])

# Alternative pipeline without augmentation, using different normalisation constants:
# ImageTransforms = transforms.Compose([
#     transforms.ToTensor(),
#     transforms.Normalize((0.1307,), (0.3081,))
# ])

# Torchvision: Training Dataset & Dataloader 

Target Transform Function

In [None]:
def target_tranform(a):
  """Return ``a - 5``.

  Works on anything supporting subtraction (scalars, NumPy arrays).
  NOTE(review): presumably this shifts the raw labels to zero-based class ids
  for the 9-way classifier - confirm against the label file.
  """
  return a - 5

Getting Length of Training Set

In [None]:
# Number of labelled samples used when building the train/validation index split.
training_set_length = 60_000
print(f"Length of training set is: {training_set_length}")

Creates Train & Validation indices

In [None]:
def createTrainValIndices(training_set_length, p_train):
  """Randomly split ``range(training_set_length)`` into train / validation indices.

  Args:
    training_set_length: number of samples to split.
    p_train: fraction of the samples assigned to the training part; the
      training part holds ``floor(training_set_length * p_train)`` indices.

  Returns:
    ``(training_indices, val_indices)`` - two disjoint integer NumPy arrays that
    together cover every index exactly once. The shuffle uses the global state
    of the ``random`` module.
  """
  # All sample positions, shuffled in place.
  indices = np.arange(training_set_length, dtype=int)
  rand.shuffle(indices)

  # Boundary between the two parts of the shuffled index array.
  train_end_index = ma.floor(training_set_length*p_train)
  val_end_index = training_set_length
  print(f"Training index start: {0}, Training index end: {train_end_index}")
  print(f"Validation index start: {train_end_index}, Validation index end: {val_end_index}")

  return indices[:train_end_index], indices[train_end_index:val_end_index]

Our Custom Training Set Class

In [None]:
class MyDataSet(Dataset):
  """Labelled image dataset backed by a pickled image array and a CSV of labels.

  Args:
    img_file: path to the pickle file containing the images. The loaded object
      is indexed with ``idx`` and its items are converted with ``.astype``, so
      it is expected to be a NumPy array.
    label_file: path to the ``.csv`` label file. The header row and the first
      column are skipped; the remaining column(s) become ``self.targets``
      (kept 2-D, one row per sample).
    transform: optional callable applied to each PIL image in ``__getitem__``.
    idx: optional NumPy index (integer index array or boolean mask) selecting
      the subset of samples to keep, e.g. a train or validation split.
    target_transform: stored on the instance only. It is NOT applied in
      ``__getitem__``; callers apply it to ``self.targets`` themselves.
  """

  def __init__(self, img_file, label_file, transform=None, idx = None, target_transform=None):
    # SECURITY NOTE: unpickling executes arbitrary code - only load trusted files.
    # The context manager closes the file handle as soon as loading is done.
    with open(img_file, 'rb') as img_handle:
      self.data = pickle.load(img_handle, encoding='bytes')
    self.targets = np.genfromtxt(label_file, delimiter=',', skip_header=1)[:,1:]
    if idx is not None:
      self.targets = self.targets[idx]
      self.data = self.data[idx]
    self.transform = transform
    self.target_transform = target_transform

  def __len__(self):
    """Number of samples (rows of ``self.targets``)."""
    return len(self.targets)

  def __getitem__(self, index):
    """Return ``(image, label)`` for one sample; the label is a Python int."""
    img = self.data[index]
    # Each targets row holds a single value; .item() extracts it explicitly
    # instead of relying on implicit size-1-array -> scalar conversion.
    target = int(np.asarray(self.targets[index]).item())
    # Build an 8-bit grayscale PIL image so torchvision transforms can consume it.
    img = Image.fromarray(img.astype('uint8'), mode='L')
    if self.transform is not None:
      img = self.transform(img)
    return img, target

Our Custom Test Set Class

In [None]:
class MyTestSet(Dataset):
  """Unlabelled image dataset backed by a pickled image array.

  Args:
    img_file: path to the pickle file containing the images. The loaded object
      must expose ``.shape`` and per-item ``.astype`` (i.e. a NumPy array).
    transform: optional callable applied to each PIL image in ``__getitem__``.
  """

  def __init__(self, img_file, transform=None):
    # SECURITY NOTE: unpickling executes arbitrary code - only load trusted files.
    # The context manager closes the file handle as soon as loading is done.
    with open(img_file, 'rb') as img_handle:
      self.data = pickle.load(img_handle, encoding='bytes')
    self.transform = transform

  def __len__(self):
    """Number of images (size of the first axis of the array)."""
    return self.data.shape[0]

  def __getitem__(self, index):
    """Return the image at ``index`` after the optional transform."""
    img = self.data[index]
    # Build an 8-bit grayscale PIL image so torchvision transforms can consume it.
    img = Image.fromarray(img.astype('uint8'), mode='L')
    if self.transform is not None:
      img = self.transform(img)
    return img

# Visualizing Data

In [None]:
# Build an unshuffled loader over an 80% training split, used only to look at samples.
training_indices, val_indices = createTrainValIndices(training_set_length, p_train=0.8)
training_set = MyDataSet(
    training_set_pickle_path,
    training_labels_path,
    transform=ImageTransforms,
    idx=training_indices,
    target_transform=target_tranform,
)
batch_size = 100
trainingSetDataLoader = DataLoader(training_set, batch_size=batch_size, shuffle=False)

In [None]:
# Fetch the first batch from the loader and draw every image in it.
# The images went through the normalising transform, so pixel values lie in
# [-1, 1]; vmin/vmax pin the gray scale to that range.
imgs, labels = next(iter(trainingSetDataLoader))
# Drop the singleton channel axis so each entry is a plain 2-D image.
imgs = np.squeeze(imgs)
for i in range(batch_size):
    plt.figure(figsize=(7,7))
    plt.imshow(imgs[i].cpu().numpy(), cmap='gray', vmin=-1, vmax=1)
    plt.show()

# Convolution Neural Network Classes

Activation Functions

In [None]:
# Lookup table of common activation modules, keyed by a short name.
# Each entry is a single module instance that is reused wherever it is looked up.
act_func_dict = dict(
    Relu=nn.ReLU(True),         # de-facto default in deep learning (in-place variant)
    Sig=nn.Sigmoid(),           # can cause vanishing gradients in deep networks
    Tanh=nn.Tanh(),             # can cause vanishing gradients in deep networks
    LeakyRelu=nn.LeakyReLU(),   # avoids the "dead neuron" problem of plain ReLU
    ELU=nn.ELU(),
)

Modified VGG CNN with Dropout Regularization & Batch Normalization

In [None]:
# Channel plans for the supported VGG variants.
# An int is the number of output channels of a 3x3 convolution; 'M' is a 2x2 max-pool layer.
VGG_types = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M',512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256,'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64,'M', 128, 128,'M', 256, 256, 256, 256,'M', 512, 512, 512, 512,'M',512,512,512,512,'M'],
}

# Widths for the fully connected head; the class reads only the first entry (hidden width).
VGG16_Structure_Lin = [4096, 9]

class Fashion_VGG_CNN(nn.Module):
  """VGG-style CNN with batch normalisation and dropout.

  The network has two parts:
    1) convolution + pooling layers (feature extraction), built from ``VGG_types``;
    2) three fully connected layers (classification).

  The first linear layer is sized for 64x128 inputs: each plan contains five
  2x2 max-pools, so the feature map is (64 / 2**5) x (128 / 2**5) = 2 x 4 with
  512 channels.

  Args:
    in_channels: channels of the input image (1 for grayscale).
    num_classes: number of output classes.
    dropout: dropout probability used after each hidden linear layer.
    vgg_type: key into ``VGG_types``.
    act_func: activation module inserted after every conv / hidden linear layer.
      The same module instance is reused at every position.
  """

  def __init__(self, in_channels = 1, num_classes = 9, dropout=0.15, vgg_type = "VGG16", act_func = act_func_dict['Relu']):
    super(Fashion_VGG_CNN, self).__init__()
    self.in_channels = in_channels
    self.conv_layers = self._create_cov_layers(VGG_types[vgg_type], act_func)
    hidden_width = VGG16_Structure_Lin[0]
    self.fcs = nn.Sequential(
        # Flattened feature map: 512 channels x 2 x 4 (see class docstring).
        nn.Linear(in_features=(512*2*4), out_features=hidden_width),
        act_func,
        nn.Dropout(p=dropout),
        nn.Linear(in_features=hidden_width, out_features=hidden_width),
        act_func,
        nn.Dropout(p=dropout),
        nn.Linear(in_features=hidden_width, out_features=num_classes)
        )

  def forward(self, x):
    """Return raw class scores (logits) of shape (batch, num_classes).

    No softmax is applied here: ``nn.CrossEntropyLoss`` (the loss this notebook
    trains with) applies log-softmax itself and expects unnormalised scores.
    The arg-max over dim 1 is still the predicted class; call
    ``torch.softmax(out, dim=1)`` when probabilities are needed.
    """
    x = self.conv_layers(x)
    # Flatten everything except the batch dimension for the linear layers.
    x = x.view(x.size(0), -1)
    return self.fcs(x)

  def _create_cov_layers(self, myArchitecture, act_func):
    """Build the convolutional feature extractor described by ``myArchitecture``.

    Every int entry becomes Conv2d(3x3, stride 1, padding 1) -> BatchNorm2d ->
    ``act_func``; every 'M' entry becomes a 2x2 max-pool with stride 2.
    """
    in_channels = self.in_channels
    layers = []
    for x in myArchitecture:
      if isinstance(x, int):
        layers += [nn.Conv2d(in_channels=in_channels, out_channels=x,
                             kernel_size=(3,3), stride=(1,1), padding=(1,1)),
                   nn.BatchNorm2d(x),
                   act_func]
        # The next convolution consumes what this one produced.
        in_channels = x
      elif x == 'M':
        layers += [nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))]
    return nn.Sequential(*layers)

  def reg_init_weights(self, m):
    '''
        Weight initialisation hook for ``model.apply``: Kaiming-normal weights
        and a constant 0.01 bias for every Conv2d / Linear module.
    '''
    if isinstance(m, (nn.Conv2d, nn.Linear)):
      nn.init.kaiming_normal_(m.weight)
      # Layers created with bias=False have no bias tensor to fill.
      if m.bias is not None:
        m.bias.data.fill_(0.01)

Simple Fashion CNN with Dropout Regularization & Batch Normalization

In [None]:
# Dropout probability applied after each max-pool layer, per CNN configuration
# (one entry per 'M' in the matching CNN_Configs plan).
CNN_Dropout_rates = {
    "CNN_1": [0.15],
    "CNN_2": [0.15, 0.15],
    "CNN_3": [0.15, 0.2, 0.3],
    "CNN_4": [0.15, 0.2, 0.3, 0.4]
}

# Layer plans: an int is the output channel count of a convolution, 'M' is a 2x2 max-pool.
CNN_Configs = {
    "CNN_1": [32, 'M'],
    "CNN_2": [32, 'M', 64, 'M'],
    "CNN_3": [32, 'M', 64, 'M', 128, 'M'],
    "CNN_4": [32, 'M', 64, 'M', 128, 'M', 256, 'M'],
}


class Fashion_Simple_CNN(nn.Module):
  """Small configurable CNN: conv blocks from ``CNN_Configs`` plus a two-layer linear head.

  Args:
    in_channel: channels of the input image (1 for grayscale).
    num_classes: number of output classes.
    cnn_type: key into ``CNN_Configs`` / ``CNN_Dropout_rates``.
    kernel_size: convolution kernel size. The padding is fixed at 1, so only a
      3x3 kernel keeps the spatial size that the linear head is sized for.
    act_func: activation module placed after every convolution (the same
      instance is reused at every position).
    use_dropout_reg: add a Dropout layer after each max-pool.
    use_batch_norm: add BatchNorm2d after each convolution.
  """

  # Input image size (H, W) the linear head is sized for.
  _INPUT_HW = (64, 128)
  # Dropout probability between the two linear layers; unlisted types use the default.
  _FC_DROPOUT = {"CNN_2": 0.15}
  _FC_DROPOUT_DEFAULT = 0.25

  def __init__(self, in_channel = 1, num_classes = 9, cnn_type = "CNN_3", kernel_size = (3,3), 
               act_func = act_func_dict['Relu'], use_dropout_reg = True, use_batch_norm = True):
    super(Fashion_Simple_CNN, self).__init__() 
    self.in_channel = in_channel
    self.use_dropout_reg = use_dropout_reg
    self.use_batch_norm = use_batch_norm
    self.kernel_size = kernel_size
    conv_architecture = CNN_Configs[cnn_type]
    self.conv_layers = self._create_cov_layers(conv_architecture, CNN_Dropout_rates[cnn_type], kernel_size, act_func)

    # Size of the flattened feature map: every 'M' halves H and W, and the last
    # int in the plan is the channel count, e.g. CNN_3 -> 128 * (64/8) * (128/8).
    num_pools = conv_architecture.count('M')
    last_channels = [c for c in conv_architecture if isinstance(c, int)][-1]
    height, width = self._INPUT_HW
    fc_in_features = last_channels * (height // 2**num_pools) * (width // 2**num_pools)

    # Two fully connected layers with dropout in between.
    self.fcs = nn.Sequential(
        nn.Linear(in_features=fc_in_features, out_features=1024),
        nn.Dropout(p=self._FC_DROPOUT.get(cnn_type, self._FC_DROPOUT_DEFAULT)),
        nn.Linear(in_features=1024, out_features=num_classes),
        )

  def forward(self, x):
    """Return raw class scores (logits) of shape (batch, num_classes).

    No softmax is applied here: ``nn.CrossEntropyLoss`` (the loss this notebook
    trains with) applies log-softmax itself and expects unnormalised scores.
    The arg-max over dim 1 is still the predicted class; call
    ``torch.softmax(out, dim=1)`` when probabilities are needed.
    """
    x = self.conv_layers(x)
    # Flatten everything except the batch dimension for the linear layers.
    x = x.view(x.size(0), -1)
    return self.fcs(x)
  
  def _create_cov_layers(self, conv_architecture, conv_dropout_rates, kernel_size, act_func):
    """Build the convolutional part described by ``conv_architecture``.

    Int entries become Conv2d (+ optional BatchNorm2d) + ``act_func``; 'M'
    entries become a 2x2 max-pool, followed by Dropout with the next rate from
    ``conv_dropout_rates`` when dropout regularisation is enabled.
    """
    # Position of the next dropout rate to consume (one per pooling layer).
    index = 0
    in_channel = self.in_channel
    layers = []

    for x in conv_architecture:
      if isinstance(x, int):
        layers.append(nn.Conv2d(in_channels=in_channel, out_channels=x, 
                                kernel_size=kernel_size, stride=(1,1), padding=(1,1)))
        if self.use_batch_norm:
          layers.append(nn.BatchNorm2d(x))
        layers.append(act_func)
        # The next convolution consumes what this one produced.
        in_channel = x
      elif x == 'M':
        layers.append(nn.MaxPool2d(kernel_size=(2,2), stride=(2,2)))
        if self.use_dropout_reg:
          layers.append(nn.Dropout(p=conv_dropout_rates[index]))
          index += 1

    return nn.Sequential(*layers)

  def reg_init_weights(self, m):
    '''
        Weight initialisation hook for ``model.apply``: Kaiming-normal weights
        and a constant 0.01 bias for every Conv2d / Linear module.
    '''
    if isinstance(m, (nn.Conv2d, nn.Linear)):
      nn.init.kaiming_normal_(m.weight)
      # Layers created with bias=False have no bias tensor to fill.
      if m.bias is not None:
        m.bias.data.fill_(0.01)

Modified CNN From Tutorial

In [None]:
class CNN_Tutorial(nn.Module):
    """Two-block CNN (conv -> batch norm -> activation -> max-pool) with a three-layer linear head.

    Args:
        in_channel: channels of the input image (1 for grayscale).
        num_classes: number of output classes.
        first_kernel: kernel size of the first (unpadded) convolution.
        sec_kernel: kernel size of the second (unpadded) convolution.
        act_func: activation module; the same instance is reused at every position.
        p_dropout: dropout probability after the first hidden linear layer.
    """

    def __init__(self, in_channel = 1, num_classes = 9, first_kernel = 3, sec_kernel = 3,  
                           act_func = act_func_dict['Relu'], p_dropout=0.15):
        super(CNN_Tutorial, self).__init__()
        
        # Input size of the first linear layer for 64x128 images: an unpadded
        # convolution with kernel k shrinks each side by k - 1, and the 2x2
        # max-pool that follows halves it (rounding down).
        cov1_param1 = (64 - first_kernel + 1) // 2
        cov1_param2 = (128 - first_kernel + 1) // 2
        cov2_param1 = (cov1_param1 - sec_kernel + 1) // 2
        cov2_param2 = (cov1_param2 - sec_kernel + 1) // 2
        # 20 is the channel count produced by the second convolution.
        fc1_input_size = 20 * cov2_param1 * cov2_param2
        print(f"The input size of my first linear layer is: {fc1_input_size}")

        # conv block 1: in_channel -> 10 feature maps
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channel, out_channels=10, kernel_size=first_kernel),
            nn.BatchNorm2d(10),
            act_func,
            nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))
        )

        # conv block 2: 10 -> 20 feature maps
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=20, kernel_size=sec_kernel),
            nn.BatchNorm2d(20),
            act_func,
            nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))
        )

        # fully connected classifier head
        self.fcs = nn.Sequential(
            nn.Linear(fc1_input_size, 600),
            act_func,
            nn.Dropout(p=p_dropout),
            nn.Linear(600, 120),
            act_func,
            nn.Linear(120, num_classes)
        )

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, num_classes)."""
        x = self.conv1(x)
        x = self.conv2(x)
        # Flatten everything except the batch dimension for the linear layers.
        x = x.view(x.size(0), -1)
        x = self.fcs(x)
        # dim is given explicitly: the class axis of the (batch, classes) output.
        return F.log_softmax(x, dim=1)

    def reg_init_weights(self, m):
      '''
          Weight initialisation hook for ``model.apply``: Kaiming-normal weights
          and a constant 0.01 bias for every Conv2d / Linear module.
      '''
      if isinstance(m, (nn.Conv2d, nn.Linear)):
          nn.init.kaiming_normal_(m.weight)
          # Layers created with bias=False have no bias tensor to fill.
          if m.bias is not None:
              m.bias.data.fill_(0.01)

CNN with Weights Initialization

In [None]:
class BasicCNN(nn.Module):
    """Stack of 3x3 conv blocks followed by global average pooling and one linear layer.

    Args:
        channel_sizes: channel counts ``[in, c1, c2, ...]``; each consecutive
            pair defines one block.
        layers: number of convolutions per block (values below 1 behave like 1).
        batch_norm: add BatchNorm2d after every convolution when truthy.
        dropout: probability for a Dropout2d after every activation, or ``None``
            for no dropout layers.
        num_classes: number of output classes.
    """

    def __init__(self, channel_sizes, layers, batch_norm, dropout, num_classes):
        super(BasicCNN, self).__init__()
        modules = []
        num_blocks = len(channel_sizes) - 1
        for block_idx in range(num_blocks):
            out_channels = channel_sizes[block_idx + 1]
            for layer_idx in range(max(layers, 1)):
                # Only the first convolution of a block changes the channel count.
                in_channels = channel_sizes[block_idx] if layer_idx == 0 else out_channels
                modules.append(nn.Conv2d(in_channels, out_channels, 3, padding=1, bias=False))
                if batch_norm:
                    modules.append(nn.BatchNorm2d(out_channels))
                modules.append(nn.ReLU(True))
                if dropout is not None:
                    modules.append(nn.Dropout2d(dropout, inplace=False))

            # Down-sample between blocks, but not after the last one.
            if block_idx != num_blocks - 1:
                modules.append(nn.MaxPool2d(2,2))
        
        self.cnn_core = nn.Sequential(*modules)
        # Global average pooling collapses each feature map to a single value.
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.linear = nn.Linear(channel_sizes[-1], num_classes)
            
    def forward(self, x):
        """Return raw class scores (logits) of shape (batch, num_classes).

        No softmax is applied here: ``nn.CrossEntropyLoss`` (the loss this
        notebook trains with) applies log-softmax itself and expects
        unnormalised scores. The arg-max over dim 1 is still the predicted
        class; call ``torch.softmax(out, dim=1)`` when probabilities are needed.
        """
        x = self.cnn_core(x)
        x = self.gap(x)
        x = x.view(x.size(0), -1)
        return self.linear(x)
    
    def reg_init_weights(self, m):
      '''
          Weight initialisation hook for ``model.apply``: Kaiming-normal weights
          for every Conv2d / Linear module and a constant 0.01 bias where the
          module has one.
      '''
      if isinstance(m, (nn.Conv2d, nn.Linear)):
          nn.init.kaiming_normal_(m.weight)
          # The convolutions here are created with bias=False, so bias is None.
          if m.bias is not None:
              m.bias.data.fill_(0.01)

  

# Hyper-parameters & Tuning Parameters

In [None]:
# All tunable settings in one place.

# --- model selection / checkpointing ---
model_name = "./VGG16_model_exp.tar"   # checkpoint file read by the reload cell
vgg_type = "VGG16"                     # key into VGG_types
cnn_config = [1,64,128,256]            # channel_sizes passed to BasicCNN
act_func = act_func_dict['LeakyRelu']
kernel_size = (3,3)
stride = (1,1)
first_kernel = 5                       # CNN_Tutorial: first conv kernel size
sec_kernel = 5                         # CNN_Tutorial: second conv kernel size
p_dropout = 0.25
num_layers = 1
use_batch_norm = True

# --- data ---
training_set_length = 60000
p_train = 0.95                         # fraction of samples used for training
batch_size = 128
small_train_size = 500                 # size of the small debugging train split
small_val_size = 100                   # size of the small debugging validation split

# --- optimisation ---
loss_key = "CEL"                       # key understood by selectLoss
optimizer_key = "Adam"                 # key understood by selectOptimizer
lr = 1e-5
momentum = 0.5                         # used by the SGD / RMSprop options
beta1 = 0.9                            # Adam betas
beta2 = 0.999
eps = 1e-5                             # Adam epsilon
patience = 4                           # ReduceLROnPlateau patience
step_size = 3                          # every number of epochs decrease the learning rate
gamma = 0.1                            # multiplicative learning-rate decay factor
epoch_start = 1
num_epochs = 60

# Loss Functions & Optimizers

Dictionary of Loss Functions

In [None]:
def selectLoss(key):
  """Return a freshly constructed loss module for ``key``.

  Supported keys: "CEL" (CrossEntropyLoss), "KLDL" (KLDivLoss), "NLL" (NLLLoss)
  and "MSE" (MSELoss). For any other key the string "Invalid loss function!"
  is returned instead of a module.
  """
  loss_classes = {
      "CEL": nn.CrossEntropyLoss,
      "KLDL": nn.KLDivLoss,
      "NLL": nn.NLLLoss,
      "MSE": nn.MSELoss,
  }
  if key not in loss_classes:
    return "Invalid loss function!"
  return loss_classes[key]()
  

Dictionary of Optimizers

In [None]:
def selectOptimizer(key, model, lr, momentum):
  """Build and return the optimizer named by ``key`` for ``model``'s parameters.

  Only the requested optimizer is constructed, so settings that are invalid for
  an optimizer that was not selected cannot raise.

  Args:
    key: one of "SGD", "Adam", "RMS" (RMSprop), "AdaG" (Adagrad), "AdaD" (Adadelta).
    model: module whose ``parameters()`` are optimised.
    lr: learning rate.
    momentum: momentum, used by the SGD and RMSprop options only.

  Returns:
    The optimizer instance, or the string "Invalid optimizer!" for an unknown key.

  Note:
    The "Adam" option reads the module-level globals ``beta1``, ``beta2`` and ``eps``.
  """
  factories = {
      "SGD": lambda: optim.SGD(model.parameters(), lr=lr, momentum=momentum),
      "Adam": lambda: optim.Adam(model.parameters(), lr=lr, betas=(beta1, beta2), eps=eps),
      "RMS": lambda: optim.RMSprop(model.parameters(), lr=lr, momentum=momentum),
      "AdaG": lambda: optim.Adagrad(model.parameters(), lr=lr),
      "AdaD": lambda: optim.Adadelta(model.parameters(), lr=lr)
  }
  factory = factories.get(key)
  if factory is None:
    return "Invalid optimizer!"
  return factory()


# Create Models

Simple CNN with Weights Initialization

In [None]:
# Build a BasicCNN from the configured channel sizes, run the weight-initialisation
# hook over its sub-modules and print the resulting architecture.
Basic_CNN = BasicCNN(
    channel_sizes=cnn_config,
    layers=1,
    batch_norm=True,
    dropout=0.15,
    num_classes=9,
)
Basic_CNN.apply(Basic_CNN.reg_init_weights)
print(Basic_CNN)

VGG Net

In [None]:
# Instantiate the VGG11 variant with ReLU activations and print its layers.
vgg_CNN = Fashion_VGG_CNN(
    in_channels=1,
    num_classes=9,
    vgg_type="VGG11",
    act_func=act_func_dict['Relu'],
)
print(vgg_CNN)

Simple CNN

In [None]:
# Instantiate the three-block simple CNN with dropout and batch norm, then print it.
simple_CNN = Fashion_Simple_CNN(
    in_channel=1,
    num_classes=9,
    cnn_type="CNN_3",
    kernel_size=(3,3),
    act_func=act_func_dict['Relu'],
    use_dropout_reg=True,
    use_batch_norm=True,
)
print(simple_CNN)

# CNN Pipeline


In [None]:
def createSmallTrainValIndicies(small_train_size, small_val_size, training_set_length):
  """Draw small random train / validation index sets for quick experiments.

  All indices ``0 .. training_set_length - 1`` are shuffled (global ``random``
  state). The training indices are the first ``small_train_size`` entries of
  the shuffled array; the validation indices are ``small_val_size`` entries
  starting at its midpoint.

  Returns:
    ``(small_train_indices, small_val_indices)`` as integer NumPy arrays.
  """
  shuffled = np.arange(training_set_length, dtype=int)
  rand.shuffle(shuffled)
  # Validation samples are taken from the second half of the shuffled indices.
  midpoint = ma.floor(len(shuffled)/2)
  return shuffled[:small_train_size], shuffled[midpoint:midpoint + small_val_size]

Mini-batches for Train & Validation

In [None]:
# Small random index sets used to slice the data for quick experiments.
small_train_indices, small_val_indices = createSmallTrainValIndicies(
    small_train_size, small_val_size, training_set_length)
print(f"Train size: {len(small_train_indices)}, Validation size: {len(small_val_indices)}")

# Small training dataset; the target transform is applied to the whole label array here.
small_dataset = MyDataSet(
    training_set_pickle_path,
    training_labels_path,
    transform=ImageTransforms,
    idx=small_train_indices,
    target_transform=target_tranform,
)
small_dataset.targets = small_dataset.target_transform(small_dataset.targets)

# Small validation dataset, prepared the same way.
small_valset = MyDataSet(
    training_set_pickle_path,
    training_labels_path,
    transform=ImageTransforms,
    idx=small_val_indices,
    target_transform=target_tranform,
)
small_valset.targets = small_valset.target_transform(small_valset.targets)

# Shuffled loaders over both small datasets.
small_train_loader = DataLoader(small_dataset, batch_size=batch_size, shuffle=True)
small_val_loader = DataLoader(small_valset, batch_size=batch_size, shuffle=True)

Full Training & Validation Sets

In [None]:
# Random train / validation split of the sample indices.
training_indices, val_indices = createTrainValIndices(training_set_length, p_train=p_train)

# Training dataset: uses the augmenting ImageTransforms pipeline.
training_set = MyDataSet(training_set_pickle_path, training_labels_path, 
                         transform=ImageTransforms, idx=training_indices, target_transform=target_tranform)
# The dataset does not apply target_transform itself, so shift the labels here
# and store them as integers.
training_set.targets = training_set.target_transform(training_set.targets).astype(int)
print(f"My training set shape is: {training_set.data.shape}")
print(f"My training set labels shape is: {training_set.targets.shape}")

# Validation images get the same tensor conversion and normalisation as the
# training images but NO random augmentation, so validation metrics are
# deterministic and measured on unaltered images.
ValidationTransforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([mean], [std]),
])

# Validation dataset, with labels prepared the same way as for training.
validation_set = MyDataSet(training_set_pickle_path, training_labels_path, 
                           transform=ValidationTransforms, idx=val_indices, target_transform=target_tranform)
validation_set.targets = validation_set.target_transform(validation_set.targets).astype(int)
print(f"My validation set shape is: {validation_set.data.shape}")
print(f"My validation set labels shape is: {validation_set.targets.shape}")

Create Training & Validation Loaders

In [None]:
# Training batches are reshuffled every epoch and loaded by 4 worker processes
# into pinned memory; validation batches keep a fixed order.
trainingSetDataLoader = DataLoader(
    training_set,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)
validationSetDataLoader = DataLoader(validation_set, batch_size=batch_size, shuffle=False)

Create different CNN Models

In [None]:
# Tutorial CNN, built from the configured kernel sizes, activation and dropout.
# NOTE(review): this rebinds the name `CNN_Tutorial` from the class to an
# instance, so the cell cannot be re-run without re-running the class
# definition first. The name is kept in case other cells use it.
CNN_Tutorial = CNN_Tutorial(
    in_channel=1,
    num_classes=9,
    first_kernel=first_kernel,
    sec_kernel=sec_kernel,
    act_func=act_func,
    p_dropout=p_dropout,
)
CNN_Tutorial.apply(CNN_Tutorial.reg_init_weights)
# print(CNN_Tutorial)

In [None]:
# Basic CNN built from the configured channel sizes (dropout probability 0),
# followed by the weight-initialisation hook.
cnn_basic = BasicCNN(
    channel_sizes=cnn_config,
    layers=1,
    batch_norm=True,
    dropout=0,
    num_classes=9,
)
cnn_basic.apply(cnn_basic.reg_init_weights)

In [None]:
# Two-block simple CNN with dropout and batch norm, followed by the
# weight-initialisation hook.
simple_CNN = Fashion_Simple_CNN(
    in_channel=1,
    num_classes=9,
    cnn_type="CNN_2",
    kernel_size=(3,3),
    act_func=act_func_dict['Relu'],
    use_dropout_reg=True,
    use_batch_norm=True,
)
simple_CNN.apply(simple_CNN.reg_init_weights)

In [None]:
# VGG model configured from the hyper-parameter cell; the custom weight
# initialisation is left disabled for this model.
VGG_CNN = Fashion_VGG_CNN(
    in_channels=1,
    num_classes=9,
    dropout=p_dropout,
    vgg_type=vgg_type,
    act_func=act_func,
)
# VGG_CNN.apply(VGG_CNN.reg_init_weights)
print(VGG_CNN)

Set-up Loss Function and Optimizers

In [None]:
# Select the network to train.
model = VGG_CNN
# print(model)

# Move the model's parameters into GPU memory when a GPU is available.
if USE_CUDA:
  model = model.to(DEVICE)

# Loss function chosen by its key.
loss_function = selectLoss(key=loss_key)

# Optimizer chosen by its key; print its state dict (hyper-parameters and
# parameter groups) for reference. state_dict is a method and must be called.
optimizer = selectOptimizer(key=optimizer_key, model=model, lr=lr, momentum=momentum)
print(optimizer.state_dict())
# Scheduler that lowers the learning rate when the monitored value stops
# decreasing for `patience` consecutive steps.
plateau_lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=patience, verbose=True)

In [None]:
# Per-batch loss histories and their matching iteration indices; the training
# and evaluation functions append to these lists.
train_losses, train_counter = [], []
val_losses, val_counter = [], []

In [None]:
# Starting iteration numbers handed to the training / evaluation functions.
globalTrainCounter = globalValidationCounter = 0

# Using Full Training Set

In [None]:
# Dataset over all 60000 labelled samples (no validation hold-out).
full_training_set = MyDataSet(
    training_set_pickle_path,
    training_labels_path,
    transform=ImageTransforms,
    idx=np.arange(60000),
    target_transform=target_tranform,
)
# The dataset does not apply target_transform itself: shift the labels here and
# store them as integers.
full_training_set.targets = full_training_set.target_transform(full_training_set.targets).astype(int)

# Shuffled loader over the full training set.
full_training_loader = DataLoader(
    full_training_set,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)

# GridSearchCV with Skorch For CNN

In [None]:
# skorch consumes plain arrays: images as float32 in (N, 1, 64, 128) layout and
# labels as a flat int64 vector. The raw stored arrays are used here, so the
# torchvision transform pipeline is not applied to these images.
ts = full_training_set.data.reshape(-1, 1, 64, 128).astype('float32')
ts_labels = full_training_set.targets.reshape((len(full_training_set.targets),)).astype('int64')

vs = validation_set.data.reshape(-1, 1, 64, 128).astype('float32')
vs_labels = validation_set.targets.reshape((len(validation_set.targets),)).astype('int64')

print(vs_labels)
print(f"Training set shape: {ts.shape}, Validation set shape: {vs.shape}")
print(f"Training labels shape: {ts_labels.shape}, Validation labels shape: {vs_labels.shape}")

In [None]:
# The notebook's import cell has the skorch import commented out, so bring the
# wrapper class into scope here (skorch is installed by the setup cell).
from skorch import NeuralNetClassifier

# Fix the CPU and CUDA random seeds so that runs are repeatable.
torch.manual_seed(0)
torch.cuda.manual_seed_all(0)

# Wrap the VGG model class in a scikit-learn compatible skorch classifier.
# `module__*` arguments are forwarded to Fashion_VGG_CNN's constructor
# (in_channels, num_classes, dropout, vgg_type, act_func).
cnn = NeuralNetClassifier(
    module = Fashion_VGG_CNN,
    module__in_channels = 1,
    module__num_classes = 9,
    module__dropout = p_dropout,
    module__vgg_type = vgg_type,
    module__act_func = act_func,
    max_epochs=80,
    lr=5e-5,
    optimizer=torch.optim.Adam,
    device=DEVICE,
    criterion=torch.nn.CrossEntropyLoss,
    batch_size=batch_size
)

In [None]:
# Train the skorch-wrapped network on the full training arrays
# (the trailing semicolon suppresses the notebook's echo of the return value).
cnn.fit(X=ts, y=ts_labels);

In [None]:
# Persist the trained weights, the optimizer state and the training history.
cnn.save_params(
    f_params='model.pkl',
    f_optimizer='opt.pkl',
    f_history='history.json',
)

In [None]:
# Rebuild the dataset restricted to the training split and convert it to the
# plain arrays that the grid search consumes.
full_training_set = MyDataSet(
    training_set_pickle_path,
    training_labels_path,
    transform=ImageTransforms,
    idx=training_indices,
    target_transform=target_tranform,
)
full_training_set.targets = full_training_set.target_transform(full_training_set.targets)

# Images as float32 (N, 1, 64, 128); labels as a flat int64 vector.
gs_ts = full_training_set.data.reshape(-1, 1, 64, 128).astype('float32')
gs_tl = full_training_set.targets.reshape((len(full_training_set.targets),)).astype('int64')
print(f"Training set shape: {gs_ts.shape}, Training Labels shape: {gs_tl.shape}")

In [None]:
# 3-fold cross-validated grid search over learning rate, epoch count and
# dropout, scored by accuracy. refit=False: no final model is trained on the
# best setting; only the scores are collected.
params = dict(
    lr=[1e-5, 5e-7],
    max_epochs=[5, 10],
    module__dropout=[0.25, 0.5],
)
gs = GridSearchCV(
    cnn,
    params,
    refit=False,
    cv=3,
    scoring='accuracy',
    verbose=True,
)
gs.fit(gs_ts, gs_tl)
print(gs.best_score_, gs.best_params_)

# Reload Model From Checkpoint

In [None]:
# Set `load` to True to resume training from the checkpoint stored in `model_name`.
load = False
if load:
  # DEVICE is the CPU device whenever CUDA is unavailable, so a single
  # map_location covers both the CPU and the GPU case.
  checkpoint = torch.load(model_name, map_location=DEVICE)
  epoch_start = checkpoint['epoch']
  model.load_state_dict(checkpoint['model_state_dict'])
  # NOTE(review): the optimizer state is restored only when running on a GPU,
  # as before - confirm whether it should also be restored on CPU.
  if USE_CUDA:
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
  print(f"Reloaded from checkpoint successfully!")

# Print the optimizer's state dict (state_dict is a method and must be called).
print(optimizer.state_dict())

# CNN Training & Model Evaluation Functions

In [None]:
def train_cnn(model, optimizer, loss_function, train_loader, 
              train_losses, train_counter, globalTrainCounter):
  """Train ``model`` for one epoch over ``train_loader``.

  Args:
    model: network to train; put into training mode (dropout / batch-norm active).
    optimizer: optimizer updating the model parameters.
    loss_function: callable ``(outputs, labels) -> scalar loss tensor``.
    train_loader: iterable of ``(batch_data, batch_labels)`` pairs.
    train_losses: list extended in place with one float loss per batch.
    train_counter: list extended in place with the iteration number of each batch.
    globalTrainCounter: iteration number to continue from. An int argument
      cannot be updated for the caller, so numbering continues from the larger
      of this value and the last entry already in ``train_counter``.

  Returns:
    ``(avgTrainAccuracy, avgTrainLoss)`` as Python floats: accuracy in percent
    and mean batch loss, both over this epoch only.
  """
  correct = 0
  total = 0
  epoch_loss_sum = 0.0
  epoch_batches = 0
  step = max(globalTrainCounter, train_counter[-1] if train_counter else 0)

  model.train()
  for batch_data, batch_labels in train_loader:
    # Reset gradients so they do not accumulate across batches.
    optimizer.zero_grad()
    batch_data = batch_data.to(DEVICE)
    batch_labels = batch_labels.to(DEVICE)

    # Forward pass, loss, backward pass, parameter update.
    outputs = model(batch_data)
    current_loss = loss_function(outputs, batch_labels)
    current_loss.backward()
    optimizer.step()

    # Record the loss as a plain float: storing the tensor itself would keep
    # its autograd history (and device memory) alive for every batch.
    batch_loss = current_loss.item()
    train_losses.append(batch_loss)
    step += 1
    train_counter.append(step)
    epoch_loss_sum += batch_loss
    epoch_batches += 1

    # Running accuracy: the predicted class is the arg-max over the class axis.
    pred = outputs.detach().argmax(dim=1)
    correct += (pred == batch_labels).sum().item()
    total += batch_data.size(0)

  # Epoch averages; an empty loader yields 0% accuracy and a NaN loss.
  avgTrainAccuracy = (float(correct)/float(total))*100 if total else 0.0
  avgTrainLoss = epoch_loss_sum/epoch_batches if epoch_batches else float('nan')
  return avgTrainAccuracy, avgTrainLoss

In [None]:
def evaluate_cnn(model, validation_loader, loss_function, 
                 val_losses, val_counter, globalValCounter):
  """Evaluate ``model`` on ``validation_loader`` without updating it.

  Args:
    model: network to evaluate; put into eval mode (dropout off, batch-norm
      using running statistics).
    validation_loader: iterable of ``(val_data, val_labels)`` pairs.
    loss_function: callable ``(outputs, labels) -> scalar loss tensor``.
    val_losses: list extended in place with one float loss per batch.
    val_counter: list extended in place with the iteration number of each batch.
    globalValCounter: iteration number to continue from. An int argument cannot
      be updated for the caller, so numbering continues from the larger of this
      value and the last entry already in ``val_counter``.

  Returns:
    ``(avgValAccuracy, avgValLoss)`` as Python floats: accuracy in percent and
    mean batch loss, both over this evaluation pass only.
  """
  correct = 0
  total = 0
  pass_loss_sum = 0.0
  pass_batches = 0
  step = max(globalValCounter, val_counter[-1] if val_counter else 0)

  model.eval()
  # No gradients are needed for evaluation.
  with torch.no_grad():
    for val_data, val_labels in validation_loader:
      val_data, val_labels = val_data.to(DEVICE), val_labels.to(DEVICE)
      val_outputs = model(val_data)

      batch_loss = loss_function(val_outputs, val_labels).item()
      val_losses.append(batch_loss)
      step += 1
      val_counter.append(step)
      pass_loss_sum += batch_loss
      pass_batches += 1

      # Count correct predictions as a Python int, so the accuracy below is
      # ordinary float arithmetic rather than integer-tensor division.
      predicted = val_outputs.argmax(dim=1)
      correct += (predicted == val_labels).sum().item()
      total += val_labels.size(0)

  # Averages for this pass; an empty loader yields 0% accuracy and a NaN loss.
  avgValAccuracy = (float(correct)/float(total))*100 if total else 0.0
  avgValLoss = pass_loss_sum/pass_batches if pass_batches else float('nan')
  return avgValAccuracy, avgValLoss

# Train Model

In [None]:
# training converges at around 6 epochs for the tutorial cnn
# Train for a number of epochs, tracking per-epoch metrics and checkpointing
# whenever the validation accuracy improves.
last_epoch_num = 0
currentAvgValAccuracy = 0
train_acc = []
train_loss = []
val_acc = []
val_loss = []
for epoch in range(epoch_start, num_epochs+1):
  start = time.time()
  # train cnn
  avgTrainAccuracy, avgTrainLoss = train_cnn(model=model, optimizer=optimizer, loss_function=loss_function, 
            train_loader=trainingSetDataLoader, train_losses=train_losses, 
            train_counter=train_counter, globalTrainCounter=globalTrainCounter)
  # The counter is an int, so train_cnn cannot update it for us; resync it from
  # the last recorded batch index so the next epoch keeps counting upwards
  if train_counter:
    globalTrainCounter = train_counter[-1]

  # evaluate cnn
  avgValAccuracy, avgValLoss = evaluate_cnn(model=model, validation_loader=validationSetDataLoader, 
               loss_function=loss_function, val_losses=val_losses, 
               val_counter=val_counter, globalValCounter=globalValidationCounter)
  # Same resync for the validation batch counter
  if val_counter:
    globalValidationCounter = val_counter[-1]

  # Work with plain floats from here on, so the history lists (pickled later)
  # hold no tensors and the printed values are readable
  avgTrainAccuracy, avgTrainLoss = float(avgTrainAccuracy), float(avgTrainLoss)
  avgValAccuracy, avgValLoss = float(avgValAccuracy), float(avgValLoss)

  # adding to the list
  val_acc.append(avgValAccuracy)
  val_loss.append(avgValLoss)
  train_acc.append(avgTrainAccuracy)
  train_loss.append(avgTrainLoss)

  # update plateau scheduler
  # NOTE(review): the scheduler is stepped on the training loss; confirm this
  # is intended rather than the validation loss
  plateau_lr_scheduler.step(avgTrainLoss)

  # print epoch info
  print(f"Train Epoch: {epoch}, Avg Training Loss: {avgTrainLoss}, Avg Training Accuracy: {avgTrainAccuracy}%")
  print(f"Train Epoch: {epoch}, Avg Validation Loss: {avgValLoss}, Avg Validation Accuracy: {avgValAccuracy}%\n")

  # save model when validation accuracy improves
  if (currentAvgValAccuracy < avgValAccuracy):
    currentAvgValAccuracy = avgValAccuracy
    torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()
            }, str(model_name), _use_new_zipfile_serialization=False)
    print("\nSaving my model!")

  end = time.time()
  print(f"Run time per epoch: {end - start} (s) = {(end - start)/60} (mins)\n")
  last_epoch_num = epoch

In [None]:
# Bundle the per-epoch metric histories under the keys used for graphing later
results = dict(
    val_acc=val_acc,
    train_acc=train_acc,
    val_loss=val_loss,
    train_loss=train_loss,
)
# Write the bundle to disk in binary mode with the highest pickle protocol
with open("./results_vgg16_batch128.pickle", 'wb') as f:
  pickle.dump(results, f, pickle.HIGHEST_PROTOCOL)

# # To load the same results
# with open("./results_vgg13_leakyRelu.pickle", 'rb') as f:
#   results = pickle.load(f)


# Save Model

In [None]:
# Final checkpoint written once training is over: the last completed epoch
# number plus the model and optimizer state dicts.
torch.save(
    dict(
        epoch=last_epoch_num,
        model_state_dict=model.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
    ),
    str(model_name),
    _use_new_zipfile_serialization=False,
)
print("\nSaving my model!\n")

# Final CNN Training & Prediction on Test Set

In [None]:
#TODO: need to finish the predict function
# to predict on test set 
def predictOnTestSet(testSetLoader, model,path ="./output.csv", label_offset=5):
  """Predict a class for every test sample and write the result to a CSV file.

  Args:
    testSetLoader: iterable yielding batches of inputs only (no labels).
    model: trained network; it is switched to eval mode here.
    path: destination CSV file. The DataFrame index is written as the first
      (unnamed) column, followed by a `class` column.
    label_offset: value added to each predicted class index before writing.
      Defaults to 5, the shift that was previously hard-coded.
      NOTE(review): presumably this undoes a label shift applied when the
      training labels were prepared; confirm against the dataset code.

  Returns:
    None. The predictions are only written to `path`.
  """
  predictedLabels = []
  model.eval()
  # Inference only, so no gradients are needed
  with torch.no_grad():
    for test_batch in testSetLoader:
      test_batch = test_batch.to(DEVICE)
      outputs = model(test_batch)
      _, predicted = torch.max(outputs, 1)
      # Convert the whole batch at once instead of calling .item() per element
      predictedLabels.extend((predicted + label_offset).cpu().tolist())
  dfPredicted = pd.DataFrame(predictedLabels, columns=['class'])
  print(f"Length of predicted list is {len(predictedLabels)}" )
  print(path)
  dfPredicted.to_csv(path)
  

In [None]:
# Build the test dataset from the pickled test set, passing ImageTransforms as its transform
testset = MyTestSet(test_set_pickle_path,transform=ImageTransforms) 
# Debug helpers (left disabled):
# print(testset.data.shape)
# print(testset.data[0:2])
# shuffle=False so batches are drawn in dataset order
testsetLoader = DataLoader(testset, batch_size=batch_size, shuffle=False)
# Fetch the first batch; only the disabled debug prints below use it in this cell
imgs = (next(iter(testsetLoader)))
# print(imgs.shape)
# print(imgs[0:2])
# Run inference over the whole test set; no path is given, so the function's default CSV path is used
predictOnTestSet(testsetLoader, model)
