# 4. Design own Experiment

## A. Develop a plan

Come up with a plan for what you want to explore and the metrics you will use. Determine the range of options in each dimension to explore (e.g. L options in dimension 1, M options in dimension 2, and N options in dimension 3). You don't have to evaluate all L * M * N options unless you want to. Instead, think about using a linear search strategy where you hold two parameters constant and optimize the third, then switch things up, optimizing one parameter at a time in a round-robin or randomized fashion. Overall, plan to evaluate 50-100 network variations (again, automate this process).


ROUGH PLAN:

dimension 1 = number of convolution filter channels (output channels of the first conv layer). L = 4

dimension 2 = number of training epochs. M = 4

dimension 3 = batch size. N = 4 (multiples of 32)

- The number of conv. filter channels is chosen because convolution is the workhorse of the NN.
Increasing the number of filters increases the number of features that can be learned.

- Increasing the number of epochs was shown to increase the accuracy, as can be seen in Task 1. We need to make sure the network is not overfitting.

- A larger batch size makes fuller use of the available CPU capacity at each step, which should make the optimization more efficient.


In [1]:
# torch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data.dataloader import default_collate # to reshape

import torchvision
from torchvision import datasets, transforms

# import previous notebook
import nbimporter
import Task1AE as Note1AE
import Task1FG as Note1FG

# for visualization
from matplotlib import pyplot as plt
import numpy as np

# to order dict
import collections

# for tuning and reshape for GridSearch
from skorch.dataset import Dataset
from skorch import NeuralNetClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

In [2]:
def get_train_test_MNIST_data():
    """
    Load the MNIST train and test splits together with their label arrays.

    The data is downloaded into the local ``mnist`` directory when it is not
    already present. Every image is converted to a tensor and normalised.

    Returns:
        tuple: ``(X_train, y_train, X_test, y_test)`` where ``X_train`` and
        ``X_test`` are ``torchvision.datasets.MNIST`` datasets and
        ``y_train`` / ``y_test`` are numpy arrays holding the label of every
        sample, in dataset order.
    """
    # 0.1307 / 0.3081 are the commonly quoted mean / std of the MNIST pixels.
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.1307,), (0.3081,)),
    ])

    X_train = torchvision.datasets.MNIST(
        'mnist',
        train=True,
        download=True,
        transform=transform)
    X_test = torchvision.datasets.MNIST(
        'mnist',
        train=False,
        download=True,
        transform=transform)

    # Read the labels straight from the dataset instead of iterating over
    # (and transforming) every image just to throw the image away.
    y_train = np.array(X_train.targets)
    y_test = np.array(X_test.targets)

    return X_train, y_train, X_test, y_test

In [3]:
class NeuralNetworkDesign(nn.Module):
    """
    Convolutional MNIST classifier with a configurable number of filters.

    Layout:
        conv(5x5) -> max-pool(2) -> ReLU
        -> conv(5x5) -> 2d dropout -> max-pool(2) -> ReLU
        -> flatten -> linear(50) -> ReLU -> linear(10) -> log-softmax

    The second convolution always has twice as many output channels as the
    first one. The width of the first fully connected layer is computed for
    batched single-channel 28 x 28 inputs; any other spatial size makes
    ``forward`` fail with a shape error.

    Args:
        conv1_out_channels: number of output channels of the first
            convolution.
    """

    def __init__(self, conv1_out_channels):
        super().__init__()
        # Trace which configuration is being built (visible in search logs).
        print("conv1_out_channels:", conv1_out_channels)

        # 1. Convolutional part.
        # 28 x 28 input, 5 x 5 kernel -> 24 x 24, after 2 x 2 pooling
        # -> conv1_out_channels x 12 x 12.
        self.conv1 = nn.Conv2d(in_channels=1,
                               out_channels=conv1_out_channels,
                               kernel_size=5)

        # 12 x 12 input, 5 x 5 kernel -> 8 x 8, after 2 x 2 pooling
        # -> (2 * conv1_out_channels) x 4 x 4.
        self.conv2 = nn.Conv2d(in_channels=conv1_out_channels,
                               out_channels=conv1_out_channels * 2,
                               kernel_size=5)
        self.conv2_drop = nn.Dropout2d()  # default drop probability is 0.5

        # 2. Fully connected part.
        # Number of activations per sample after the second pooling step.
        self.in_features = conv1_out_channels * 2 * 4 * 4

        self.fc1 = nn.Linear(in_features=self.in_features, out_features=50)
        self.fc2 = nn.Linear(50, 10)
        self.flatten = nn.Flatten()

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: batch of images; the layer sizes assume shape
                (batch, 1, 28, 28).

        Returns:
            Tensor of shape (batch, 10) holding log-softmax outputs.
        """
        # 1. first conv, max pool, relu
        x = F.relu(F.max_pool2d(self.conv1(x), 2))

        # 2. second conv, dropout layer, max pool, relu
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))

        # 3. Flatten everything except the batch dimension. Unlike
        # view(-1, in_features) this never moves surplus activations into
        # the batch dimension: a wrong input size fails in fc1 instead of
        # silently producing an output with a different number of rows.
        x = self.flatten(x)

        # 4. fully connected, relu
        x = F.relu(self.fc1(x))
        x = self.fc2(x)

        # 5. convert the linear output into log-probabilities over classes
        return F.log_softmax(x, -1)

In [4]:
# >>>>>> For randomized search
class SliceDatasetX(Dataset):
    """Expose only the inputs (X) of a torch dataset with sklearn-style indexing.

    Indexing with an integer returns the input of a single sample. Indexing
    with a slice or any iterable of positions returns the selected inputs
    combined by ``collate_fn``.

    ``self.transform`` is not defined here; it is inherited from the
    ``Dataset`` base class (whose ``__init__`` is deliberately not called).
    NOTE(review): presumably this is skorch's ``Dataset.transform(X, y)``
    returning an ``(X, y)`` pair - confirm against the skorch version in use.

    Args:
        dataset: indexable dataset whose items unpack into the arguments of
            ``transform``.
        collate_fn: callable that merges a list of inputs into one batch.
    """

    def __init__(self, dataset, collate_fn=default_collate):
        self.dataset = dataset
        self.collate_fn = collate_fn

        # Concrete list of positions, so a slice can be resolved to indices.
        self._indices = list(range(len(self.dataset)))

    def __len__(self):
        return len(self.dataset)

    @property
    def shape(self):
        # One-dimensional "shape": only the number of samples is reported.
        return (len(self),)

    def _input_at(self, position):
        """Return the transformed input (first element) of one sample."""
        sample = self.dataset[position]
        return self.transform(*sample)[0]

    def __getitem__(self, i):
        # Single position: hand back the bare input, not a batch.
        if isinstance(i, (int, np.integer)):
            return self._input_at(i)

        # A slice is turned into explicit positions; anything else is
        # assumed to already be an iterable of positions.
        positions = self._indices[i] if isinstance(i, slice) else i
        return self.collate_fn([self._input_at(p) for p in positions])
    

In [5]:
# Seed torch's global random number generator.
# Question (open): results still change from run to run.
# NOTE(review): presumably other sources of randomness (DataLoader worker
# processes, CUDA kernels) are not controlled by this seed - TODO confirm.
torch.manual_seed(42)
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# 1. Load the MNIST datasets and their label arrays.
X_train, y_train, X_test, y_test = get_train_test_MNIST_data()

# 2. Candidate values for the three dimensions explored by the search.
CONV_CHANNELS = [5, 10, 15, 20]  # output channels of the first conv layer
BS = [64, 128, 160, 224]         # batch sizes
EPOCHS = [5, 8, 10, 12]          # number of training epochs

# 3. skorch wrapper, later plugged into the randomized search.
# Question: are module / batch_size / max_epochs needed when a search is run?
# They configure this baseline fit; the search log shows the searched
# parameters being replaced by each sampled candidate.
# Question: why are there no loss / accuracy values with optim.SGD and
# nn.NLLLoss?
# NOTE(review): the module already returns log_softmax; skorch's classifier
# appears to apply its own log to the module output when the criterion is
# NLLLoss, which would break the loss - TODO confirm against the skorch docs.
net = NeuralNetClassifier(
    module=NeuralNetworkDesign(conv1_out_channels=10),
    criterion=nn.CrossEntropyLoss,
    optimizer=optim.Adam,
    max_epochs=5,
    batch_size=64,
    iterator_train__num_workers=4,
    device=DEVICE,
)

net.fit(X_train, y_train)

conv1_out_channels: 10
  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m0.4560[0m       [32m0.9450[0m        [35m0.1881[0m  5.7295
      2        [36m0.2595[0m       [32m0.9632[0m        [35m0.1292[0m  5.8480
      3        [36m0.2377[0m       0.9588        0.1471  5.9193
      4        [36m0.2131[0m       0.9578        0.1612  5.7783
      5        0.2174       0.9613        0.1368  5.8164


<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=NeuralNetworkDesign(
    (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
    (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
    (conv2_drop): Dropout2d(p=0.5, inplace=False)
    (fc1): Linear(in_features=320, out_features=50, bias=True)
    (fc2): Linear(in_features=50, out_features=10, bias=True)
    (flatten): Flatten(start_dim=1, end_dim=-1)
  ),
)

## B. Predict the results

Before starting your evaluation, come up with a hypothesis for how you expect the network to behave along each dimension. Include these hypotheses in your report and then discuss whether the evaluation supported the hypothesis.


## TODO: 
come up with a hypothesis for how you expect the network to behave along each dimension.
discuss whether the evaluation supported the hypothesis.

In [6]:
# Question: This method returns the mean accuracy on the given data and labels for classifiers
# what score is this ? correct / total?
# Answer: mean accuracy is the fraction of samples whose predicted label
# equals the given label, i.e. correct / total - here on the test split.
net.score(X_test, y_test)

0.9682

## C. Execute your plan

Run the evaluation and report on the results.


In [None]:
# 1. Wrap the torch dataset so that it can be indexed with integers, slices
#    and index collections (the data is not converted to a numpy array).
X_slicable = SliceDatasetX(X_train)

# 2. Search space: one entry per explored dimension.
params = dict(
    module__conv1_out_channels=CONV_CHANNELS,
    batch_size=BS,
    max_epochs=EPOCHS,
)

# 3. Randomized search: 50 sampled candidates, 3-fold cross-validation each,
#    scored by accuracy; no final refit on the whole training set.
rs = RandomizedSearchCV(
    estimator=net,
    param_distributions=params,
    n_iter=50,
    scoring='accuracy',
    cv=3,
    refit=False,
    verbose=2,
    random_state=42,
)

# 4. Run the search and report the best candidate.
rs.fit(X_slicable, y_train)
print(f"best score: {rs.best_score_:.3f}, best params: {rs.best_params_}")



Fitting 3 folds for each of 50 candidates, totalling 150 fits
conv1_out_channels: 5
  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m0.5642[0m       [32m0.9530[0m        [35m0.1488[0m  3.2497
      2        [36m0.2399[0m       [32m0.9611[0m        [35m0.1241[0m  3.1050
      3        [36m0.2020[0m       [32m0.9641[0m        [35m0.1082[0m  3.0775
      4        [36m0.1850[0m       [32m0.9705[0m        [35m0.1000[0m  3.1893
      5        [36m0.1680[0m       [32m0.9720[0m        [35m0.0831[0m  3.2006
      6        [36m0.1605[0m       [32m0.9726[0m        0.0872  3.2224
      7        [36m0.1501[0m       [32m0.9741[0m        [35m0.0823[0m  3.2030
      8        0.1520       [32m0.9748[0m        [35m0.0822[0m  3.0022
[CV] END batch_size=224, max_epochs=8, module__conv1_out_channels=5; total time=  27.8s
conv1_out_channels: 5
  epoch    train_loss    valid_acc    

In [None]:
# Show the raw search results; the reporting helpers in this notebook read
# its 'rank_test_score', 'params', 'mean_test_score' and 'mean_fit_time'.
rs.cv_results_

In [None]:
def get_top5_model(grid, isTop):
    """
    Print the five best or the five worst candidates of a fitted search.

    Candidates are ordered by ``rank_test_score``. The ranks are not assumed
    to be the distinct values 1..n (they can repeat when candidates tie), so
    the selection is made by sorting rather than by looking up fixed rank
    numbers. If the search holds fewer than five candidates, all of them are
    printed.

    Args:
        grid: fitted search object whose ``cv_results_`` mapping provides
            ``rank_test_score``, ``params``, ``mean_test_score`` and
            ``mean_fit_time``.
        isTop: if truthy, print the best-ranked candidates; otherwise print
            the worst-ranked ones (still in ascending rank order).

    Returns:
        None. One line per candidate is printed: rank, parameters, mean test
        score and mean fit time in seconds.
    """
    results = grid.cv_results_
    ranks = np.asarray(results["rank_test_score"])
    all_params = results["params"]
    all_score = results["mean_test_score"]
    all_dur = results["mean_fit_time"]

    # Stable sort: tied candidates keep their original relative order.
    by_rank = np.argsort(ranks, kind="stable")
    chosen = by_rank[:5] if isTop else by_rank[-5:]

    title = "\nTop" if isTop else "\nBottom"
    print(title + " 5 model:")

    for idx in chosen:
        print(ranks[idx], ".", all_params[idx],
              "\tscore:", all_score[idx],
              "\tdur(s):", all_dur[idx])



In [None]:
# Report the best-ranked and then the worst-ranked candidates of the search.
get_top5_model(rs, isTop=True)
get_top5_model(rs, isTop=False)

In [None]:
def get_top5fastest_model(grid, isFast):
    """
    Print the five fastest or the five slowest candidates of a fitted search.

    Candidates are ordered by ``mean_fit_time``; positions always refer to
    the original ``cv_results_`` arrays, so each duration is printed next to
    the parameters it belongs to. Candidates with equal durations are all
    kept. If the search holds fewer than five candidates, all of them are
    printed.

    Args:
        grid: fitted search object whose ``cv_results_`` mapping provides
            ``mean_fit_time`` and ``params``.
        isFast: if truthy, print the shortest fit times in ascending order;
            otherwise print the longest fit times in descending order.

    Returns:
        None. One line per candidate is printed: parameters and mean fit
        time in seconds.
    """
    fit_times = np.asarray(grid.cv_results_['mean_fit_time'])
    models = grid.cv_results_['params']

    # Sort positions instead of repeatedly deleting the extreme value:
    # deleting shifts the remaining indices away from those of `models`.
    order = np.argsort(fit_times, kind="stable")
    if not isFast:
        order = order[::-1]

    for idx in order[:5]:
        print("model:", models[idx], "\tduration(s): ", fit_times[idx])


In [None]:
# Report the candidates with the shortest mean fit time ...
print("\nTop 5 fastest:")
get_top5fastest_model(rs, True)

# ... and the candidates with the longest mean fit time.
print("\nTop 5 slowest:")
get_top5fastest_model(rs, False)