In [13]:
import torch.nn as nn
from torchvision import models

class CNN(nn.Module):
    """Small two-stage convolutional classifier.

    Architecture:
        extract:  Conv2d(size -> 32, 5x5, pad 2) -> ReLU -> MaxPool2d(2) -> Dropout
                  Conv2d(32 -> 64, 5x5, pad 2)   -> ReLU -> MaxPool2d(2) -> Dropout
        decimate: Linear(64 * H/4 * W/4 -> drop_layer_size) -> ReLU -> Dropout
                  Linear(drop_layer_size -> num_classes)

    Args:
        size: number of channels in the input images (``in_channels`` of the
            first convolution).
        num_classes: number of output scores produced per sample.
        dropout_rate: dropout probability used after each pooling stage and
            inside the classifier.
        drop_layer_size: width of the hidden fully connected layer.
        image_size: spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. Defaults to 28, which
            reproduces the original fixed ``64 * 7 * 7`` classifier input.

    Raises:
        ValueError: if ``image_size`` is too small to survive two 2x2 poolings.
    """

    def __init__(self, size, num_classes, dropout_rate, drop_layer_size, image_size=28):
        super(CNN, self).__init__()

        # Accept either one int (square image) or a (height, width) pair.
        try:
            height, width = image_size
        except TypeError:
            height = width = image_size

        # Both convolutions keep the spatial size (kernel 5, stride 1, padding 2);
        # each MaxPool2d(2) floor-halves it, so two poolings divide it by 4.
        pooled_h = int(height) // 2 // 2
        pooled_w = int(width) // 2 // 2
        if pooled_h < 1 or pooled_w < 1:
            raise ValueError(
                "image_size must be at least 4 in each dimension, got {!r}".format(image_size)
            )

        ### convolutional feature extraction
        self.extract = nn.Sequential(
            ### first layer
            nn.Conv2d(in_channels = size, out_channels = 32, kernel_size = 5, stride = 1, padding = 2),
            ### first layer non-linearity
            nn.ReLU(inplace = True),
            ### pool layer
            nn.MaxPool2d(2),
            nn.Dropout(dropout_rate),
            ### second layer
            nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 5, stride = 1, padding = 2),
            ### nonlinearity
            nn.ReLU(inplace = True),
            nn.MaxPool2d(2),
            nn.Dropout(dropout_rate)
        )
        ### MLP classifier
        self.decimate = nn.Sequential(
            nn.Linear(64 * pooled_h * pooled_w, drop_layer_size),
            nn.ReLU(inplace = True),
            nn.Dropout(dropout_rate),
            nn.Linear(drop_layer_size, num_classes)
        )

    ### forward pass
    def forward(self, x):
        """Return one row of ``num_classes`` raw scores for each sample in ``x``.

        ``x`` is treated as a batch: dimension 0 is kept and everything else
        is flattened before the fully connected layers.
        """
        ### convolutional layers
        features = self.extract(x)
        ### flatten each sample's feature maps into a single vector
        features = features.view(features.size(0), -1)
        ### run MLP
        return self.decimate(features)

        

In [14]:
import os
import torch
import numpy as np
from torchvision import datasets
import torchvision

# torchvision ships ready-made wrappers for common benchmark datasets;
# see https://pytorch.org/vision/stable/datasets.html
train = datasets.MNIST(
    root='./',       # directory the MNIST files are downloaded to / read from
    train=True,      # True -> training split, False -> test split
    transform=torchvision.transforms.Compose(
        [torchvision.transforms.ToTensor()]  # hand back tensors instead of PIL images
    ),
    download=True,
)

### Data Loader     

In [15]:
# Run it!
def runModel(dropout_rate, batch_size, drop_layer_size, learning_rate):
    """Train a fresh ``CNN`` on MNIST for one epoch and return its test accuracy.

    Uses the notebook-level ``train`` dataset for training and loads the MNIST
    test split from ``./`` (downloading it if needed) for evaluation.

    Args:
        dropout_rate: dropout probability passed to ``CNN``.
        batch_size: mini-batch size used for training.
        drop_layer_size: hidden fully connected layer width passed to ``CNN``.
        learning_rate: learning rate for the Adam optimizer.

    Returns:
        Fraction of test images classified correctly, as a float in [0, 1].

    Side effects:
        Writes the trained model to ``./my_mnist_model.pt``.
    """
    from torch.utils.data import DataLoader

    train_ld = DataLoader(dataset=train, shuffle=True, batch_size=batch_size)

    input_size = 1    # MNIST images are single-channel
    num_classes = 10  # digits 0-9
    model = CNN(input_size, num_classes, dropout_rate, drop_layer_size)

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    costfx = torch.nn.CrossEntropyLoss()

    num_epochs = 1
    model.train()  # make sure dropout is active while fitting
    for epoch in range(num_epochs):
        for samples, labels in train_ld:
            samples = samples.float()
            labels = labels.long()

            # forward pass and error
            loss = costfx(model(samples), labels)
            # zero gradients, back prop, update everything
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # NOTE(review): this pickles the whole module object, so loading it later
    # needs the CNN class to be importable and should only be done with trusted
    # files. Saving model.state_dict() is the more portable option, but would
    # change the file format, so it is left as is.
    torch.save(model, './my_mnist_model.pt')
    model.eval()  # disable dropout for evaluation

    test = datasets.MNIST(
        root='./',
        train=False,
        transform=torchvision.transforms.Compose([torchvision.transforms.ToTensor()]),
        download=True,
    )
    # Evaluate in batches: in eval mode each sample is scored independently,
    # so this gives the same decisions as one-at-a-time but far fewer passes.
    test_ld = DataLoader(dataset=test, shuffle=False, batch_size=256)

    correct = 0
    with torch.no_grad():  # no autograd bookkeeping needed for scoring
        for samples, labels in test_ld:
            # make a classification decision per sample
            predicted = model(samples.float()).argmax(dim=1)
            correct += (predicted == labels).sum().item()

    return correct / len(test)


In [16]:
# # experiment 1: dropout rates
# for i in range(10):
#     rate = 0
#     for j in range(5):
#         rate += runModel(i/10, 16, 128, 0.0001)
#     rate = rate / 5
#     print("Dropout rate of ", i/10, " with a classification rate of ", rate)

# # experiment 2: batch size
# i = 4
# while i < 513:
#     rate = 0
#     for j in range(5):
#         rate += runModel(0.1, i, 128, 0.0001)
#     rate = rate / 5
#     print("Batch size of ", i, " with a classification rate of ", rate)
#     i *= 2

# # experiment 3: drop layer size
# i = 4
# while i < 512:
#     rate = 0
#     for j in range(5):
#         rate += runModel(0.1, 16, i, 0.0001)
#     rate = rate / 5
#     print("Drop layer size of ", i, " with a classification rate of ", rate)
#     i *= 2

# # experiment 4: learning rate
# i = 0.000001
# while i < 1.1:
#     rate = 0
#     for j in range(5):
#         rate += runModel(0.1, 16, 128, i)
#     rate = rate / 5
#     print("Learning rate of ", i, " with a classification rate of ", rate)
#     i *= 10

    # single run: no dropout, batch size 4, 256-wide hidden layer, lr 1e-3
    rate = runModel(0, 4, 256, 0.001)
    print("classification rate of ", rate)




KeyboardInterrupt: 