# Phase 1 of project: Data Wrangling

- We will be using the MNIST dataset for this project.
- The first iteration will use PyTorch.

We can start by loading the dataset, which comes pre-split and pre-processed from `keras.datasets.mnist`.

In [None]:
# load the dataset

from keras.datasets import mnist

# mnist.load_data() is unpacked into two (images, labels) pairs: the training
# split first, then the test split.
(train_X, train_y), (test_X, test_y) = mnist.load_data()

# Print the array shapes of each split as a quick sanity check.
print('Train: X=%s, y=%s' % (train_X.shape, train_y.shape))
print('Test: X=%s, y=%s' % (test_X.shape, test_y.shape))

For ease of reading, we also generate an image to display the data.

In [None]:
# plot the dataset

import matplotlib.pyplot as plt

# Show the first nine training images on a 3x3 grid of grayscale subplots.
for i in range(9):
    # Subplot positions are 1-based, hence i + 1.
    plt.subplot(3, 3, i + 1)
    plt.imshow(train_X[i], cmap=plt.get_cmap('gray'))
plt.show()


The pooling layer slides a window over the image and takes the max or mean value of the pixels in each window. This reduces the size of the image and, in turn, the number of parameters in the network.

In [None]:
import numpy as np
from itertools import product
# pooling the dataset
# mode = max, mean
def pooling_layer2D(X, spatial_extent=2, stride=2, mode='max'):
    """Downsample a 2D array by max- or mean-pooling over a sliding window.

    Args:
        X: 2D array-like (e.g. a single grayscale image).
        spatial_extent: Side length of the square pooling window.
        stride: Step, in pixels, between consecutive window positions.
        mode: ``'max'`` to keep the largest value in each window or
            ``'mean'`` to keep the average.

    Returns:
        A float array of shape ``(h_out, w_out)`` with
        ``h_out = int((h - spatial_extent) / stride) + 1`` and likewise for
        the width. No padding is applied.

    Raises:
        ValueError: If ``mode`` is neither ``'max'`` nor ``'mean'``.
    """
    # Resolve the reduction once, up front. Previously an unknown mode fell
    # through both branches inside the loop and silently returned all zeros.
    if mode == 'max':
        reduce_window = np.max
    elif mode == 'mean':
        reduce_window = np.mean
    else:
        raise ValueError("mode must be 'max' or 'mean', got %r" % (mode,))

    X = np.asarray(X)
    h, w = X.shape
    h_out = int((h - spatial_extent) / stride) + 1
    w_out = int((w - spatial_extent) / stride) + 1

    Y = np.zeros((h_out, w_out))
    for i, j in product(range(h_out), range(w_out)):
        top = i * stride
        left = j * stride
        window = X[top:top + spatial_extent, left:left + spatial_extent]
        Y[i, j] = reduce_window(window)
    return Y

Filtering is also done with a sliding window, similarly to pooling, except each output pixel is the result of a dot product between the filter and the window of pixels for each stride position in the input.

In [None]:
import numpy as np
from itertools import product
# conv the dataset
#num_filters = powers of 2
#rand = [0,1)
def filter_gen(spatial_extent, num_filters):
    """Create a stack of random square filters.

    Returns an array of shape ``(num_filters, spatial_extent, spatial_extent)``
    whose entries are uniform samples from ``[0, 1)`` shifted down by 0.5,
    i.e. values in ``[-0.5, 0.5)``.
    """
    stack_shape = (num_filters, spatial_extent, spatial_extent)
    uniform_samples = np.random.rand(*stack_shape)
    return uniform_samples - 0.5

def kfilter_scan(X, num_filters, spatial_extent, stride, zero_padding):
    """Slide a randomly generated square filter over a 2D array.

    Each output pixel is the dot product between the filter and the window
    of input pixels at that stride position.

    Args:
        X: 2D array-like input (e.g. a pooled grayscale image).
        num_filters: Number of filters requested from ``filter_gen``.
        spatial_extent: Side length of the square filter.
        stride: Step, in pixels, between consecutive window positions.
        zero_padding: Number of zero pixels added to every side of ``X``
            before scanning (0 / False disables padding).

    Returns:
        A float array of shape ``(h_out, w_out)`` where
        ``h_out = int((h - spatial_extent + 2 * zero_padding) / stride) + 1``
        and likewise for the width.
    """
    X = np.asarray(X)

    # Pad by exactly `zero_padding` pixels per side so the padded array
    # matches the output-size formula. Padding by spatial_extent // 2 (as
    # before) disagreed with the formula: windows were misaligned, and ran
    # off the array whenever zero_padding > spatial_extent // 2.
    pad = int(zero_padding)
    if pad:
        X = np.pad(X, pad)

    # X already includes the padding, so (h - F + 2P) / S + 1 reduces to
    # (padded_h - F) / S + 1.
    h, w = X.shape
    h_out = int((h - spatial_extent) / stride) + 1
    w_out = int((w - spatial_extent) / stride) + 1

    # New random filters are drawn on every call.
    filters = filter_gen(spatial_extent, num_filters)
    # NOTE(review): only the first of the `num_filters` filters is applied,
    # so the output is single-channel. Confirm whether one output map per
    # filter is intended before changing the return shape.
    kernel = filters[0].flatten()

    Y = np.zeros((h_out, w_out))
    # Dot product of the filter and the image window at each stride position.
    for i, j in product(range(h_out), range(w_out)):
        top = i * stride
        left = j * stride
        window = X[top:top + spatial_extent, left:left + spatial_extent]
        Y[i, j] = np.dot(window.flatten(), kernel)
    return Y

Add a ReLU activation layer to remove any negative values from the output of the convolutional layer.

In [None]:
def relu(x):
    """Rectified linear unit: the element-wise maximum of 0 and ``x``."""
    floor = 0
    return np.maximum(floor, x)

def relu_layer(image):
    """Apply ReLU to every element of ``image`` and return a new float array.

    Args:
        image: Array-like of any shape (typically a 2D feature map).

    Returns:
        A new float64 array with the same shape as ``image`` in which every
        negative value has been replaced by 0.
    """
    # One vectorised call replaces the per-pixel Python loop. The cast keeps
    # the float64 output the loop-based version produced.
    pixels = np.asarray(image, dtype=np.float64)
    return np.maximum(0, pixels)

Plot the data, as it stands, after running it through pooling and convolutional layers.

In [None]:
import matplotlib.pyplot as plt

# First figure: the nine raw training images on a 3x3 grid.
for i in range(9):
    plt.subplot(3, 3, i + 1)
    plt.imshow(train_X[i], cmap=plt.get_cmap('gray'))
plt.show()

# Second figure: the same nine images after max-pooling, the filter scan and
# ReLU, so the effect of the pipeline can be compared side by side.
for i in range(9):
    poollayer = pooling_layer2D(train_X[i], spatial_extent=2, stride=2, mode='max')
    filtered = kfilter_scan(
        poollayer,
        num_filters=2,
        spatial_extent=4,
        stride=2,
        zero_padding=1,
    )
    relued = relu_layer(filtered)
    plt.subplot(3, 3, i + 1)
    plt.imshow(relued, cmap=plt.get_cmap('gray'))
plt.show()

Now we will add a fully connected (dense) layer followed by a softmax, to map the extracted features to class probabilities.

In [None]:
def Dense(input, weights, bias):
    """Fully connected layer: ``np.dot(input, weights)`` plus ``bias``."""
    projected = np.dot(input, weights)
    return projected + bias

def softmax(x):
    """Softmax over all elements of ``x``; the result sums to 1.

    The global maximum is subtracted before exponentiating so that large
    inputs do not overflow ``np.exp``.
    """
    shifted = x - np.max(x)
    exps = np.exp(shifted)
    total = exps.sum()
    return exps / total

def softmax_layer(image):
    """Apply a softmax along the last axis of ``image``.

    Args:
        image: Array-like with at least one dimension. For a 2D input each
            row is treated as one vector of scores.

    Returns:
        A float64 array with the same shape as ``image`` in which every
        slice along the last axis is non-negative and sums to 1.
    """
    # Softmax only makes sense over a vector of scores. Applying it to each
    # scalar separately (as before) yields exp(0) / exp(0) = 1 for every
    # element, so normalise across the last axis instead.
    scores = np.asarray(image, dtype=np.float64)
    # Subtract the per-slice maximum so np.exp cannot overflow.
    shifted = scores - scores.max(axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=-1, keepdims=True)

def cnn(X, weights, bias):
    """Forward pass: max-pool -> filter scan -> ReLU -> flatten -> dense -> softmax.

    Args:
        X: 2D image array.
        weights: Dense-layer weights of shape ``(n_features, n_classes)``,
            where ``n_features`` is the number of elements in the ReLU
            feature map (7 * 7 = 49 for a 28x28 input with the settings
            used here).
        bias: Dense-layer bias, broadcastable to ``(n_classes,)``.

    Returns:
        A 1D array of class probabilities that sums to 1, so ``pred[y]`` is
        the probability assigned to class ``y``.
    """
    poollayer = pooling_layer2D(X, spatial_extent=2, stride=2, mode='max')
    # NOTE: kfilter_scan draws new random filters on every call, so repeated
    # forward passes on the same input do not give the same output.
    filtered = kfilter_scan(poollayer, num_filters=2, spatial_extent=4, stride=2, zero_padding=1)
    relued = relu_layer(filtered)
    # A fully connected layer consumes a feature vector, so flatten the 2D
    # map first. Feeding the 2D map straight into Dense produced one score
    # row per image row rather than one score per class.
    features = relued.flatten()
    logits = Dense(features, weights, bias)
    # Normalise over the whole logits vector to get one distribution.
    return softmax(logits)

def cnn_loss(X, y, weights, bias):
    """Negative log of the network output indexed at label ``y``."""
    probabilities = cnn(X, weights, bias)
    label_probability = probabilities[y]
    return -np.log(label_probability)

def cnn_loss_grad(X, y, weights, bias):
    """Return the network output with 1 subtracted at index ``y``."""
    grad = cnn(X, weights, bias)
    # The array returned by cnn() is updated in place and handed back.
    grad[y] = grad[y] - 1
    return grad


Now we will add a dropout layer to prevent overfitting.