In [1]:
import numpy as np

In [25]:
class Conv3x3:
    #A Convolution layer using 3x3 filters
    
    def __init__(self, num_filters):
        self.num_filters = num_filters
        
        # filters is a 3d array with dimensions (num_filters, 3, 3)
        # We divide by 9 to reduce the variance of our initial values
        self.filters = np.random.randn(num_filters, 3, 3) / 9
        
        #Note: that diving by 9 during the initialization is more important than you may think.
        #if the initial values are too large or too small, training the network will be ineffective.
        
    def iterate_regions(self, image):
        '''
                Generates all possible 3x3 image regions using valid padding.
                - image is a 2d numpy array
        '''
        h, w = image.shape
            
        for i in range(h - 2):
            for j in range(w - 2):
                im_region = image[i:(i+3), j:(j + 3)]
                yield im_region, i, j
        
        
    def forward(self, input):
        '''
                Performs a forward pass of the conv layer using the given input.
                Returns a 3d numpy array with dimensions (h, w, num_filters)
                - input is a 2d numpy array
        '''
            
        h, w = input.shape
        output = np.zeros((h - 2, w - 2, self.num_filters))

        for im_region, i, j in self.iterate_regions(input):
                output[i, j] = np.sum(im_region * self.filters, axis=(1, 2))

        return output
        
        '''
        We have im_region, a 3x3 array containing the relevant image region.
        we have self.filters, 3d array
        we do im_region * self.filters, which uses numpy's broadcasting 
        feature to element-wise multiply the two array the result is a 
        3d array with the same dimension as self.filters
        
        we use np.sum() the result of the previous step using axis=(1, 2), which
        produces a 1d array of lenth num_filters where each element contains the 
        convolution result for the corresponding filter
        
        we assign the result to output[i, j], which contains convolution result
        for pixel (i, j) in the outpur
        '''

In [30]:
''' 
    The squence above is performed for each pixel in the input until we obtain 
    our final output volume.
'''
def loadMNIST( prefix, folder ):
    intType = np.dtype( 'int32' ).newbyteorder( '>' )
    nMetaDataBytes = 4 * intType.itemsize

    data = np.fromfile( folder + "/" + prefix + '-images-idx3-ubyte', dtype = 'ubyte' )
    magicBytes, nImages, width, height = np.frombuffer( data[:nMetaDataBytes].tobytes(), intType )
    data = data[nMetaDataBytes:].astype( dtype = 'float32' ).reshape( [ nImages, width, height ] )

    labels = np.fromfile( folder + "/" + prefix + '-labels-idx1-ubyte',
                          dtype = 'ubyte' )[2 * intType.itemsize:]

    return data, labels

trainingImages, trainingLabels = loadMNIST( "train", "./dataset/MNIST/" )
testImages, testLabels = loadMNIST( "t10k", "./dataset/MNIST/" )

In [33]:
conv = Conv3x3(8)
output = conv.forward(trainingImages[0])

In [34]:
print(output.shape)

(26, 26, 8)


In [35]:
class MaxPool2:
    #A Max Pooling layer using a pool size of 2.
    
    def iterate_regions(self, image):
        '''
        Generates non-overlapping 2x2 image regions to pool over.
        - image is 2d numpy array
        '''
        h, w, _ = image.shape
        new_h = h // 2
        new_w = w // 2
        
        
        for i in range(new_h):
            for j in range(new_w):
                im_region = image[(i * 2): (i * 2 + 2), (j * 2):(j * 2 + 2)]
                yield im_region, i, j
                
    def forward(self, input):
        '''
            Performs  a forward pass of the maxpool layer using the given input.
            Returns a 3d numpy array with dimensions (h / 2, w / 2, num_filters).
            - input is a 3d numpy array with dimensions (h, w, num_filters)
        '''
        
        h, w, num_filters = input.shape
        output = np.zeros((h // 2, w // 2, num_filters))
        
        for im_region, i, j in self.iterate_regions(input):
            output[i, j] = np.amax(im_region, axis=(0, 1))
        
        
        return output
    

In [36]:
conv = Conv3x3(8)
pool = MaxPool2()

output = conv.forward(trainingImages[0])
output = pool.forward(output)
print(output.shape)

(13, 13, 8)


In [40]:
'''
    To Complete our CNN, we need to give it the ability to actually make predictions.
    We will do that by using the standard final layer for a mulitclass classification problem.
    the Softmax layer, a fully-connected (dense) layer that uses the Softmax function as its activation.
    fully-connected layers have every node connected to every output from the previous layer.
    We Will use a softmax layer with 10 nodes, one representing each digit, as the final layer in our CNN.
    each node in the layer will be connected to every input.
    After the softmax transformation is applied, the digit represented 
    by the node with the highest probability will be the output of the CNN
    
    
    What softmax really does is help us quantify how sure we are of our prediction,
    which is useful when training and evaluating our CNN.
'''
#Implementing Softmax

class Softmax:
    # A standard fully-connected layer with softmax activation
    
    
    def __init__(self, input_len, nodes):
        # We divide by input_len to reduce the variance of our initial value
        self.weights = np.random.randn(input_len, nodes) / input_len
        self.biases = np.zeros(nodes)
        
    def forward(self, input):
        '''
        Performs a forward pass of the softmax layer using the given input.
        Returns a 1d numpy array containing the respective probability values.
        - input can be any array with any dimensions.
        '''
        input = input.flatten() # we flatten() the input to make it easier to work with
        
        input_len, nodes = self.weights.shape
        # np.dot() multiplies input and self.weights element-wise and then sums the result
        totals = np.dot(input, self.weights) + self.biases
        # np.exp() calculates the exponentials used for softmax
        exp = np.exp(totals)
        return exp / np.sum(exp, axis=0)

In [41]:

conv = Conv3x3(8) # 28x28x1 -> 26x26x8
pool = MaxPool2() # 26x26x8 -> 13x13x8
softmax = Softmax(13 * 13 * 8, 10) # 13x13x8 -> 10

def forward(image, label):
    '''
    Completes a forward pass of the CNN and calculates the accuracy and
    cross-entropy loss.
    - image is a 2d numpy array
    - label is a digit
    '''
    # We transform the image from [0, 255] to [-0.5, 0.5] to make it easier
    # To work with. This is standard practice
    out = conv.forward((image / 255) - 0.5)
    out = pool.forward(out)
    out = softmax.forward(out)
    
    # Calculate cross-entropy loss and accuracy. np.log() is the natural log.
    loss = -np.log(out[label])
    acc = 1 if np.argmax(out) == label else 0
    
    return out, loss, acc

print('MNIST CNN initialized')

loss = 0
num_correct = 0
for i, (im, label) in enumerate(zip(testImages[:1000], testLabels[:1000])):
    # Do a forward pass.
    _, l, acc = forward(im, label)
    loss += 1
    num_correct += acc
    
    # Print stats every 100 steps.
    if i % 100 == 99:
        print(
          '[Step %d] Past 100 steps: Average Loss %.3f | Accuracy: %d%%' %
          (i + 1, loss / 100, num_correct)
        )
        loss = 0
        num_correct = 0

MNIST CNN initialized
[Step 100] Past 100 steps: Average Loss 1.000 | Accuracy: 13%
[Step 200] Past 100 steps: Average Loss 1.000 | Accuracy: 10%
[Step 300] Past 100 steps: Average Loss 1.000 | Accuracy: 2%
[Step 400] Past 100 steps: Average Loss 1.000 | Accuracy: 10%
[Step 500] Past 100 steps: Average Loss 1.000 | Accuracy: 7%
[Step 600] Past 100 steps: Average Loss 1.000 | Accuracy: 10%
[Step 700] Past 100 steps: Average Loss 1.000 | Accuracy: 5%
[Step 800] Past 100 steps: Average Loss 1.000 | Accuracy: 5%
[Step 900] Past 100 steps: Average Loss 1.000 | Accuracy: 5%
[Step 1000] Past 100 steps: Average Loss 1.000 | Accuracy: 8%
