In [1]:
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def one_hot_encode(labels):
    min_l, max_l = min(labels), max(labels)
    one_hot_map = {k: [0 if i < k else 1 if i == k else 0 \
                              for i in range(min_l, max_l+1)] \
                      for k in range(min_l, max_l+1)}
    
    return np.array([one_hot_map[label] for label in labels]), len(set([l for l in labels]))

def init_weights(data_shape, 
                 n_channels, 
                 filter_size_conv1,
                 stride_conv1,
                 pad_conv1,
                 n_filters_conv1,
                 filter_size_pool1,
                 stride_pool1,
                 filter_size_conv2,
                 stride_conv2,
                 pad_conv2,
                 n_filters_conv2,
                 filter_size_pool2,
                 stride_pool2,
                 n_fc_neurons_1, 
                 n_fc_neurons_2):
    
    params = {
        'conv1': {
            'weights': (),  # (W, b)
            'h_params': (stride_conv1, pad_conv1),
            'activations': ()
        },
        'pool1': {
            'h_params': (stride_pool1, filter_size_pool1),
            'activations': ()
        },          
        'conv2': {
            'weights': (),  # (W, b)
            'h_params': (stride_conv2, pad_conv2),
            'activations': ()
        },
        'pool2': {
            'h_params': (stride_pool2, filter_size_pool2),     
            'activations': ()
        },
        'fc1': {
            'weights': (),  # (W, b)
            'activations': ()
        },
        'fc2': {
            'weights': (),  # (W, b)
            'activations': ()
        },
    }
    
    # Weights for the first convolutional layer
    np.random.seed(42)
    W = np.random.randn(filter_size_conv1, filter_size_conv1, n_channels, n_filters_conv1)
    np.random.seed(42)
    b = np.random.randn(1, 1, 1, n_filters_conv1)
    params['conv1']['weights'] = (W, b)

    # Weights for the second convolutional layer
    np.random.seed(42)
    W = np.random.randn(filter_size_conv2, filter_size_conv2, n_filters_conv1, n_filters_conv2)
    np.random.seed(42)
    b = np.random.randn(1, 1, 1, n_filters_conv2)
    params['conv2']['weights'] = (W, b)    
    
    # Weights for the first fully connected layer
    output_1_shape = int((data_shape[1] + 2 * pad_conv1 - filter_size_conv1)/stride_conv1 + 1)
    output_2_shape = int((output_1_shape - filter_size_pool1)/stride_pool1 + 1)
    output_3_shape = int((output_2_shape + 2 * pad_conv2 - filter_size_conv2)/stride_conv2 + 1)
    output_4_shape = int((output_3_shape - filter_size_pool2)/stride_pool2 + 1)
    flattened_dim = output_4_shape**2 * n_filters_conv2
    
    np.random.seed(42)
    W = np.random.randn(flattened_dim, n_fc_neurons_1)
    np.random.seed(42)
    b = np.random.randn(1, n_fc_neurons_1)
    params['fc1']['weights'] = (W, b)
    
    # Weights for the second fully connected layer    
    np.random.seed(42)
    W = np.random.randn(n_fc_neurons_1, n_fc_neurons_2)
    np.random.seed(42)
    b = np.random.randn(1, n_fc_neurons_2)    
    params['fc2']['weights'] = (W, b)
    
    return params
    
def conv_forward(A_prev, W, b, stride=1, pad=1):
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    (f, f, n_C_prev, n_C) = W.shape

    n_H = int((n_H_prev + 2 * pad - f) / stride + 1)
    n_W = int((n_W_prev + 2 * pad - f) / stride + 1)    
    
    Z = np.zeros((m, n_H, n_W, n_C))
    A_prev_pad = np.pad(A_prev, 
                        ((0,0), (pad,pad), (pad,pad), (0,0)), 
                        mode='constant', 
                        constant_values=(0))
    cache = np.zeros((m))
    
    for i in range(m):
        a_prev_pad = A_prev_pad[i]
        for h in range(n_H):
            for w in range(n_W):
                for c in range(n_C):
                    
                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f
                    
                    a_slice_prev = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
                    Z[i, h, w, c] = np.sum(np.multiply(a_slice_prev, W[:,:,:,c]) + b[:,:,:,c])
                    
    cache = (A_prev, W, b)
    
    return Z, cache

def conv_backward():
    pass

def pool_forward(A_prev, stride=2, filter_size=2):
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    
    n_H = int((n_H_prev - filter_size) / stride + 1)
    n_W = int((n_W_prev - filter_size) / stride + 1)  

    n_C = n_C_prev
    
    A = np.zeros((m, n_H, n_W, n_C)) 
    cache = np.zeros((m))
    
    for i in range(m):                          
        for h in range(n_H):                     
            for w in range(n_W):                 
                for c in range (n_C):            
                    
                    vert_start = h * stride
                    vert_end = vert_start + filter_size
                    horiz_start = w * stride
                    horiz_end = horiz_start + filter_size
                    
                    a_prev_slice = A_prev[i, vert_start:vert_end, horiz_start:horiz_end, c]
                    A[i, h, w, c] = np.max(a_prev_slice)
                    cache[i] = np.argmax(a_prev_slice)
                
    return A, cache

def pool_backward():
    pass

def relu_activation_forward(A_prev):
    return np.where(A_prev>0, A_prev, 0)

def fc_forward(A_prev, W, b):
    A_prev = A_prev.reshape(A_prev.shape[0], -1)    
    A = np.matmul(A_prev, W) + b
    
    return A 

def fc_backward(m, labels, activations, W):
    (a1, a2, a3) = activations
    _m= 1/m

    dL_dW2 = _m * np.dot(a2, (a3-labels)).T
    dL_db2 = _m * np.sum((a3-labels), axis=0, keepdims=True).T
    dL_dW1 = _m * np.dot(np.multiply(np.dot((a3-labels), W).T, (a2 * (1-a2))), a1)
    dL_db1 = _m * np.sum(np.multiply(np.dot((a3-labels), W).T, (a2 * (1-a2))), axis=1, keepdims=True)   
        
    return dL_dW1, dL_db1, dL_dW2, dL_db2

def sigmoid_activation_forward(z):
    z = z.clip(min=-500, max=500)
    exp = np.exp(z)
    
    return exp/(exp+1)

def softmax_activation_forward(z):
    z = z.clip(min=-500, max=500)
    exp = np.exp(z)
    
    return exp/exp.sum(axis=1).reshape(-1, 1)

def log_loss(output, labels): 
    output = output.clip(min=1e-10, max=0.9999999999)   
    cost = (-1./labels.shape[0]) * np.sum(
        np.multiply(labels, np.log(output)) + 
        np.multiply((1-labels), np.log(1-output)))
 
    return cost

def forward(X, params, inspect=False):
    if inspect:
        print('X.shape: {}\n'.format(X.shape))
    
    # First convolutional layer
    W, b = params['conv1']['weights']
    stride, pad = params['conv1']['h_params']
    Z, cache_conv1 = conv_forward(X, W, b, stride=stride, pad=pad)
    if inspect:
        print('First convolutional layer:\nW.shape: {}\nb.shape: {}\nZ.shape: {}\n'.format(
            W.shape, b.shape, Z.shape))
    
    # First max pool layer and ReLU activation
    stride, filter_size = params['pool1']['h_params']
    Z, cache_pool1 = pool_forward(Z, stride=stride, filter_size=filter_size)
    A = relu_activation_forward(Z)
    params['pool1']['activations'] = (cache_pool1, A)
    if inspect:
        print('First max pooling layer:\nA.shape: {}\n'.format(
            A.shape))
    
    # Second convolutional layer
    W, b = params['conv2']['weights']
    stride, pad = params['conv2']['h_params']
    Z, cache_conv2 = conv_forward(A, W, b, stride=stride, pad=pad)
    if inspect:
        print('Second convolutional layer:\nW.shape: {}\nb.shape: {}\nZ.shape: {}\n'.format(
            W.shape, b.shape, Z.shape))
    
    # Second max pool layer and ReLU activation
    stride, filter_size = params['pool2']['h_params']
    Z, cache_pool2 = pool_forward(Z, stride=stride, filter_size=filter_size)
    A = relu_activation_forward(Z)
    params['pool2']['activations'] = (cache_pool2, A)
    params['fc2']['activations'] = [A]
    if inspect:
        print('Second max pooling layer:\nA.shape: {}\n'.format(
            A.shape))
    
    # First fully connnected layer and ReLU activation
    W, b = params['fc1']['weights']
    Z = fc_forward(A, W, b)
    A = relu_activation_forward(Z)
    params['fc2']['activations'].append(A)
    if inspect:
        print('First fully connected layer:\nW.shape: {}\nb.shape: {}\nZ.shape: {}\nA.shape: {}\n'.format(
            W.shape, b.shape, Z.shape, A.shape))
    
    # Second fully connected layer and sigmoid activation
    W, b = params['fc2']['weights']
    Z = fc_forward(A, W, b)
    A = sigmoid_activation_forward(Z)
    params['fc2']['activations'].append(A)
    if inspect:
        print('Second fully connected layer:\nW.shape: {}\nb.shape: {}\nZ.shape: {}\nA.shape: {}\n'.format(
            W.shape, b.shape, Z.shape, A.shape))
    
    return A

def predict(z):    
    return softmax_activation_forward(z)

def backward(m, labels, params):
    """
    dL_dW_fc1, dL_db_fc1, dL_dW_fc2, dL_db_fc2 = fc_backward(
            m, labels, params['fc2']['activations'], params['fc2']['weights'])
    dL_relu2 = relu_backward()
    dL_pool2 = pool_backward()
    dL_conv2 = conv_backward()
    dL_relu1 = relu_backward()
    dL_pool1 = pool_backward()
    dL_conv1 = conv_backward()
    
    derivatives = {
        'conv1': dL_conv1,
        'pool1': dL_pool1,
        'relu1': dL_relu1,
        'conv2': dL_conv2,
        'pool2': dL_pool2,
        'relu2': dL_relu2,
        'fc1': (dL_dW_fc1, dL_db_fc1),
        'fc2': (dL_dW_fc2, dL_db_fc2)
    }

    return derivatives
    """
    pass

def train_network(X, labels, n_classes, iters, inspect=False):
    m = X.shape[0]
    params = init_weights(data_shape=X.shape, 
                          n_channels=3, 
                          filter_size_conv1=3, 
                          stride_conv1=1,
                          pad_conv1=1,
                          n_filters_conv1=8,
                          filter_size_pool1=2,
                          stride_pool1=2,
                          filter_size_conv2=3,
                          stride_conv2=1,
                          pad_conv2=1,
                          n_filters_conv2=8,
                          filter_size_pool2=2,
                          stride_pool2=2,
                          n_fc_neurons_1=50,
                          n_fc_neurons_2=n_classes)
    
    initial_pass = forward(X, params, inspect=inspect)
    initial_loss = log_loss(initial_pass, labels)
    print('Initial log loss is {}\n'.format(initial_loss))
    
    for i in range(iters):
        output = forward(X, params)
        loss = log_loss(output, labels)
        print('Iteration {} -- log loss: {}'.format(i+1, loss))
        derivatives = backward(m, labels, params)
    
    final_pass = forward(X, params)
    final_loss = log_loss(final_pass, labels)
    print('\nFinal log loss is {}\n'.format(final_loss))
    
    predictions = predict(final_pass)
    print('Predictions:\n{}'.format(predictions))

In [3]:
X = np.load('ex5_train_x.npy')
y = np.load('ex5_train_y.npy')
labels, n_classes = one_hot_encode(y)

In [4]:
train_network(X[:5], labels[:5], n_classes, iters=3, inspect=True)        

X.shape: (5, 64, 64, 3)

First convolutional layer:
W.shape: (3, 3, 3, 8)
b.shape: (1, 1, 1, 8)
Z.shape: (5, 64, 64, 8)

First max pooling layer:
A.shape: (5, 32, 32, 8)

Second convolutional layer:
W.shape: (3, 3, 8, 8)
b.shape: (1, 1, 1, 8)
Z.shape: (5, 32, 32, 8)

Second max pooling layer:
A.shape: (5, 16, 16, 8)

First fully connected layer:
W.shape: (2048, 50)
b.shape: (1, 50)
Z.shape: (5, 50)
A.shape: (5, 50)

Second fully connected layer:
W.shape: (50, 6)
b.shape: (1, 6)
Z.shape: (5, 6)
A.shape: (5, 6)

Initial log loss is 69.0775525915

Iteration 1 -- log loss: 69.0775525915
Iteration 2 -- log loss: 69.0775525915
Iteration 3 -- log loss: 69.0775525915

Final log loss is 69.0775525915

Predictions:
[[0.28805844 0.10597078 0.10597078 0.10597078 0.10597078 0.28805844]
 [0.24368619 0.08964714 0.24368619 0.08964714 0.08964714 0.24368619]
 [0.28805844 0.10597078 0.10597078 0.10597078 0.10597078 0.28805844]
 [0.2111594  0.0776812  0.2111594  0.0776812  0.2111594  0.2111594 ]
 [0.24368