In [1]:
import struct
import gzip
import numpy as np
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

np.random.seed(42)

def sigmoid(z):
    sig = 1/(1+np.exp(-z))
    return sig

def ReLU(z):
    ReLU = z * (z > 0)
    return ReLU

def compute_multiclass_loss(Y, Y_hat):
    L_sum = np.sum(np.multiply(Y, np.log(Y_hat)))
    m = y.shape[0]
    L = -(1/m) * L_sum
    return L
    
def read_idx(filename):
    with open(filename, 'rb') as f:
        zero, data_type, dims = struct.unpack('>HBB', f.read(4))
        shape = tuple(struct.unpack('>I', f.read(4))[0] for d in range(dims))
        return np.fromstring(f.read(), dtype=np.uint8).reshape(shape)
    
# Deep Learning Framework
# One hidden layer neural network model
# Classification
# Last layer is softmax
# Loss function cross entropy
# Back propagation

images_raw = read_idx('train-images.idx3-ubyte')
labels_raw = read_idx('train-labels.idx1-ubyte')
images_raw = images_raw/255
X = images_raw.reshape(images_raw.shape[0],-1)
y = labels_raw
X.shape
n_digits = 10
y_multiclass = np.eye(n_digits)[y.astype('int32')] # converts the example to prediction class using "dummy variables"



In [2]:
# L2 Regularization - using sigmoid

n_x = X.shape[1]
n_h = 64 # Architecture
learning_rate = 10
W1 = np.random.randn(n_h, n_x)# Neurons weights
b1 = np.zeros((n_h,1)) # Neurons intercepts
W2 = np.random.randn(n_digits, n_h) # Probability/weights of prediction
b2 = np.zeros((n_digits,1))
epochs = 100

lamda = 5
for epoch in range(epochs):
    Z1 = np.matmul(W1,X.T)+b1 # Feed forward prop
    A1 = sigmoid(Z1) # Feedforward activation
    Z2 = np.matmul(W2,A1)+b2 
    A2 = np.exp(Z2) / np.sum(np.exp(Z2), axis=0) #Softmax
    
    cost = compute_multiclass_loss(y_multiclass,A2.T)
    
    # Cross entropy cost
    dZ2 = A2.T - y_multiclass
    dW2 = ((1./y.shape[0])*np.matmul(A1,dZ2))
    dW2 = dW2 + (lamda*dW2) # L2 regularization term added
    db2 = ((1./y.shape[0])*np.sum(dZ2,axis=0,keepdims=True))
    db2 = db2 + (lamda*db2) # L2 regularization term added
    
    dA1 = np.matmul(dZ2,W2)
    dZ1 = dA1.T*sigmoid(Z1)*(1-sigmoid(Z1))
    dW1 = (1./y.shape[0]) * np.matmul(dZ1,X)
    db1 = (1./y.shape[0]) * np.sum(dZ1, axis=0, keepdims=True)
    
    W2 = W2 - learning_rate*dW2.T
    b2 = b2 - learning_rate*db2.T
    W1 = W1 - learning_rate*dW1
    b1 = b1 - learning_rate*db1
    
    if(epoch % 5 == 0):
        print('Epoch', epoch, 'cost: ', cost)

print("Final cost:", cost)

Z1 = np.matmul(W1,X.T)+b1
A1 = sigmoid(Z1)
Z2 = np.matmul(W2,A1)+b2
A2 = np.exp(Z2) / np.sum(np.exp(Z2), axis=0) #Soft Max
prediction = np.array([i.argmax() for i in A2.T]) #Predictions
(60000-(prediction!=y).sum())/60000 
# 92.89% prediction accuracy at learning rate = 10 and with L2 regularization

Epoch 0 cost:  9.09999229262521




Epoch 5 cost:  43.43479805851157
Epoch 10 cost:  22.299215725678753
Epoch 15 cost:  11.074137377322055
Epoch 20 cost:  12.15043686604245
Epoch 25 cost:  9.44298544744364
Epoch 30 cost:  6.453290886976837
Epoch 35 cost:  7.258260010687922
Epoch 40 cost:  6.29828203259229
Epoch 45 cost:  7.762510915805046
Epoch 50 cost:  6.718086967961329
Epoch 55 cost:  2.3780323034664286
Epoch 60 cost:  3.8366023733651238
Epoch 65 cost:  6.615847851777611
Epoch 70 cost:  4.5889312006385365
Epoch 75 cost:  4.476796399884962
Epoch 80 cost:  2.7882874907161592
Epoch 85 cost:  4.727224168456882
Epoch 90 cost:  3.3878524136608665
Epoch 95 cost:  5.0707493811801285
Final cost: 3.406917498860888


0.6649333333333334