In [86]:
import numpy as np
import os
import cv2
import pickle
from matplotlib import pyplot as plt
import math

In [87]:
# Layer widths, input layer first.
layer_dims = [
    28 * 28,  # input features per example (784)
    4,        # hidden layer width
    1,        # output layer width
]

In [88]:
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)), applied element-wise.

    Accepts a scalar or a NumPy array; an array result has the shape of `z`.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [89]:
def relu(z):
    """Rectified linear unit: element-wise maximum of `z` and zero."""
    rectified = np.maximum(0, z)
    return rectified

In [90]:
def sigmoid_back(dA, z):
    """Backward step through a sigmoid unit.

    Scales the upstream gradient `dA` by the sigmoid derivative s * (1 - s),
    where s = 1 / (1 + e^(-z)), and returns the product.
    """
    activation = 1 / (1 + np.exp(-z))
    complement = 1 - activation
    return dA * activation * complement

In [91]:
def relu_back(dA, z):
    """Backward step through a ReLU unit.

    Returns a copy of `dA` in which every entry whose pre-activation `z`
    is <= 0 has been set to zero; `dA` itself is not modified.
    """
    blocked = z <= 0
    dZ = np.array(dA, copy = True)
    dZ[blocked] = 0
    return dZ

In [92]:
def initialize(layer_dims):
    """Create the starting weights and biases for every layer.

    `layer_dims` lists the layer widths with the input width first. For each
    layer l >= 1 the returned dict contains:
      - "w<l>": array of shape (layer_dims[l], layer_dims[l-1]) holding
        standard-normal draws scaled by sqrt(2 / layer_dims[l-1]);
      - "b<l>": array of zeros of shape (layer_dims[l], 1).
    """
    params = {}
    width_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for idx, (fan_in, fan_out) in enumerate(width_pairs, start=1):
        scale = math.sqrt(2/fan_in)
        params["w" + str(idx)] = np.random.randn(fan_out, fan_in) * scale
        params["b" + str(idx)] = np.zeros([fan_out, 1])
    return params

In [93]:
def _affine_step(params, l, A_prev, m):
    """Compute z = w.A_prev + b for layer `l`; return the cache (A_prev, w, b, z)."""
    w = params['w' + str(l)]
    b = params['b' + str(l)]

    # Shapes are validated against the parameters themselves, not against a
    # module-level layer list, so networks of any architecture can be run.
    assert(w.shape[1] == A_prev.shape[0])
    assert(b.shape == (w.shape[0], 1))
    assert(A_prev.shape[1] == m)

    z = np.dot(w, A_prev) + b
    assert(z.shape == (w.shape[0], m))
    return (A_prev, w, b, z)


def for_prop(params, x):
    """Forward pass: ReLU on every hidden layer, sigmoid on the output layer.

    Args:
        params: dict holding "w1".."wL" and "b1".."bL"; each w<l> must have as
            many columns as the previous layer has rows, and each b<l> must be
            a column vector with one row per row of w<l>.
        x: input array whose axis 1 indexes examples (m = x.shape[1]).

    Returns:
        (AL, caches): AL is the sigmoid activation of the last layer; caches
        is a list with one (A_prev, w, b, z) tuple per layer, first layer first.

    Raises:
        AssertionError: if weight, bias or activation shapes are inconsistent.
        KeyError: if an expected "w<l>"/"b<l>" entry is missing from params.
    """
    A_prev = np.array(x, copy = True)
    m = x.shape[1]
    L = len(params)//2
    caches = []

    # Hidden layers 1 .. L-1 use ReLU.
    for l in range(1, L):
        cache = _affine_step(params, l, A_prev, m)
        caches.append(cache)
        A_prev = relu(cache[3])

    # Output layer L uses sigmoid.
    cache = _affine_step(params, L, A_prev, m)
    caches.append(cache)
    AL = sigmoid(cache[3])

    return AL, caches
        

In [94]:
def compute_cost(AL, Y):
    """Mean binary cross-entropy between predictions `AL` and labels `Y`.

    Args:
        AL: predicted probabilities, broadcast-compatible with Y.
        Y: label array; its axis-1 length m is used as the averaging divisor.

    Returns:
        Scalar cost  -(1/m) * sum(Y*log(AL) + (1-Y)*log(1-AL)).
    """
    m = Y.shape[1]
    # A saturated sigmoid returns exactly 0.0 or 1.0. log(0) is -inf and
    # 0 * -inf is nan, which would poison the whole cost, so AL is kept
    # strictly inside (0, 1). float64 is forced because 1 - eps rounds to 1.0
    # in lower precision.
    eps = 1e-15
    AL = np.clip(np.asarray(AL, dtype=np.float64), eps, 1 - eps)
    return -(np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL)))/m

In [95]:
def _linear_backward(dZ, cache, m):
    """Turn one layer's dZ into its gradient tuple (dZ, dw, db, dA_prev)."""
    A_prev, w, b, z = cache
    assert(dZ.shape == z.shape)

    dw = np.dot(dZ, A_prev.T)/m
    db = np.sum(dZ, axis = 1, keepdims = True)/m
    assert(dw.shape == w.shape)
    assert(db.shape == b.shape)

    dA_prev = np.dot(w.T, dZ)
    return (dZ, dw, db, dA_prev)


def back_prop(AL, Y, caches):
    """Backward pass for a network with ReLU hidden layers and a sigmoid output.

    Args:
        AL: output-layer activations; assumed to be sigmoid(z) of the last
            cache entry, as produced by for_prop.
        Y: labels, same shape as AL; m = Y.shape[1] is the averaging divisor.
        caches: list of (A_prev, w, b, z) tuples, first layer first.

    Returns:
        List of (dZ, dw, db, dA_prev) tuples ordered from the LAST layer to
        the first.

    Raises:
        AssertionError: if a gradient's shape does not match its parameter.
        IndexError: if `caches` is empty.
    """
    m = Y.shape[1]
    L = len(caches)

    # Output layer. With cross-entropy loss and AL = sigmoid(z), the product
    # dL/dA * sigmoid'(z) reduces algebraically to AL - Y. Using the closed
    # form avoids the 0/0 and inf*0 that (1-Y)/(1-AL) yields once the sigmoid
    # saturates to exactly 0.0 or 1.0.
    grads = [_linear_backward(AL - Y, caches[L - 1], m)]

    # Hidden layers, walking backwards; each consumes the dA_prev produced by
    # the layer after it.
    for l in reversed(range(L - 1)):
        dA = grads[-1][3]
        dZ = relu_back(dA, caches[l][3])
        grads.append(_linear_backward(dZ, caches[l], m))

    return grads
    

In [96]:
def optimize(params, grads, learning_rate):
    """Apply one gradient-descent step to every layer's weights and biases.

    Args:
        params: dict holding "w1".."wL" and "b1".."bL"; its entries are
            replaced in place.
        grads: list of (dZ, dw, db, dA_prev) tuples ordered from the last
            layer to the first (the order back_prop returns). Not modified.
        learning_rate: step size multiplying each gradient.

    Returns:
        The same `params` dict, with w<l> <- w<l> - learning_rate * dw<l> and
        b<l> <- b<l> - learning_rate * db<l>.

    Raises:
        AssertionError: if an update would change a parameter's shape.
        IndexError: if `grads` has fewer entries than there are layers.
    """
    L = len(params)//2

    # Work on a reversed copy rather than grads.reverse(): reversing in place
    # flipped the caller's list, so passing the same list again would apply
    # gradients to the wrong layers.
    layer_order = grads[::-1]

    for l in range(1, L + 1):
        _, dw, db, _ = layer_order[l - 1]
        w_key = 'w' + str(l)
        b_key = 'b' + str(l)

        new_w = params[w_key] - (learning_rate * dw)
        new_b = params[b_key] - (learning_rate * db)

        # Broadcasting could silently change a shape; check against the
        # parameter being replaced instead of a module-level layer list.
        assert(new_w.shape == params[w_key].shape)
        assert(new_b.shape == params[b_key].shape)

        params[w_key] = new_w
        params[b_key] = new_b

    return params
    

In [97]:
def predict(params, x, y):
    """Score the network on `x` against the reference labels `y`.

    Runs a forward pass, maps each output to 0 when it is <= 0.5 and to 1
    otherwise, and returns 100 minus 100 times the mean absolute difference
    between those labels and `y` (the accuracy in percent when `y` is 0/1).
    """
    probabilities, _ = for_prop(params, x)
    labels = np.where(probabilities <= 0.5, 0, 1)
    error_rate = np.mean(np.abs(labels - y))
    return 100 - (error_rate * 100)

In [108]:
def model(trianx, trainy, testx, testy, layer_dims, num_iteration, learning_rate, print_cost):
    """Train the network with full-batch gradient descent and report accuracy.

    Args:
        trianx: training inputs. The misspelled parameter name is kept so
            existing keyword callers keep working.
        trainy: training labels.
        testx: held-out inputs, scored once after training.
        testy: held-out labels.
        layer_dims: layer widths passed to initialize(), input width first.
        num_iteration: number of gradient-descent steps.
        learning_rate: step size passed to optimize().
        print_cost: when true, print the cost every 100 iterations.

    Returns:
        dict with keys:
          "params"    - the trained parameter dict,
          "costs"     - list of the cost sampled every 100 iterations,
          "train_acc" - predict() result on the training set,
          "test_acc"  - predict() result on the test set.
    """
    # The body previously read the global `trainx`, silently ignoring the
    # (misspelled) argument; bind the argument to the name used below.
    trainx = trianx

    params = initialize(layer_dims)

    costs = []
    for i in range(1, num_iteration+1):

        AL, caches = for_prop(params, trainx)
        cost = compute_cost(AL, trainy)
        grads = back_prop(AL, trainy, caches)
        params = optimize(params, grads, learning_rate)

        if i%100 == 0:
            # Record the history regardless of whether it is printed.
            costs.append(cost)
            if print_cost:
                print("cost at iteration ", i, ": ", cost)

    train_acc = predict(params, trainx, trainy)
    test_acc = predict(params, testx, testy)

    print("Training accuracy: ", train_acc)
    print("Test accuracy: ", test_acc)

    # "costs" holds the recorded history; storing the last scalar `cost`
    # discarded it and raised NameError when num_iteration was 0.
    data = {"params": params,
           "costs": costs,
           "train_acc": train_acc,
           "test_acc": test_acc}
    return data

In [99]:
# Load the pickled dataset from the working directory.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only open this file if it comes from a trusted source.
with open("handwrittendata.pickle", "rb") as pickle_in:
    data = pickle.load(pickle_in)

In [100]:
# List the entries available in the loaded dataset.
print(data.keys())

dict_keys(['trainx', 'trainy', 'testx', 'testy'])


In [101]:
# Inputs are divided by 255 (presumably 8-bit pixel intensities scaled into
# [0, 1]; confirm against the dataset); labels are used as stored.
trainx, testx = data['trainx'] / 255, data['testx'] / 255
trainy, testy = data['trainy'], data['testy']

In [102]:
# Sanity check of the test-input layout; for_prop reads axis 1 as the
# number of examples.
print(testx.shape)

(784, 80)


In [109]:
# Train the 784-4-1 network and keep the returned result dict in `d`.
d = model(
    trainx, trainy, testx, testy,
    [28 * 28, 4, 1],  # layer_dims
    2000,             # num_iteration
    0.05,             # learning_rate
    True,             # print_cost
)

cost at iteration  100 :  0.09466853566803857
cost at iteration  200 :  0.05694995533714396
cost at iteration  300 :  0.044569702542832594
cost at iteration  400 :  0.03758984571085106
cost at iteration  500 :  0.03288096699114577
cost at iteration  600 :  0.02938873998385639
cost at iteration  700 :  0.02663672692832822
cost at iteration  800 :  0.024376965285457842
cost at iteration  900 :  0.022467110264873198
cost at iteration  1000 :  0.02081800308229185
cost at iteration  1100 :  0.01937233530064329
cost at iteration  1200 :  0.01809209341065612
cost at iteration  1300 :  0.016949856670238482
cost at iteration  1400 :  0.015922449545102576
cost at iteration  1500 :  0.014993294282710706
cost at iteration  1600 :  0.014148869252227074
cost at iteration  1700 :  0.013378560443569023
cost at iteration  1800 :  0.012673292750002516
cost at iteration  1900 :  0.012025682118165591
cost at iteration  2000 :  0.011429244730349834
Training accuracy:  99.85007496251875
Test accuracy:  98.7

In [104]:
# List the entries of the result dict returned by model().
print(d.keys())

dict_keys(['params', 'costs', 'train_acc', 'test_acc'])


In [105]:
# Report the stored accuracies.
# NOTE(review): the saved output of this cell differs from the accuracies
# printed by the training cell; the execution counts suggest this cell ran
# before training was last re-run. Re-run it to refresh the numbers.
print(d['train_acc'])
print(d['test_acc'])

99.35032483758121
97.5


In [106]:
# Persist the result dict (parameters, cost entry, accuracies) to disk.
# NOTE(review): the execution counts suggest this cell last ran before the
# most recent training run; re-run it so the saved file matches the current `d`.
with open("NN ABClassifier result", "wb") as pickle_out:
    pickle.dump(d, pickle_out)