In [5]:
import numpy as np
from matplotlib import pyplot as plt
from PIL import Image

%matplotlib inline 
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/",one_hot=True)


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [19]:
# Get Inputs and Labels in proper format
num_iterations = 100
learning_rate = .1
X=mnist.train.images
Y=np.reshape(mnist.train.labels[:,0], (mnist.train.labels.shape[0], 1)).T
X_TEST = mnist.test.images
Y_TEST = np.reshape(mnist.test.labels[:,0], (mnist.test.labels.shape[0], 1)).T

#Let X inputs be 1 column 
X_col = X.T
X_TEST_col = X_TEST.T

#training set size 
m = Y.shape[1]
print("There are "+str(m)+" training examples!")

#defining the network structure 
n_x = X_col.shape[0]
n_hidden_layer_size = 4
n_y = Y.shape[0]

#sigmoid function 
def sigmoid(z):
    return 1 / (1+np.exp(-z))

#loss function
def loss (A,Y):
    l = Y * np.log(A) + (1-Y) * np.log(1-A)
    return l

#cost function
def cost (m,A,Y):
    c = (-1/m) * np.sum(loss(A,Y))
    return c

#initialize layers
def initialize_parameters(x,hidden,y):
    W1 = np.random.randn(hidden,x)
    b1 = np.zeros((hidden,1))
    W2 = np.random.randn(y,hidden)
    b2 = np.zeros((y,1))
    return W1,b1,W2,b2

#predict function 
def predict(x,w1,b1,w2,b2):
    Z1 = np.dot(w1,x)+b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(w2,A1)+b2
    A2 = sigmoid(Z2)
    Y_prediction = np.zeros((1,x.shape[1]))
    for i in range (0,x.shape[1]):
        if A2[0,i] < 0.5:
            Y_prediction[0,i] = 0
        else:
            Y_prediction[0,i] = 1
    return Y_prediction

#initialize params
W1,b1,W2,b2 = initialize_parameters(n_x,n_hidden_layer_size,n_y)

for i in range (0,num_iterations):
    #foward propagation
    Z1 = np.dot(W1,X_col)+b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2,A1)+b2
    A2 = sigmoid(Z2)
    J = cost(m,A2,Y)
    
    #back propagation
    dZ2 = A2 - Y
    dW2 = (1/m) * np.dot(dZ2,A1.T)
    db2 = (1/m) * np.sum(dZ2, axis=1, keepdims = True)
    dZ1 = np.dot(W2.T,dZ2) * np.power((1-A1),2)
    dW1 = (1/m) * np.dot(dZ1,X_col.T)
    db1 = (1/m) * np.sum(dZ1,axis=1,keepdims=True)
    
    #update with gradient descent
    W2 = W2 - learning_rate * dW2
    W1 = W1 - learning_rate * dW1
    b2 = b2 - learning_rate * db2
    b1 = b1 - learning_rate * db1
    
    #print cost 
    print("Cost after every iteration: "+ str(J))
    
    #shapes of our matrix 
    #print(Z2.shape,W2.shape,b2.shape,Z1.shape,W1.shape,b1.shape)
    #print(dZ2.shape, dW2.shape, db2.shape, dZ1.shape, dW1.shape, db1.shape)

#make prediction
Y_PREDICT = predict(X_TEST_col,W1,b1,W2,b2)
Y_TRAIN_PREDICT = predict(X_col,W1,b1,W2,b2)

#Compute Accuracy of Model
number_correct = 0
for i in range (0,Y_PREDICT.shape[1]):
    if Y_PREDICT[0,i] == Y_TEST[0,i]:
        number_correct+=1 
    else:
        pass
#print Training accuracy 
print(str((number_correct/10000) * 100)+"% Test Accuracy")

    
    



    

There are 55000 training examples!
Cost after every iteration: 1.4877661459177571
Cost after every iteration: 0.4246583133096015
Cost after every iteration: 0.3658718533459059
Cost after every iteration: 0.3379122286584784
Cost after every iteration: 0.3210855757509947
Cost after every iteration: 0.3091428671374855
Cost after every iteration: 0.2998339337187128
Cost after every iteration: 0.29238307756311344
Cost after every iteration: 0.28620871255283326
Cost after every iteration: 0.28073712478345375
Cost after every iteration: 0.27564497446872593
Cost after every iteration: 0.2707741677675977
Cost after every iteration: 0.2660200707188655
Cost after every iteration: 0.2613689243984846
Cost after every iteration: 0.25686698301414346
Cost after every iteration: 0.2525435677022971
Cost after every iteration: 0.24841818325054965
Cost after every iteration: 0.24449677605960007
Cost after every iteration: 0.2407621019647409
Cost after every iteration: 0.23720378785019242
Cost after every 