# Building a Neural Network from Scratch

## Requirements 

- A working fully-connected deep neural network from scratch using only numpy.
- Includes dense layers, activations, optimizers, loss functions and sigmoid or softmax in case of classification. 
- Runtime and results on a public dataset.
- Documented code that includes brief summary, technical details, and results. 

## Extensions 

A comparison of the modelâ€™s runtime and performance with/without each component:
- More than 1 optimizer - SGD, Momentum, RMSProp, Adam etc.
- Regularization - L2/weight decay, dropout, possibly augmentations if image data etc.
- Results on more than 1 dataset.


### Load the dataset

In [5]:
import numpy as np

from sklearn.datasets import fetch_openml
#load MNIST dataset
mnist = fetch_openml('mnist_784')
X = mnist.data
y = mnist.target

In [6]:
X = X /255
#y = np.where(y=='0', 0, 1)

In [7]:
m = 60000
m_test = X.shape[0] - m

#switch rows and columns and reshape
print(X.shape, y.shape)
X_train, X_test = X[:m], X[m:]
y = y.T



# y_train, y_test = y[:m].reshape(1,m), y[m:].reshape(1,m_test)

# y_train, y_test = np.array(y[:m]), np.array(y[m:])
# y_train, y_test = np.array(y[:m]).reshape(1,m), np.array(y[m:]).reshape(1,m_test)
# print(X_train.shape, y_train.shape, X_test.shape,  y_test.shape) 

(70000, 784) (70000,)


In [30]:
#one-hot encode mnist data

digits =10 
examples = 70000

y_new = np.eye(digits)[y.astype('int32')]
y_new = y_new.reshape(digits, examples).T

Y_train, Y_test = y_new[:m], y_new[m:]

print('Train:',X_train.shape, Y_train.shape, '---- Test:',X_test.shape,  Y_test.shape)

Train: (60000, 784) (60000, 10) ---- Test: (10000, 784) (10000, 10)


In [77]:
def sigmoid(Z):
    """
    Sigmoid activation function.
    """
    return 1/(1+np.exp(-Z))

def der_sigmoid(Z):
    """
    Derivative of sigmoid activation function.
    """
    return sigmoid(Z) * (1 - sigmoid(Z))

def cross_entropy(Y, Y_hat):
    """
    Binary cross entropy loss function.
    """
#     print(Y, Y_hat)
#     print(Y.shape)
#     n = Y.shape[0] 
#     L = -(1/n) * (np.sum( np.multiply(np.log(Y_hat),Y) ) + np.sum( np.multiply(np.log(1-Y_hat),(1-Y)) ) )
    L = -np.log(np.max(Y_hat * Y, axis=1) + 1e-8) #add small number to avoid zero error
    L = np.average(L)
    return L

def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0, keepdims=True) # only difference

In [78]:
cross_entropy(np.array([[0,1,0],[1,0,0]]), np.array([[0.5,0.5,0],[0.2,0.8,0]]))

1.1512925114970236

In [79]:

input_size = 784

#Initialization: weights and biases
w1 = np.random.rand(input_size,64) / 1000
w2 = np.random.rand(64,10) / 1000
b1 = np.zeros(64)
b2 = np.zeros(10)

def forward_propagation(X,y):
    
    #single forward pass
    z1 = np.dot(X,w1) + b1
    s1 = sigmoid(z1)
    z2 = np.dot(s1,w2) + b2 
    print(z2)
    #pass it through softmax
    y_hat = softmax(z2) #model outputs probabilities
#   y_hat = np.argmax(s2, axis=0)
    
    #compute loss using model output and true labels
    #print(y_hat, y.shape)
    L = cross_entropy(y,y_hat)
    
    return L

def backward_propagation(L):
    
    return
    
    
### Backward 
# get that d_w and d_b for each layer 

In [80]:
nice = forward_propagation(X_train,Y_train)

[[0.01841449 0.01561965 0.01454792 ... 0.01670041 0.01628952 0.01511394]
 [0.01847872 0.01567465 0.01459616 ... 0.0167579  0.01634822 0.01516432]
 [0.01827041 0.01549427 0.01443051 ... 0.01656713 0.01616196 0.01499395]
 ...
 [0.0183219  0.01553912 0.01447232 ... 0.01661508 0.01620734 0.0150368 ]
 [0.01829574 0.01551656 0.01444988 ... 0.01659152 0.01618534 0.01501376]
 [0.01829806 0.01551849 0.01445337 ... 0.01659174 0.01618562 0.01501619]]


In [81]:
np.unique(nice,return_counts=True)

(array([13.58720678]), array([1]))