# Building a Neural Network from Scratch

## Requirements 

- A working fully-connected deep neural network from scratch using only numpy.
- Includes dense layers, activations, optimizers, loss functions and sigmoid or softmax in case of classification. 
- Runtime and results on a public dataset.
- Documented code that includes brief summary, technical details, and results. 

## Extensions 

A comparison of the modelâ€™s runtime and performance with/without each component:
- More than 1 optimizer - SGD, Momentum, RMSProp, Adam etc.
- Regularization - L2/weight decay, dropout, possibly augmentations if image data etc.
- Results on more than 1 dataset.


### Load the dataset

In [199]:
import numpy as np

from sklearn.datasets import fetch_openml
#load MNIST dataset
mnist = fetch_openml('mnist_784')
X = mnist.data
y = mnist.target

In [200]:
X = X /255

In [201]:
m = 60000
m_test = X.shape[0] - m
X_train, X_test = np.array(X[:m]), np.array(X[m:])

(70000, 784) (70000,)


In [212]:
#one-hot encode mnist data

digits =10 
examples = 70000

y_new = np.array(y, dtype='int32').reshape(-1)
y_new = np.eye(10)[y_new]
Y_train, Y_test = np.array(y_new[:m]), np.array(y_new[m:])

In [257]:
def sigmoid(Z):
    """
    Sigmoid activation function.
    """
    return 1/(1+np.exp(-Z))

def der_sigmoid(Z):
    """
    Derivative of sigmoid activation function.
    """
    return sigmoid(Z) * (1 - sigmoid(Z))

def cross_entropy(Y, Y_hat):
    """
    Binary cross entropy loss function.
    """
    L = -np.log(np.max(Y_hat * Y, axis=1) + 1e-8) #add small number to avoid zero error
    L = np.average(L)
    return L

def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    
    prob = []
    for arr in x:
        exponentiated = [np.exp(x) for x in arr]
        denominator = np.sum(exponentiated)
        prob.append([x / denominator for x in exponentiated])
    return np.array(prob)

def accuracy(y, y_hat):
    bools = np.argmax(y_hat, axis=1) == np.argmax(y, axis=1)
    return np.average(bools.astype(int))

In [258]:
# cross_entropy(np.array([[0,1,0],[1,0,0]]), np.array([[0.5,0.5,0],[0.2,0.8,0]]))
# accuracy([[0,1,0],[0,0,1]], [[0.2,0.8,0],[0.9,0.1,0]])

0.5

In [274]:

input_size = 784
learning_rate = 5
batch_size = 60

#Initialization: weights and biases
w1 = 0.001 * np.random.randn(input_size,64)
w2 = 0.001 * np.random.randn(64,10)
b1 = np.zeros(64)
b2 = np.zeros(10)

def gradient_descent(X, y, forward_only=False):
    
    global w2
    global b2
    global w1
    global b1
    z1 = np.dot(X,w1) + b1
    s1 = sigmoid(z1)
    z2 = np.dot(s1,w2) + b2 
    y_hat = softmax(z2) #these are probabilities
    L = cross_entropy(y,y_hat) # compute loss using model output and true labels
    acc = accuracy(y, y_hat)
    
    ## Backward propagation ## 
    
    if forward_only:
        return L, acc
    
    #Gradients for Layer 2
    d_z2 = y_hat - y #
    d_w2 = s1.T.dot(d_z2)
    d_b2 = np.sum(d_z2, axis=0)
    d_s1 = der_sigmoid(z1) * d_z2.dot(w2.T)
    
    #Gradients for Layer 1
    d_w1 = X.T.dot(d_s1)
    d_b1 = np.sum(d_w1, axis=0)
    
    # Use gradients to update weights and biases
    w2 = (w2 - (learning_rate * (d_w2 / m)))
    b2 = (b2 - (learning_rate * (d_b2 / m)))
    w1 = (w1 - (learning_rate * (d_w1 / m)))
    b1 = (b1 - (learning_rate * (d_b1 / m)))
    return L, acc


def run_epoch(X, y):
    
    index = np.arange(m)
    np.random.shuffle(index)
    X = X[index]
    y = y[index]
    
    batch_count = m / batch_size
    X_batches = np.array_split(X, batch_count)
    y_batches = np.array_split(y, batch_count)
    
    losses = []
    accs = []
    for i in range(len(X_batches)):
        loss, acc = gradient_descent(X_batches[i], y_batches[i])
        losses.append(loss)
        accs.append(acc)
    return np.average(losses), np.average(accs)

num_epochs = 300
for i in range(num_epochs):
    loss, acc = run_epoch(X_train,Y_train)
    print(f"Epoch {i} Loss: {}, Accuracy: {}")

#cross validation 
#test set 
#

Epoch avg. loss: 2.3015792013701866, Accuracy: 0.11146666666666667
Epoch avg. loss: 2.301246495414362, Accuracy: 0.11221666666666667
Epoch avg. loss: 2.300916533733106, Accuracy: 0.11236666666666667
Epoch avg. loss: 2.2996771821842708, Accuracy: 0.11236666666666666
Epoch avg. loss: 2.2957895456517226, Accuracy: 0.11275
Epoch avg. loss: 2.2844175037371435, Accuracy: 0.14225
Epoch avg. loss: 2.2564970810077982, Accuracy: 0.2232833333333333
Epoch avg. loss: 2.195971624618307, Accuracy: 0.26676666666666665
Epoch avg. loss: 2.074729260626744, Accuracy: 0.32871666666666666
Epoch avg. loss: 1.8882151090138786, Accuracy: 0.40323333333333333
Epoch avg. loss: 1.6863958417971185, Accuracy: 0.4728333333333334
Epoch avg. loss: 1.5076561834939683, Accuracy: 0.5326333333333332
Epoch avg. loss: 1.3552210943974825, Accuracy: 0.5841
Epoch avg. loss: 1.2283905070954815, Accuracy: 0.6220166666666667
Epoch avg. loss: 1.1255527145178261, Accuracy: 0.6647333333333334
Epoch avg. loss: 1.0410544134513582, Accu

Epoch avg. loss: 0.24687736018685258, Accuracy: 0.9308833333333333
Epoch avg. loss: 0.2458829991429177, Accuracy: 0.9312999999999999
Epoch avg. loss: 0.24500876868796262, Accuracy: 0.9317000000000001
Epoch avg. loss: 0.2441874212053851, Accuracy: 0.9317833333333333
Epoch avg. loss: 0.24323241793811848, Accuracy: 0.93245
Epoch avg. loss: 0.2424267993941037, Accuracy: 0.9323833333333332
Epoch avg. loss: 0.24159701649125184, Accuracy: 0.9327333333333332
Epoch avg. loss: 0.24071642303967145, Accuracy: 0.9330999999999999
Epoch avg. loss: 0.2398787603143896, Accuracy: 0.9332166666666666
Epoch avg. loss: 0.23905992832396192, Accuracy: 0.9332666666666667
Epoch avg. loss: 0.23817838277618827, Accuracy: 0.9336833333333333
Epoch avg. loss: 0.23722781986469144, Accuracy: 0.9341666666666665
Epoch avg. loss: 0.23665003156516837, Accuracy: 0.9343166666666666
Epoch avg. loss: 0.23577971538294018, Accuracy: 0.9344333333333332
Epoch avg. loss: 0.23496097972068128, Accuracy: 0.9346999999999999
Epoch avg.

Epoch avg. loss: 0.17421742673965018, Accuracy: 0.9514833333333332
Epoch avg. loss: 0.17388919793036245, Accuracy: 0.9521833333333332
Epoch avg. loss: 0.17349638028181563, Accuracy: 0.9516833333333333
Epoch avg. loss: 0.17311378071902686, Accuracy: 0.9518666666666665
Epoch avg. loss: 0.17286540770065711, Accuracy: 0.9519666666666667
Epoch avg. loss: 0.17237987829969853, Accuracy: 0.9519499999999999
Epoch avg. loss: 0.1720233602056506, Accuracy: 0.9523833333333332
Epoch avg. loss: 0.17161636150018705, Accuracy: 0.9525833333333332
Epoch avg. loss: 0.1713423251722635, Accuracy: 0.9524499999999998
Epoch avg. loss: 0.17095461937990414, Accuracy: 0.9527833333333332
Epoch avg. loss: 0.1705796568661176, Accuracy: 0.9525999999999999
Epoch avg. loss: 0.17021853780744703, Accuracy: 0.9530166666666665
Epoch avg. loss: 0.1697922334916617, Accuracy: 0.9528999999999999
Epoch avg. loss: 0.16951415836203973, Accuracy: 0.9531833333333332
Epoch avg. loss: 0.16907630207069918, Accuracy: 0.9530666666666666