In [15]:
# Create Neural Network From Scratch
# https://www.freecodecamp.org/news/building-a-neural-network-from-scratch/
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_mldata

In [16]:
def init_params(layer_dims):
    np.random.seed(3)
    params = {}
    _len = len(layer_dims)
    
    for ln in range(1, _len):
        params['W' + str(ln)] = np.random.randn(layer_dims[1], layer_dims[ln-1])*0.01
        params['b' + str(ln)] = np.zeros((layer_dims[ln], 1))
    
    return params


In [17]:
# Z (linear hypothesis) - Z = W*X + b,
# W - weight matrix, b - bias vector, X - Input
def sigmoid(Z):
    A = 1/(1+np.exp(np.dot(-1, Z)))
    cache = (Z)
    
    return A, cache

In [18]:
def forward_prop(X, params):
    A = X # input to first layer i.e. training data
    caches = []
    
    params_len = len(params)//2 # floor divisions
    for _len in range(1, params_len+1):
        A_prev = A
        
        # Linear hypothesis 
        Z = np.dot(params['W'+str(_len)], A_prev) + params['b'+str(_len)]
        
        # Storing the linear cache
        linear_cache = (A_prev, params['W' + str(_len)], params['b'+str(_len)])
        
        # Applying sigmoid on linear hypothesis
        A, activation_cache = sigmoid(Z)
        
        # Storing the both linear and activation cache
        cache = (linear_cache, activation_cache)
        caches.append(cache)
    
    return A, caches

In [19]:
def cos_function(A, Y):
    m = Y.shape[1]
    
    cost = (-1/m)*(np.dot(np.log(A), Y.T) + np.dot(log(1-A), 1-Y.T))
    
    return cost

In [20]:
def one_layer_backward(dA, cache):
    linear_cache, activation_cache = cache
    
    Z = activation_cache
    dZ = dA*sigmoid(Z)*(1-sigmoid(Z)) # The derivative of the sigmoid function
    
    A_prev, W, b = linear_cache
    m = A_prev.shape[1]
    
    dW = (1/m)*np.dot(dZ, A_prev.T)
    db = (1/m)*np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)
    
    return dA_prev, dW, db

In [21]:
def backprop(AL, Y, caches):
    grads = {}
    _len = len(caches)
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)
    
    dAL = -(np.divide(Y, AL) - np.divide(1-Y, 1-AL))
    
    current_cache = caches[_len-1]
    grads['dA' + str(_len-1)], grads['dW' + str(_len-1)], grads['db' + (_len-1)] = one_layer_backward(dAL, current_cache)
    
    for ln in reversed(range(_len - 1)):
        current_cache = caches[ln]
        dA_prev_temp, dW_temp, db_temp = one_layer_backward(grads["dA" + str(_len+1)], current_cache)
        grads["dA" + str(ln)] = dA_prev_temp
        grads["dW" + str(ln + 1)] = dW_temp
        grads["db" + str(ln + 1)] = db_temp
        
    return grads

In [22]:
def update_parameters(parameters, grads, learning_rate):
    _len = len(parameters) // 2
    
    for ln in range(_len):
        parameters['W' + str(ln + 1)] = parameters['W' + str(ln + 1)] - learning_rate*grads['W' + str(ln+1)]
        parameters['b' + str(ln + 1)] = parameters['b' + str(ln + 1)] - learning_rate*grads['b' + str(ln+1)]
    
    return parameters

In [23]:
def train(X, Y, layer_dims, epochs, learning_rate):
    params = init_params(layer_dims)
    cost_history = []
    
    for i in range(epochs):
        Y_hat, caches = forward_prop(X, params)
        cost = cost_function(Y_hat, Y)
        cost_history,append(cost)
        grads = backprop(Y_hat, Y, caches)
        
        params = update_parameters(params, grads, learning_rate)
    
    return params, cost_history
        

In [25]:
mnist = fetch_mldata('MNIST original')
mnist



HTTPError: HTTP Error 500: Internal Server Error