In [3]:
#importing important libraries
import numpy as np


In [131]:
class Neuralnet:
    def __init__(self,neuron_val, lr, batch_size, epochs):
        self.lr, self.batch_size, self.epochs = lr, batch_size, epochs
        self.nn_architecture = [
    {"input_dim": 28*28, "output_dim": neuron_val, "activation": "relu"},
    {"input_dim": neuron_val, "output_dim": 10, "activation": "sigmoid"}]
        number_of_layers = len(self.nn_architecture)
        self.params_values = {}
        
        for idx, layer in enumerate(self.nn_architecture):
            layer_idx = idx + 1
            layer_input_size = layer["input_dim"]
            layer_output_size = layer["output_dim"]
            mu, sigma = 0, 0.1 # mean and standard deviation
            self.params_values['W' + str(layer_idx)] = np.random.normal(
                mu,sigma,[layer_output_size, layer_input_size]) 
            self.params_values['b' + str(layer_idx)] = np.random.normal(
                mu,sigma,[layer_output_size, 1])
            
    def params(self):
        return self.params_values
    
    
    def sigmoid(self,Z):
        return 1/(1+np.exp(-Z))

    def relu(self,Z):
        return np.maximum(0,Z)

    def sigmoid_backward(self, dA, Z):
        sig = sigmoid(Z)
        return dA * sig * (1 - sig)

    def relu_backward(self, dA, Z):
        dZ = np.array(dA, copy = True)
        dZ[Z <= 0] = 0;
        return dZ;
    

    
    def feedforward(self,X):
        self.memory = {}
        A_curr = X

        for idx, layer in enumerate(self.nn_architecture):
            layer_idx = idx + 1
            A_prev = A_curr

            activation = layer["activation"]
            W_curr = self.params_values["W" + str(layer_idx)]
            b_curr = self.params_values["b" + str(layer_idx)]
            Z_curr = np.dot(W_curr, A_prev) + b_curr

            if activation is "relu":
                A_curr = self.relu(Z_curr)
            elif activation is "sigmoid":
                A_curr = self.sigmoid(Z_curr)

            self.memory["A" + str(idx)] = A_prev
            self.memory["Z" + str(layer_idx)] = Z_curr

        return A_curr
    
    
    def backprop(Y_hat, Y):
        self.grads_values = {}
        m = Y.shape[1]
        Y = Y.reshape(Y_hat.shape)

        dA_prev = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat));

        for layer_idx_prev, layer in reversed(list(enumerate(self.nn_architecture))):
            layer_idx_curr = layer_idx_prev + 1
            activ_function_curr = layer["activation"]

            dA_curr = dA_prev

            A_prev = self.memory["A" + str(layer_idx_prev)]
            Z_curr = self.memory["Z" + str(layer_idx_curr)]
            W_curr = self.params_values["W" + str(layer_idx_curr)]
            b_curr = self.params_values["b" + str(layer_idx_curr)]
            
            m = A_prev.shape[1]

            if  activ_function_curr == "relu":
                dZ_curr = relu_backward(dA_curr, Z_curr)
            else:
                dZ_curr = sigmoid_backward(dA_curr, Z_curr)
                        
            dW_curr = np.dot(dZ_curr, A_prev.T) / m
            db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m
            dA_prev = np.dot(W_curr.T, dZ_curr)

            self.grads_values["dW" + str(layer_idx_curr)] = dW_curr
            self.grads_values["db" + str(layer_idx_curr)] = db_curr
            

    def sgd(self,data):
        
        data = data.sample(self.batch_size)
        self.cost_history = []

        for i in range(self.epochs):
            Y_hat = feedforward(X)
            cost = get_cost_value(Y, Y_hat)
            self.cost_history.append(cost)
            update()

        return self.params_values, self.cost_history
    
    def update(self):
        backprop(Y_hat, Y)
        for layer_idx, layer in enumerate(self.nn_architecture):
            self.params_values["W" + str(layer_idx)] -= self.lr * self.grads_values["dW" + str(layer_idx)]        
            self.params_values["b" + str(layer_idx)] -= self.lr * self.grads_values["db" + str(layer_idx)]

    
    def get_cost_value(self, y, y_hat):
        inner = np.power((y - y_hat), 2)
        return np.sum(inner) / (2 * len(y))
    
    def evaluate(self):
        inp = np.random.randn(28*28)
        x,y = self.feedforward(inp)
        return x,y

In [137]:
from tensorflow.keras.datasets import mnist

In [139]:
(trainX, trainy), (testX, testy) = mnist.load_data()
# summarize loaded dataset
print('Train: X=%s, y=%s' % (trainX.shape, trainy.shape))
print('Test: X=%s, y=%s' % (testX.shape, testy.shape))


trainY = to_categorical(trainY)
testY = to_categorical(testY)

print(trainX[0])

Train: X=(60000, 28, 28), y=(60000,)
Test: X=(10000, 28, 28), y=(10000,)


NameError: name 'to_categorical' is not defined