This implementation is taken from https://medium.com/data-science/math-neural-network-from-scratch-in-python-d6da9f29ce65

In [1]:
import numpy as np
import os
import pandas as pd

In [3]:
#Base class
class Layer:
    """Abstract base class for every layer of the network.

    Subclasses must implement both propagation methods. ``input`` and
    ``output`` hold the values seen during the most recent forward pass.
    """

    # The constructor was previously spelled ``__init__self`` and therefore
    # never ran, so ``input``/``output`` were not initialised on new layers.
    def __init__(self):
        self.input = None
        self.output = None

    def forward_propagation(self, input):
        """Compute the layer output for ``input``; implemented by subclasses."""
        raise NotImplementedError

    def backward_propagation(self, output_error, learning_rate):
        """Given the error w.r.t. the output, update any parameters and
        return the error w.r.t. the input; implemented by subclasses."""
        raise NotImplementedError

In [5]:
# Dense Layer class
class Dense(Layer):
    """Fully connected layer computing ``output = input . weights + bias``.

    Weights have shape ``(input_size, output_size)`` and the bias has shape
    ``(1, output_size)``; both start uniformly distributed in [-0.5, 0.5).
    """

    def __init__(self,input_size,output_size):
        super().__init__()
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    def forward_propagation(self,input_data):
        """Store ``input_data`` for the backward pass and return the output.

        ``input_data`` is a 2-D array whose rows are samples, e.g. shape
        ``(1, input_size)`` for a single sample.
        """
        self.input = input_data
        self.output = np.dot(self.input,self.weights) + self.bias
        return self.output

    def backward_propagation(self,output_error, learning_rate):
        """Apply one gradient-descent step and return the input error.

        ``output_error`` is dE/dY for the last forward pass. The returned
        value is dE/dX, to be handed to the previous layer.
        """
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        # The bias is shared by every row of the batch, so its gradient is the
        # sum over rows. For a single-row error this equals output_error, but
        # it also keeps the (1, output_size) bias shape for multi-row batches
        # (the in-place update used to fail to broadcast in that case).
        bias_error = np.sum(np.atleast_2d(output_error), axis=0, keepdims=True)

        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * bias_error
        return input_error

In [7]:
# Activation Layer class
class Activation(Layer):
    """Parameter-free layer that applies an element-wise function.

    ``activation`` is used in the forward pass and ``activation_prime`` (its
    derivative) in the backward pass.
    """

    def __init__(self, activation, activation_prime):
        self.activation_prime = activation_prime
        self.activation = activation

    def forward_propagation(self, input_data):
        """Remember the input and return ``activation(input_data)``."""
        self.input = input_data
        activated = self.activation(input_data)
        self.output = activated
        return activated

    def backward_propagation(self, output_error, learning_rate):
        """Return the input error; ``learning_rate`` is unused because this
        layer has nothing to learn."""
        local_gradient = self.activation_prime(self.input)
        return local_gradient * output_error

In [9]:
# Softmax Layer Class
class Softmax(Layer):
    """Softmax over all elements of a single sample.

    The methods use the ``forward_propagation`` / ``backward_propagation``
    names that ``Network`` calls on its layers; the shorter ``forward`` /
    ``backward`` names are kept as aliases for existing callers.
    """

    def forward_propagation(self, input):
        """Return ``exp(input) / sum(exp(input))`` and cache it for backprop."""
        values = np.asarray(input)
        self.input = values
        # Softmax is invariant to shifting its input by a constant, so
        # subtracting the maximum avoids overflow in exp() for large values.
        exps = np.exp(values - np.max(values))
        self.output = exps / np.sum(exps)
        return self.output

    def backward_propagation(self, output_gradient, learning_rate):
        """Return the gradient w.r.t. the input.

        ``learning_rate`` is unused (no trainable parameters). The result has
        the same shape as the cached output, so row vectors ``(1, n)``,
        column vectors ``(n, 1)`` and flat vectors ``(n,)`` all work.
        """
        probs = np.ravel(self.output)
        # Softmax Jacobian: J[i, j] = s_i * (delta_ij - s_j). It is symmetric,
        # so the same product serves row- and column-vector conventions.
        jacobian = np.diag(probs) - np.outer(probs, probs)
        input_gradient = np.dot(jacobian, np.ravel(output_gradient))
        return input_gradient.reshape(np.shape(self.output))

    # Backward-compatible aliases for the original method names.
    forward = forward_propagation
    backward = backward_propagation

In [11]:
# Some activation functions here
def tanh(x):
    """Hyperbolic tangent activation, applied element-wise."""
    activated = np.tanh(x)
    return activated

def tanh_prime(x):
    """Derivative of tanh: ``1 - tanh(x)^2``, applied element-wise."""
    squared = np.tanh(x) ** 2
    return 1 - squared

def ReLu(x):
    """Rectified linear unit: element-wise ``max(x, 0)``.

    Returns a new array and leaves ``x`` untouched. The previous form passed
    ``x`` as the ``out`` argument of ``np.maximum``, which overwrote the
    caller's array in place and rejected non-array inputs such as lists.
    """
    return np.maximum(x, 0)

def ReLu_prime(x):
    """Derivative of ReLU as a boolean mask: True where ``x`` is positive."""
    is_positive = x > 0
    return is_positive

In [13]:
# Loss function MSE
def mse(y_true, y_pred):
    """Mean squared error between targets and predictions."""
    residual = y_true - y_pred
    squared = np.power(residual, 2)
    return np.mean(squared)

def mse_prime(y_true, y_pred):
    """Gradient of the mean squared error with respect to ``y_pred``."""
    count = y_true.size
    doubled_residual = 2 * (y_pred - y_true)
    return doubled_residual / count

In [15]:
# Network class
class Network:
    """Sequential network: an ordered list of layers plus a loss function.

    Layers are any objects exposing ``forward_propagation(x)`` and
    ``backward_propagation(error, learning_rate)``.
    """

    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None

    # add layer to network
    def add(self, layer):
        """Append ``layer`` to the end of the network."""
        self.layers.append(layer)

    # set loss to use
    def use(self, loss, loss_prime):
        """Set the loss ``loss(y_true, y_pred)`` and its derivative."""
        self.loss = loss
        self.loss_prime = loss_prime

    def _forward(self, sample):
        """Run one sample through every layer in order and return the output."""
        output = sample
        for layer in self.layers:
            output = layer.forward_propagation(output)
        return output

    def predict(self, input_data):
        """Return a list with the network output for each sample.

        ``input_data`` is indexed by position (``input_data[i]``), so any
        container supporting ``len`` and integer indexing works.
        """
        return [self._forward(input_data[i]) for i in range(len(input_data))]

    # train network
    def fit(self, x_train, y_train, epochs, learning_rate):
        """Train with per-sample gradient descent.

        For each epoch every sample is propagated forward, the loss is
        accumulated, and the loss gradient is propagated backward through the
        layers in reverse order. The average loss is printed every 10th epoch.

        Raises:
            ValueError: if ``x_train`` is empty (the average error would
                otherwise divide by zero after the first epoch).
        """
        samples = len(x_train)
        if samples == 0:
            raise ValueError("x_train must contain at least one sample")

        for epoch in range(epochs):
            err = 0
            for j in range(samples):
                output = self._forward(x_train[j])

                # compute loss (for display only)
                err += self.loss(y_train[j], output)

                # backward propagation
                error = self.loss_prime(y_train[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            # calculate average error on all samples
            err /= samples
            if epoch % 10 == 0:
                print('epoch %d/%d   error=%f' % (epoch+1, epochs, err))

In [56]:
# Test with XOR
# training data
# XOR truth table: four samples, each stored as a (1, 2) row vector with a
# matching (1, 1) target
x_train = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]).reshape(4, 1, 2)
y_train = np.array([0, 1, 1, 0]).reshape(4, 1, 1)

# 2 -> 3 -> 1 network with tanh after each dense layer, trained with MSE
net = Network()
net.use(mse, mse_prime)
net.add(Dense(2, 3))
net.add(Activation(tanh, tanh_prime))
net.add(Dense(3, 1))
net.add(Activation(tanh, tanh_prime))

# train
net.fit(x_train, y_train, epochs=5000, learning_rate=0.1)

# evaluate on the same four inputs
out = net.predict(x_train)
print(out)

epoch 1/5000   error=0.980193
epoch 101/5000   error=0.232435
epoch 201/5000   error=0.210408
epoch 301/5000   error=0.048100
epoch 401/5000   error=0.002567
epoch 501/5000   error=0.001146
epoch 601/5000   error=0.000715
epoch 701/5000   error=0.000513
epoch 801/5000   error=0.000397
epoch 901/5000   error=0.000322
epoch 1001/5000   error=0.000270
epoch 1101/5000   error=0.000232
epoch 1201/5000   error=0.001391
epoch 1301/5000   error=0.000186
epoch 1401/5000   error=0.000166
epoch 1501/5000   error=0.000150
epoch 1601/5000   error=0.000136
epoch 1701/5000   error=0.000125
epoch 1801/5000   error=0.000116
epoch 1901/5000   error=0.000251
epoch 2001/5000   error=0.000131
epoch 2101/5000   error=0.000096
epoch 2201/5000   error=0.000090
epoch 2301/5000   error=0.000085
epoch 2401/5000   error=0.000080
epoch 2501/5000   error=0.000078
epoch 2601/5000   error=0.000255
epoch 2701/5000   error=0.000106
epoch 2801/5000   error=0.000068
epoch 2901/5000   error=0.000064
epoch 3001/5000   erro

In [19]:
# Train with MNIST
# get training data
# Build paths with os.path.join so the cell is not tied to Windows separators;
# the name `data_dir` also avoids shadowing the builtin `dir`.
data_dir = os.path.join(os.getcwd(), 'data')
df = pd.read_csv(os.path.join(data_dir, 'train.csv'))

# After the transpose each row holds one CSV column.
# Assumes the first CSV column is the digit label and the remaining 784
# columns are pixel values -- confirm against the data file.
df = np.array(df).T
m,n = df.shape  # m = number of CSV columns, n = number of samples

# One-hot encode the labels into rows of length 10
yTrain = df[0]
yTrain = np.eye(10)[yTrain]

# Take every row after the label row. The old slice df[1:n] used the sample
# count as the row bound, which drops pixel rows whenever n < m.
xTrain = df[1:].T
xTrain = xTrain.reshape(xTrain.shape[0], 1, 28*28)
xTrain = xTrain.astype('float32')
xTrain /= 255  # scale pixel values from [0, 255] down to [0, 1]

In [93]:
# Network
# 784 -> 100 -> 50 -> 10 network: ReLU on the hidden layers, tanh on the
# output layer, trained with mean squared error
net = Network()
net.use(mse, mse_prime)

net.add(Dense(28*28, 100))
net.add(Activation(ReLu, ReLu_prime))
net.add(Dense(100, 50))
net.add(Activation(ReLu, ReLu_prime))
net.add(Dense(50, 10))
net.add(Activation(tanh, tanh_prime))

net.fit(xTrain, yTrain, epochs=200, learning_rate=0.1)

epoch 1/200   error=0.113107
epoch 11/200   error=0.070353
epoch 21/200   error=0.034440
epoch 31/200   error=0.022237
epoch 41/200   error=0.013988
epoch 51/200   error=0.013179
epoch 61/200   error=0.012681
epoch 71/200   error=0.012293
epoch 81/200   error=0.005418
epoch 91/200   error=0.002501
epoch 101/200   error=0.001964
epoch 111/200   error=0.001636
epoch 121/200   error=0.001538
epoch 131/200   error=0.001419
epoch 141/200   error=0.001471
epoch 151/200   error=0.001303
epoch 161/200   error=0.001085
epoch 171/200   error=0.001369
epoch 181/200   error=0.001047
epoch 191/200   error=0.001313


In [94]:
# Test with MNIST
# get test data
# Build paths with os.path.join so the cell is not tied to Windows separators;
# the name `data_dir` also avoids shadowing the builtin `dir`.
data_dir = os.path.join(os.getcwd(), 'data')
df = pd.read_csv(os.path.join(data_dir, 'test.csv'))

xTest = np.array(df)
m,n = xTest.shape  # m = number of rows (samples), n = number of columns

# One (1, 784) row vector per sample, scaled the same way as the training data
xTest = xTest.reshape(m, 1, 28*28)
xTest = xTest.astype('float32')
xTest /= 255

# The predicted label is the index of the largest network output
pred = net.predict(xTest)
pred = np.array([item.argmax() for item in pred])

# ImageId is 1-based
submission_df = pd.DataFrame({"ImageId": np.arange(1,len(pred)+1), "Label": pred})
submission_df.to_csv("submissionNNmanual_OO.csv", index=False)