In [1]:
import numpy as np
from sklearn.datasets import fetch_mldata

# NOTE(review): `fetch_mldata` (and the mldata.org service behind it) is no
# longer available in current scikit-learn releases -- confirm the pinned
# version, or migrate this cell together with its import to `fetch_openml`.
mnist = fetch_mldata('MNIST original')

X, y = mnist.data, mnist.target

# Seed the global NumPy generator so the shuffle, the train/test split and any
# later random weight initialisation are reproducible on Restart & Run All.
SEED = 42
np.random.seed(SEED)

# Shuffle samples and labels with one shared permutation. The permutation is
# sized from the data itself rather than a hard-coded sample count.
index = np.random.permutation(len(X))
X, y = X[index], y[index]

# The first TRAIN_SIZE shuffled samples are used for training, the rest for testing.
TRAIN_SIZE = 60000
x_train, y_train = X[:TRAIN_SIZE], y[:TRAIN_SIZE]
x_test, y_test = X[TRAIN_SIZE:], y[TRAIN_SIZE:]

In [103]:
class DNI:
    """Fully connected sigmoid network trainable by backprop or synthetic gradients.

    Layers are stacked with :meth:`add`. Every layer ``i`` owns

    * ``synapticWeights[i]``    -- weight matrix of shape ``(fan_in, units)``;
    * ``syntheticGradients[i]`` -- square ``(units, units)`` linear module that
      maps the layer's activations to an estimate of the error signal reaching
      that layer. The module of the last layer is never used, because the last
      layer always receives the real output error.

    There are no bias terms and every layer uses the logistic sigmoid.
    """

    def __init__(self):
        """Create an empty model; layers are appended with :meth:`add`."""
        self.synapticWeights = []
        self.syntheticGradients = []

    def add(self, units, input_shape=None):
        """Append a dense layer with ``units`` outputs.

        Args:
            units: number of neurons in the new layer.
            input_shape: number of input features. Required for the first
                layer; ignored for later layers, whose input width is the
                width of the previous layer.

        Raises:
            ValueError: if this is the first layer and ``input_shape`` is
                missing (or zero).
        """
        if self.synapticWeights:
            fan_in = self.synapticWeights[-1].shape[1]
        elif input_shape:
            fan_in = input_shape
        else:
            # Silently dropping the layer would leave a shorter model than the
            # caller asked for, so fail loudly instead.
            raise ValueError("the first layer requires input_shape")

        # Zero-centred, fan-in scaled weights keep the initial pre-activations
        # small; all-positive weights on wide inputs push the sigmoid into
        # saturation, where its derivative (and so every update) is zero.
        limit = 1.0 / np.sqrt(fan_in)
        self.synapticWeights.append(np.random.uniform(-limit, limit, size=(fan_in, units)))

        # Start the module at zero so that, until it has been trained, it
        # predicts a zero error signal and therefore causes no weight update.
        self.syntheticGradients.append(np.zeros((units, units)))

    def sigmoid(self, X, deriv=False):
        """Logistic sigmoid, or its derivative.

        Args:
            X: array of pre-activations, or -- when ``deriv`` is true -- array
                of values that are already sigmoid outputs.
            deriv: if true, return ``X * (1 - X)``, i.e. the derivative
                expressed in terms of the sigmoid output ``X``.
        """
        if not deriv:
            return 1/(1+np.exp(-X))
        return X*(1-X)

    def _forward(self, X):
        """Return the list of activations of every layer for input batch ``X``."""
        activations = []
        signal = X
        for weights in self.synapticWeights:
            signal = self.sigmoid(signal.dot(weights))
            activations.append(signal)
        return activations

    def fit(self, x, Y, batch_size=32, epochs=10, learning_rate=0.1, synGrad=False):
        """Train the network with mini-batch gradient steps on squared error.

        Two modes are available:

        * ``synGrad=False`` -- ordinary backpropagation. In addition, every
          hidden layer's synthetic-gradient module is regressed onto the true
          error signal that backprop delivers to that layer, and the
          mean squared output error of the last batch of each epoch is printed.
        * ``synGrad=True`` -- hidden layers are updated from the predictions
          of their (frozen) synthetic-gradient modules only; the last layer
          is still updated from the real output error.

        Args:
            x: samples, indexable by slice, one row per sample.
            Y: integer class labels (any dtype convertible with ``astype(int)``),
                aligned with ``x``. Labels are one-hot encoded against the
                width of the last layer.
            batch_size: samples per update. A trailing partial batch is skipped.
            epochs: number of passes over the data.
            learning_rate: step size for weights and synthetic-gradient modules.
            synGrad: selects the training mode described above.

        Raises:
            ValueError: if no layer has been added.
        """
        layers = len(self.synapticWeights)
        if layers == 0:
            raise ValueError("add at least one layer before calling fit")

        n_classes = self.synapticWeights[-1].shape[1]
        n_batches = len(x) // batch_size

        for _ in range(epochs):
            for j in range(n_batches):

                start = j * batch_size
                X = np.asarray(x[start:start + batch_size], dtype=float)
                labels = np.asarray(Y[start:start + batch_size]).astype(int).ravel()

                # One-hot targets sized from the actual batch and output width.
                y = np.zeros((len(X), n_classes))
                y[np.arange(len(X)), labels] = 1

                #### Forward Prop ####

                forward = self._forward(X)
                # inputs[i] is what layer i consumed: the batch for the first
                # layer, the previous layer's activations for the others.
                inputs = [X] + forward[:-1]

                out_error = forward[-1] - y
                # The factor 1/2 is applied at every layer and only rescales
                # the step of that layer; it does not change its direction.
                out_delta = out_error * self.sigmoid(forward[-1], deriv=True) / 2

                if not synGrad:

                    #### Back Prop ####

                    if j == n_batches - 1:
                        print("Loss :", np.mean(np.square(out_error)))

                    # errors[i]: error signal arriving at the output of layer i.
                    # backward[i]: that signal pushed through the sigmoid of layer i.
                    errors = [None] * layers
                    backward = [None] * layers
                    errors[-1] = out_error
                    backward[-1] = out_delta
                    for i in range(layers - 2, -1, -1):
                        errors[i] = backward[i + 1].dot(self.synapticWeights[i + 1].T)
                        backward[i] = errors[i] * self.sigmoid(forward[i], deriv=True) / 2

                    # All deltas were computed with the pre-update weights.
                    for i in range(layers):
                        step = inputs[i].T.dot(backward[i])
                        self.synapticWeights[i] = self.synapticWeights[i] - step * learning_rate

                    # Least-squares regression of each hidden layer's module onto
                    # the true error signal. The prediction is linear because
                    # an error signal is signed. Activations lie in (0, 1), so
                    # dividing by their count keeps this step stable for any
                    # learning rate below 2.
                    for i in range(layers - 1):
                        residual = forward[i].dot(self.syntheticGradients[i]) - errors[i]
                        step = forward[i].T.dot(residual) / forward[i].size
                        self.syntheticGradients[i] = self.syntheticGradients[i] - step * learning_rate

                else:

                    # Hidden layers: use the predicted error signal in place of
                    # the back-propagated one; the modules themselves stay fixed.
                    for i in range(layers - 1):
                        predicted = forward[i].dot(self.syntheticGradients[i])
                        delta = predicted * self.sigmoid(forward[i], deriv=True) / 2
                        step = inputs[i].T.dot(delta)
                        self.synapticWeights[i] = self.synapticWeights[i] - step * learning_rate

                    # Last layer: the real error is available, so use it.
                    step = inputs[-1].T.dot(out_delta)
                    self.synapticWeights[-1] = self.synapticWeights[-1] - step * learning_rate

    def predict(self, X):
        """Return the activations of the last layer for the samples in ``X``.

        ``X`` is converted with ``np.array``; the result has one row per
        sample and one column per unit of the last layer.
        """
        return self._forward(np.array(X))[-1]
                

In [104]:
# Scale the training samples by 1/255 (presumably 8-bit pixel intensities, so
# the result lies in [0, 1]) and make sure each sample is a flat 784-vector.
# The value is rebuilt from the shuffled array X, which the visible cells never
# modify, so re-running this cell does not divide by 255 a second time.
x_train = X[:len(y_train)].reshape(-1, 784) / 255.0

In [105]:
### Pretraining ###

# Hyper-parameters for the pretraining run.
batch_size, epochs, learning_rate = 64, 20, 0.01

# 784 inputs -> 500 -> 300 -> 10 outputs; only the first layer needs the
# input width, later layers take it from the layer before them.
model = DNI()
model.add(units=500, input_shape=784)
model.add(units=300)
model.add(units=10)