In [1]:
import numpy as np
import os
from matplotlib import pyplot as plt
import cv2
import math
import tensorflow as tf

### Load Dataset

In [2]:
(X_train,Y_train) , (X_test,Y_test) = tf.keras.datasets.mnist.load_data()
X_train , X_test = X_train/255. , X_test/255.

In [3]:
print(f'X_train shape : {X_train.shape}, Y_train shape : {Y_train.shape}')
print(f'X_test shape : {X_test.shape}, Y_test shape : {Y_test.shape}')

X_train shape : (60000, 28, 28), Y_train shape : (60000,)
X_test shape : (10000, 28, 28), Y_test shape : (10000,)


### Activations

In [4]:
class ReLU :
    def __init__ (self) :
        self.__type__ = 'activation'
        self.__Name__ = 'ReLU'
    
    def feed(self,X) :
        self.input = X
        self.input_shape=X.shape
        self.output = np.maximum(0,X)
        self.output_shape = self.input_shape
        return self.output
    
    def feed_back(self,Z,grad_output,lr) :
        grad = Z > 0
        #print(grad.shape,grad_output.shape)
        return grad_output*grad
    
class Softmax :
    def __init__ (self) :
        self.__type__ = 'activation'
        self.__Name__ = 'Softmax'
    
    def feed(self,X) :
        self.input = X
        e_x = np.exp(X-np.max(X))
        self.output = e_x/e_x.sum()
        return self.output
    
    def grad_feed(self,X) :
        e_x = np.exp(X)
        return (e_x/e_x.sum()) - (e_x**2/(e_x.sum()**2))
    
    def feed_back(self,Z,grad_output,lr) :
        e_x = np.exp(Z)
        out = e_x/ex.sum()
        grad = out*(1-out)
        #grad = e_x/e_x.sum()**2 - (e_x**2/(e_x.sum()**2))
        return grad_output*grad
    
class Sigmoid :
    def __init__ (self) :
        self.__type__ = 'activation'
        self.__Name__ = 'Sigmoid'
    
    def feed(self,X) :
        self.input = X
        self.output = 1/(1+np.exp(-X))
        return self.output
    
    def feed_back(self,Z,grad_output,lr) :
        out = 1/(1+np.exp(-Z))
        grad = out*((1-out)**2)
        return grad_output*grad

## Layers

`Convolution` Layer

`MaxPooling` Layer

`Average Pooling` Layer

### `Flatten` Layer

In [5]:
class Flatten :
    
    def __init__ (self,input_shape=None) :
        self.__Name__ = 'Flatten'
        self.__type__ = 'flat'
        self.input_shape = input_shape
        self.A_F = None
        re = 1
        for i in input_shape :
            re *= i
        self.output_shape = re
        
    def feed(self,X) :
        self.input = X
        self.output = X.ravel()
        return self.output
    
    def Summary(self) :
        l = len(self.__Name__)
        print(f'{self.__Name__}',' '*(20-l),self.input_shape,' '*(20-len(str(self.input_shape))),self.output_shape)

### `Dense` Layer

In [6]:
class Dense :
    
    def __init__ (self,input_shape,N_F,A_F=None) :
        self.__Name__ = 'Dense'
        self.__type__ = 'dense'
        self.input_shape = input_shape
        self.N_F = N_F
        self.A_F = A_F
        self.output_shape = N_F
        self.weights = np.random.randn(self.input_shape, self.output_shape) / np.sqrt(self.input_shape + self.output_shape)
        self.bias = np.random.randn(1, self.output_shape) / np.sqrt(self.input_shape + self.output_shape)
        #self.bias = np.random.randint(0,2,(1,self.output_shape)) / np.sqrt(self.input_shape + self.output_shape)
        
    def feed(self,X) :
        if X.shape[0] != 1 :
            output = []
            output.append(X)
            self.input = np.array(output)
        else :
            self.input = X
        self.output = np.dot(X,self.weights) + self.bias
        return self.output
    
    def feed_back(self, Z , output_error, learning_rate=1e-03):
        input_error = np.dot(output_error, self.weights.T)
        #output_error = output_error.mean(axis=0)*Z.T.shape[0]
        weights_error = np.dot(Z.T, output_error)
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * output_error
        return input_error
    
#     def backward(self,grad_output,lr):
#         grad_input = np.dot(grad_output, self.weights.T)
#         grad_weights = np.dot(self.input.T, grad_output)
#         grad_biases = grad_output.mean(axis=0)*input.shape[0]
#         assert grad_weights.shape == self.weights.shape and grad_biases.shape == self.biases.shape
#         self.weights = self.weights - self.learning_rate * grad_weights
#         self.biases = self.biases - self.learning_rate * grad_biases
#         return grad_input
    
    def Summary(self) :
        l = len(self.__Name__)
        print(f'{self.__Name__}',' '*(20-l),self.input_shape,' '*(20-len(str(self.input_shape))),self.output_shape)

In [7]:
class Sequential :
    
    def __init__ (self) :
        self.Layers = []
        self.input_shape = None
        self.Activations = []
        self.acc = []
        self.val_acc = []
        self.error = []
        self.val_error = []
        
    def add(self,Layer) :
        self.Layers.append(Layer)
        if Layer.__type__ != 'activation' :
            if self.input_shape is None :
                self.input_shape = Layer.input_shape
            self.output_shape = Layer.output_shape
        if Layer.A_F is not None :
            if Layer.A_F.lower() == 'softmax' :
                self.Activations.append(Softmax())
            elif Layer.A_F.lower() == 'sigmoid' :
                self.Activations.append(Sigmoid())
            else :
                self.Activations.append(ReLU())
        else :
            self.Activations.append(None)
        
    def compile(self,loss='cross_entropy',metrics=['acc']) :
        self.loss = loss
        self.metrics = metrics
        
    def one_hot_encode(self,labels) :
        Labels = np.zeros((len(labels),len(set(labels))))
        for i,label in enumerate(labels) :
            Labels[i][label] = 1
        return Labels
        
    def fit(self,train_data,valid_data=None,validation_split=.1,epochs=10,lr=0.01) :
        self.epochs = epochs
        if train_data is None :
            raise ValueError('Training Data Required')
        else :
            self.input = train_data[0]
            #self.Total = self.one_hot_encode(train_data[1])
            self.Total = train_data[1]
            self.target = self.Total
            self.No_of_outs = len(set(self.target))
        N = len(self.input)
        
        if valid_data is None :
            if validation_split != 0 :
                n = int(len(self.input)*(1-validation_split))
                K = np.random.randint(0,N,(N))
                for i in range(N-1) :
                    train_data[0][K[i]] = train_data[0][K[i+1]]
                    train_data[1][K[i]] = train_data[1][K[i+1]]
                self.input , self.target = train_data[0][:n] , self.Total[:n]
                self.val_input , self.val_target = train_data[0][n:] , self.Total[n:]
        else :
            self.val_input , self.val_target = valid_data[0] , valid_data[1]
        
        print('\nModel Fitting\n')
        
        for ep in range(epochs) :
            error = 0
            acc = 0
            
            print(f'\nepoch : {ep+1}/{epochs}')
            
            for c,(X,Y) in enumerate(zip(self.input,self.target)) :
                
                L_INPUTS , L_OUTPUTS ,  A_INPUTS , A_OUTPUTS = [] , [] , [] , []
                
                output = X
                loss = None
                out_err = None
                
                """
                    Forward Feeding [ Z = W*X + B ]
                """
                for layer , activation in zip(self.Layers,self.Activations) :
                    L_INPUTS.append(output)
                    output = layer.feed(output) # Feeding to Layer
                    L_OUTPUTS.append(output)
                    if activation is not None :
                        A_INPUTS.append(output)
                        output = activation.feed(output) # applying activation to output of the Layers
                        A_OUTPUTS.append(output)
                        
                activation_output = self.Activations[-1].output # a-l
                output = self.Layers[-1].output # z-l

                """
                    Loss Calculation or Output Error
                """
                
                if self.loss == 'cross_entropy' :
                    loss = self.crossentropy(activation_output,Y)
                    grad_activation = self.Activations[-1].grad_feed(output)
                    out_err = self.grad_crossentropy(output,Y)*grad_activation
                
                """
                    Backward Feeding
                """
                
                for i in range(1,len(self.Layers)-1) :
                    if self.Layers[-i].__Name__ != 'Flatten' :
                        if self.Activations[-i].__Name__ != 'Softmax' :
                            out_err = self.Activations[-i].feed_back(A_INPUTS[-i],out_err,lr)
                        out_err = self.Layers[-i].feed_back(L_INPUTS[-i],out_err,lr)
                
                error = np.mean(loss)
                print('\rerror=%f' % (error),end="")
                
            """
                Accuracy measuring at every epoch
            """
            
            accuracy = sum([y == np.argmax(model.predict(x)) for x, y in zip(self.input, self.target)]) / len(self.input)
            self.acc.append(accuracy)
            
            if 'acc' in self.metrics :
                val_accuracy = sum([y == np.argmax(model.predict(x)) for x, y in zip(self.val_input, self.val_target)]) / len(self.val_input)
                self.val_acc.append(val_accuracy)
                print(' acc=%f , val_acc=%f' % (accuracy , val_accuracy))
            else :
                print('\racc=%f' % (accuracy))
            
        return None
            
    def mse(self,y_true, y_pred):
        return np.mean(np.power(y_true - y_pred, 2))
    
    def mse_prime(self,y_true, y_pred):
        return 2 * (y_pred - y_true) / y_pred.size
    
    def transfer_derivative(self,output):
        return output * (1.0 - output)
    
#     def crossentropy(self,pred,Truth) :
#         GroundTruth = np.zeros(self.No_of_outs)
#         GroundTruth[Truth] = 1
#         return np.mean(GroundTruth*np.log(pred)+(1-GroundTruth)*np.log(1-pred))
    
#     def grad_crossentropy(self,pred,Truth) :
#         GroundTruth = np.zeros(self.No_of_outs)
#         GroundTruth[Truth] = 1
#         return ((GroundTruth/pred)-((1-GroundTruth)/(1-pred)))
    
    def crossentropy(self,logits,reference_answers):
        #print(logits[0][reference_answers]+np.log(np.sum(np.exp(logits),axis=-1)))
        return - logits[0][reference_answers] + np.log(np.sum(np.exp(logits),axis=-1))
    
    def grad_crossentropy(self,logits,reference_answers):
        ones_for_answers = np.zeros_like(logits)
        ones_for_answers[np.arange(len(logits)),reference_answers] = 1
        softmax = np.exp(logits) / np.exp(logits).sum(axis=-1,keepdims=True)
        return (- ones_for_answers + softmax) / logits.shape[0]
    
    def showImg(self,X) :
        plt.imshow(X)
        plt.show()
    
    def predict(self,X):
        output = X
        for layer , activation in zip(self.Layers,self.Activations) :
            output = layer.feed(output)
            if activation is not None :
                output = activation.feed(output)
        return output
    
    def pred_class(self,X) :
        classes = []
        if X.shape == model.input_shape :
            output = self.predict(X)
            return np.argmax(output)
        else :
            for output in X :
                output = self.predict(output)
                classes.append(np.argmax(output))
            return np.array(classes)
    
    def Summary(self) :
        print('='*60)
        print('Model Summary')
        print('_'*60)
        print('Layers',' '*(20-len('Layers')),'Input Shape',' '*(20-len('Input Shape')),'Output Shape',' '*(20-len('Output Shape')))
        print('='*60)
        for Layer in self.Layers :
            if Layer.__type__ != 'activation' :
                Layer.Summary()
                print('_'*60)
        print('='*60)
                
    def save(self,path) :
        f = open(path,'w')
        for i in self.Layers :
            if i.__Name__ != 'Flatten' :
                f.write(i.__Name__,i.N_F,i.A_F,i.weights)
                
    def load_model(self,path) :
        f = open(path,'r')
        for i in self.Layers :
            if i.__Name__ != 'Flatten' :
                return

In [8]:
model = Sequential()
model.add(Flatten(input_shape=X_train[0].shape))
model.add(Dense(input_shape=model.output_shape,N_F=100,A_F='ReLU'))
model.add(Dense(input_shape=model.output_shape,N_F=32,A_F='ReLU'))
model.add(Dense(input_shape=model.output_shape,N_F=len(set(Y_train)),A_F='Softmax'))
model.compile(loss='cross_entropy',metrics=['acc'])

model.Summary() 
model.fit(train_data=(X_train,Y_train),epochs=10,lr=1e-02,validation_split=.1)#,valid_data=(X_test,Y_test))

Model Summary
____________________________________________________________
Layers                Input Shape           Output Shape         
Flatten               (28, 28)              784
____________________________________________________________
Dense                 784                   100
____________________________________________________________
Dense                 100                   32
____________________________________________________________
Dense                 32                    10
____________________________________________________________

Model Fitting


epoch : 1/10
error=2.338188 acc=0.743241 , val_acc=0.762167

epoch : 2/10
error=2.194753

KeyboardInterrupt: 

In [None]:
plt.plot(range(model.epochs),model.acc,c='r',label='acc')
plt.plot(range(model.epochs),model.val_acc,c='b',label='val_acc')
plt.legend()
plt.show()

In [None]:
c = 0
for i in range(len(X_test)) :
    print(model.pred_class(X_test[i]),Y_test[i])
    if model.pred_class(X_test[i]) == Y_test[i] :
        c += 1
print(c*100/len(X_test))

In [None]:
model.pred_class(X_test)

In [None]:
model.pred_class(X_train[0])

In [None]:
c = 0
for i in range(len(X_train)) :
    #print(model.pred_class(X_train[i]),Y_train[i])
    if model.pred_class(X_train[i]) == Y_train[i] :
        c += 1
print(c*100/len(X_train))

In [None]:
img = X_train[1]
print(model.predict(img))
print(model.pred_class(img))
plt.imshow(img)
plt.show()

In [None]:
img = X_train[6]
print(model.predict(img))
print(model.pred_class(img))
plt.imshow(img)
plt.show()