In [1]:
import numpy as np
import joblib
import scipy.special
import sys

In [93]:
class MLPClassifier:

    acti_fns = ['relu', 'sigmoid', 'tanh']
    weight_inits = ['random', 'he', 'xavier']
    optimizers = ['gradient_descent','momentum', 'nag', 'adam', 'adagrad', 'rmsprop']
    regularizations = ['l1', 'l2', 'layer_norm', 'batch_norm']
    
    def __init__(self, layers, num_epochs, dropouts, learning_rate = 1e-5, activation_function='relu', optimizer="gradient_descent",
weight_init="random", regularization="l2", batch_size=64, **kwargs):
        
        if (activation_function not in self.acti_fns):
            raise Exception("Incorrect Weight Activation Function")
        if (optimizer not in self.optimizers):
            raise Exception("Incorrect Optimizer")
        if (regularization not in self.regularizations):
            raise Exception("Incorrect Regularizers")
        if (weight_init not in self.weight_inits):
            raise Exception("Incorrect Weight Initialization")
        
        
        self.weights = []
        self.weights_grad = []
        self.inputs = []
        for i in range(len(layers)-1):
            if weight_init == "random":
                w = np.random.random((layers[i]+1,layers[i+1])).astype(np.float128)
                #else : he xavier
            self.weights.append(w)
            self.weights_grad.append(np.zeros(w.shape,dtype=np.float128))
            

    
        if activation_function == 'relu':
            self.acti_fns = self.relu
            self.acti_fns_grad = self.relu_grad
        elif activation_function == 'sigmoid':
            self.acti_fns = self.sigmoid
            self.acti_fns_grad = self.sigmoid_grad
        elif activation_function == 'tanh':
            self.acti_fns = self.tanh
            self.acti_fns_grad = self.tanh_grad
    
    
    
        if optimizer == 'gradient_descent': 
            self.optimizer = self.gradient_descent
            self.gradient_descent_init(learning_rate)
        elif optimizer == 'momentum':
            self.optimizer = self.momentum
            self.momentum_init()
        elif optimizer == 'nag':
            self.optimizer = self.nag
            self.nag_init()
        elif optimizer == 'adam':
            self.optimizer = self.adam
            self.adam_init()
        elif optimizer == 'adagrad':
            self.optimizer = self.adagrad
            self.adagrad_init()
        elif optimizer =='rmsprop':
            self.optimizer = self.rmsprop
            self.rmsprop_init()
    
    
    
#         self.lr = learning_rate

        self.batch_size = batch_size
        self.epochs = num_epochs
        self.dropouts = dropouts
        self.regul = regularization
    
    def fit(self,X,Y):
        
        # make it loop over batches <---
        training_loss = []
        iterations = []
        for e in range(self.epochs):
            rows = np.random.choice(X.shape[0],size = self.batch_size,replace = False)
            x_batch = X[rows,:]
            y_batch = Y[rows]
            one_hot = np.eye(y_batch.max() + 1)[y_batch] # D(s)
            
            
            probs = self.forward_pass(x_batch) # Y(s)           
            predicted_labels = np.argmax(probs,axis = 1)
            loss = self.loss(probs,one_hot)
            acc = self.accuracy(predicted_labels,y_batch)
#             print(probs,loss,acc)
#             sys.exit()
#             print("probs:{},pred:{},y_batch:{},one_hot:{}".format(probs.shape,predicted_labels.shape,y_batch.shape,one_hot.shape))
            
            
            self.backward_pass(probs,one_hot)
            self.optimizer()
            
            print("---> Epoch: {}/{},Batch Size: {},Loss:{} ,Accuracy: {}".format(e+1,epochs,self.batch_size,loss,acc))
            training_loss.append(loss)
            iterations.append(e+1)
            
    def forward_pass(self,X):
        self.inputs =[] # Yi(s)
        
        for i in range(len(self.weights)):
            X = np.append(X,np.ones((X.shape[0],1)),axis=1)
            self.inputs.append(X)
            X = np.matmul(X,self.weights[i])
            if (i < len(self.weights) - 1):
                X = self.acti_fns(X)
            else :
                X = self.softmax(X)
        self.inputs.append(X)
        return X
        
    def backward_pass(self,Y,D):
        self.zero_grad()
        dl_dy = self.loss_grad(Y,D)
        for l in range(len(self.inputs)-1,1,-1):
            if (l == len(self.inputs) -1 ):
                grad = self.softmax_grad(self.inputs[l])
            else: 
                grad = self.acti_fns_grad(self.inputs[l])
            dl_dz = np.multiply(grad,dl_dy)
            for instance in range(dl_dz.shape[0]):
                for i in range(self.weights_grad[l-1].shape[0]):
                    for j in range(self.weights_grad[l-1].shape[1]):
                        self.weights_grad[l-1][i,j] += dl_dz[instance,j]*self.inputs[l-1][instance,i]
            dl_dy = np.matmul(dl_dz,self.weights[l-1].T)
            
    
    
    
    
    
    
    
    def gradient_descent(self):
        for i in range(len(self.weights)):
            self.weights[i] -= self.weights_grad[i]
        
            
    
    def momentum(self):
        pass
    
    def nag(self):
        pass
    
    def adam(self):
        pass
    
    def adagrad(self):
        pass
    
    def rmsprop(self):
        pass
    
    def gradient_descent_init(self,learning_rate):
        """ initialize learing rate
        """
        self.lr = learning_rate
        pass
    
    def momentum_init(self):
        pass
    
    def nag_init(self):
        pass
    
    def adam_init(self):
        pass
    
    def adagrad_init(self):
        pass
    
    def rmsprop_init(self):
        pass
        
        
    
    def predict(self,X):  
        probs = self.predict_proba(X)
        return np.argmax(probs,axis=1)
    
    def predict_proba(self,X):
        return self.forward_pass(X)
        
    def get_params(self):
        return self.weights
        
    def score(self,X,Y):
        pass
        
    
        

    def zero_grad(self):
        for i in range(len(self.weights_grad)):
            self.weights_grad[i] = np.zeros(self.weights_grad[i].shape)
        
    
    
    
    def loss(self,Y,D):
        ''' Y: probs (n_instances,M)
            D: Actual Labels (n_instances,M); M:number of neurons in the last layer
        '''
        return np.multiply(np.log(Y + 1e-9),-D).sum(axis=1)
    
    def loss_grad(self,Y,D):
        '''Y : probabilites (n_instances,M)
           D : Actual Labels(n_instances,M); categorical cross entropy
        '''
        N = Y.copy() + 1e-9
        N = -1/N
        return N
    
    
    def relu(self,X):
        X[X<0] = 0
        return X

    def relu_grad(self,X):
        X[X>=0] = 1
        X[X<0] = 0
        return X
    
    def tanh(self,X):
        return np.tanh(X)
    
    def tanh_grad(self,X):
        return 1 - self.tanh(X)**2
    
    def sigmoid(self,X):
        return 1/(1 + np.exp(-X))
    
    def sigmoid_grad(self,X):
        a = self.sigmoid(X)
        return a*(1 - a)
    
    
    
    def softmax(self,X):
#         X = X.astype(np.float128)
#         X = np.exp(X)/np.sum(np.exp(X),axis= 1)[:,None]
        X = scipy.special.softmax(X,axis=1)
        return X
        
        
    def softmax_grad(self,X):
        ''' there is an other of derivative, however for gradient computation this term suffices
        '''
        S = self.softmax(X)
        out = S*(1- S)
        return out
    
    def accuracy(self,predicted_labels,true_labels):
        s = predicted_labels == true_labels
        return s.sum()/s.shape[0]
        
                  

In [94]:
n = MLPClassifier(layers = [784,512,256,10], dropouts = 0.2,num_epochs=1, learning_rate = 1e-5, activation_function='relu', optimizer="gradient_descent",
weight_init="random")

In [95]:
n.fit(x_train,y_train)

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 256 is different from 257)

In [53]:
r = np.matmul(n.inputs[2],n.weights[2])
r = r.astype(np.float128)

In [258]:
softmax(r,axis = 1)

array([[0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 

In [2]:
def upload_datasets():
    val_set = joblib.load('val_set.pkl')
    train_set = joblib.load('train_set.pkl')
    return train_set,val_set
def preprocess_data(dataset):
    x_temp = dataset['Image'].values
    x = []
    y = dataset['Labels'].values
#     y = np.array(y).reshape((y.shape[0],1))
    for i in range(0,len(x_temp)):
        x.append(np.array(x_temp[i]).reshape(784,)/255)
    x = np.array(x)
    return x,y
train_set, val_set = upload_datasets()
x_train, y_train = preprocess_data(train_set)
x_val , y_val = preprocess_data(val_set)


Unnamed: 0,Image,Labels
0,<PIL.Image.Image image mode=L size=28x28 at 0x...,5
1,<PIL.Image.Image image mode=L size=28x28 at 0x...,0
2,<PIL.Image.Image image mode=L size=28x28 at 0x...,4
3,<PIL.Image.Image image mode=L size=28x28 at 0x...,1
4,<PIL.Image.Image image mode=L size=28x28 at 0x...,9
...,...,...
9995,<PIL.Image.Image image mode=L size=28x28 at 0x...,5
9996,<PIL.Image.Image image mode=L size=28x28 at 0x...,5
9997,<PIL.Image.Image image mode=L size=28x28 at 0x...,5
9998,<PIL.Image.Image image mode=L size=28x28 at 0x...,5


In [291]:
np.eye(y_val.max()+1)[y_val]

array([[0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]])