In [24]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt


In [2]:
def Load_data(path_of_folders):
    list_of_images=[]
    classes=[]
    list_of_folder_names = os.listdir(path_of_folders)
    k=0
    for each_folder in list_of_folder_names:
        base_path=os.path.join(path_of_folders,each_folder)
        list_of_image_in_folder=os.listdir(base_path)
        list_of_images.extend(map(lambda x: plt.imread(os.path.join(base_path,x)).reshape(1024,),list_of_image_in_folder))
        classes.extend(map(lambda x: k,list_of_image_in_folder))
        k+=1
    images_df = pd.DataFrame(np.array(list_of_images))
    classes = np.array(classes)
    return (images_df,classes)

In [36]:
def Softmax(data,params):
    ans = np.exp(np.matmul(data,params[0])+params[1])
    ans = (ans/(np.sum(ans,axis=1)).reshape(data.shape[0],1))
    return ans

In [81]:
def Neg_log_loss(labels,softmax,theta,regularisation,lamb):
    ans = -np.mean(labels*np.log(softmax))
    if regularisation:
        ans = ans - lamb/(2*softmax.shape[0])*(np.linalg.norm(theta))
    return np.array(ans)

In [39]:
def fdash_thet0(sftmax,labels):
    '''derivative of theta_0 is calculated as summation of labels-Softmax'''
    ans = np.sum(sftmax-labels,axis=0)
    return np.array(ans)/sftmax.shape[0]

In [82]:
def fdash_thet(data,labels,sftmx,theta,regularisation,lamb):
    '''derivative of theta is calculated as (Updated_labels.T x Data).T
       Updated labels = -(labels-Softmax)
       This negative sign is due to Negative Log Loss'''
    ans = np.matmul(np.transpose(data),sftmx-labels)
    if regularisation:
        ans = np.matmul(np.transpose(data),sftmx-labels) - lamb/(2*data.shape[0])*np.sum(theta)
    return np.array(ans)/data.shape[0]

In [77]:
def gradient_descent(training_data,labels,thetas,epsilon,learning_rate,Type=None,epochs=0,batch_size=0,regularisation=None,lamb=0):
    '''
    This function will use Batch Gradient Descent and Mini-Batch Gradient Descent 
    for optimisation depending upon the choice of user.
    
    Parameters
    ----------
    training_data: array-like
        input of shape (m,n) where m represents the number of training samples.
    
    labels: array-like
        input of shape (m,1)
        should not be one hot encoded
    
    thetas: list of arrays [thetas,theta_0]
        they should be pre-initialised
        thetas should be of the shape (n,k) where k is the number of unique labels
        theta_0 should be of the shape (1,k)
    
    epsilon: float
        error tolerance
    
    learning_rate: float
    
    Type: str input, optional
        use 'mini' for Mini-Batch Gradient Descent
        use None for Batch Gradient Descent
    
    epochs: int, optional
        should be used if user has mentioned the type of gradient descent to be used
        
    batch_size: int, optional
        size of the mini batch
        
    regularisation: bool, optional
        Default None
        set to True for l2 regularisation
        
    lamb: int, optional
        a positive value which will be used as regularisation parameter
        
        
    returns a tuple containing thet_f, thet0_f and neg_loss_history
    '''
    assert thetas[0].shape==(training_data.shape[1],len(np.unique(labels)))
    assert thetas[1].shape==(1,len(np.unique(labels)))
    assert epochs>=0
    assert learning_rate>=0
    assert epsilon>=0
    assert lamb>=0
    assert batch_size>=0 and batch_size<training_data.shape[0]
    
    
    thet_i = thetas[0]
    thet0_i = thetas[1]
    
    iterations=[]
    neg_log_history=[]
    deriv=[]
    iden=np.identity(len(np.unique(labels)))
    classes_train = labels
        
    if Type==None:
        training_data_1 = training_data
        classes_tr = np.array(iden[classes_train])
        i=0
        while True:
            sftmx_ini = Softmax(training_data,[thet_i,thet0_i])
            #print('labels',labels.shape)


            thet0_f = thet0_i - learning_rate*(fdash_thet0(sftmx_ini,classes_tr))
            #print('thet0_f',thet0_i.shape)
            thet_f = thet_i - learning_rate*(fdash_thet(training_data_1,classes_tr,sftmx_ini,theta=thet_i,regularisation=regularisation,lamb=lamb))
            #print('thet_f',thet_f.shape)

            sftmx_fin = Softmax(training_data_1,[thet_f,thet0_f])


            neg_loss = abs(Neg_log_loss(classes_tr,sftmx_fin,thet_f,regularisation,lamb) - Neg_log_loss(classes_tr,sftmx_ini,thet_i,regularisation,lamb))
            neg_log_history.append(abs(Neg_log_loss(classes_tr,sftmx_fin,thet_f,regularisation,lamb)))
            iterations.append(i)
            deriv.append(thet_f[:,0])
            if i%500==0:
                print('neg log loss at iteration {} is {} {}'.format(i,neg_log_history[-1],neg_loss))

            i+=1

            if (neg_loss)<epsilon:
                print('neg log loss final at iteration {} is {}'.format(i,neg_log_history[-1]))
                return thet_f,thet0_f,neg_log_history,deriv
            else:
                thet0_i = thet0_f
                thet_i = thet_f

                
    elif Type=='mini':
        #combine data and labels
        shuffled_data = pd.concat([pd.DataFrame(cp.asnumpy(training_data)),pd.DataFrame((classes_train))],axis=1)
        shuffled_data = shuffled_data.sample(frac=1)
        
        #shuffle the data
        training_data,classes_train = np.array(shuffled_data.iloc[:,0:training_data.shape[1]]),np.array(shuffled_data.iloc[:,-1])

        classes_train=classes_train.reshape(training_data.shape[0])
        batches = training_data.shape[0]//batch_size
        
        for e in range(epochs):
            for batch in range(batches):
                training_data_1 = np.array(training_data[batch*batch_size:(batch+1)*batch_size])
                classes_tr = np.array(iden[classes_train[batch*batch_size:(batch+1)*batch_size]])
                
                sftmx_ini = Softmax(training_data_1,[thet_i,thet0_i])

                thet0_f = thet0_i - learning_rate*(fdash_thet0(sftmx_ini,classes_tr))
                thet_f = thet_i - learning_rate*(fdash_thet(training_data_1,classes_tr,sftmx_ini,thet_i,regularisation,lamb))

                sftmx_fin = Softmax(training_data_1,[thet_f,thet0_f])
                

                neg_loss = abs(Neg_log_loss(classes_tr,sftmx_fin,thet_f,regularisation,lamb) - Neg_log_loss(classes_tr,sftmx_ini,thet_i,regularisation,lamb))
                neg_log_history.append(abs(Neg_log_loss(classes_tr,sftmx_fin,thet_f,regularisation,lamb)))
                iterations.append((e,batch))


#                 if (neg_loss)<epsilon:
#                     print('neg log loss final at iteration {} {} is {}'.format(e,batch,neg_loss))
# #                     return thet_f,thet0_f,neg_log_history,deriv
#                 else:
                thet0_i = thet0_f
                thet_i = thet_f
            if e%10==0:
                print('neg log loss after epoch {} is {} {}'.format(e,neg_log_history[-1],neg_loss))
            deriv.append(thet_f[:,0])

        print('neg log loss after epoch {} is {} {}'.format(e,neg_log_history[-1],neg_loss))
    return thet_f,thet0_f,neg_log_history,deriv

In [8]:
# load training data
path_of_train_folders = 'D:/Datasets/DevanagariHandwrittenCharacterDataset/Train'
raw_data,classes_train = Load_data(path_of_train_folders)

In [9]:
# load testing data
path_of_test_folders = 'D:/Datasets/DevanagariHandwrittenCharacterDataset/Test'
test_data,classes_test = Load_data(path_of_test_folders)

Since this is a multi-class LR problem, we will create one-hot-encoded vectors as our labels. This makes it easier to apply Softmax function for probabilities.

In [30]:
iden=np.identity(46)
classes_tr = iden[classes_train]
classes_tr = np.array(classes_tr)
raw_data_copy = raw_data.copy()

In [54]:
#PCA for dimensionality reduction. This will reduce the amount of calculations to be done and will
#improve the execution speed.
cov_matrix = raw_data_copy.cov()
q,lam,qt = np.linalg.svd(cov_matrix)
trace = np.sum(lam)
f_vector=[]
s=0
for i in range(len(lam)):
    s+=lam[i]
    if s/trace>0.97:
        break
    else:
        f_vector.append(q[:,i])
f_vector = np.array(f_vector)
f_vector.shape

(238, 1024)

In [55]:
training_data = np.matmul(cp.array(raw_data_copy),np.transpose(f_vector))
training_data.shape

(78200, 238)

In [88]:
#Run the training process
thet0_i = np.zeros(shape=(1,46))
thet_i = np.zeros(shape=(training_data.shape[1],46))

tf,t0f,nlh,derivs = gradient_descent(training_data=training_data,labels=classes_train,thetas=[thet_i,thet0_i],
                                     epsilon=1e-6,learning_rate=0.001,Type='mini',epochs=200,batch_size=170,
                                     regularisation=True,lamb=0.5)

neg log loss after epoch 0 is 0.07927907947742127 3.112476981098622e-05
neg log loss after epoch 10 is 0.053378486373530286 2.447947030378289e-05
neg log loss after epoch 20 is 0.04173628972844582 2.0219167498697344e-05
neg log loss after epoch 30 is 0.03547434704548097 1.7802997379627594e-05
neg log loss after epoch 40 is 0.03148643480833062 1.6364560580003906e-05
neg log loss after epoch 50 is 0.028644195484804805 1.5425132573316375e-05
neg log loss after epoch 60 is 0.02646291833036317 1.4757293376920638e-05
neg log loss after epoch 70 is 0.024702666412148067 1.4252009725115866e-05
neg log loss after epoch 80 is 0.023230818123721644 1.3852637108013877e-05
neg log loss after epoch 90 is 0.021967605424858018 1.3526968195815003e-05
neg log loss after epoch 100 is 0.02086181118563573 1.325517629501699e-05
neg log loss after epoch 110 is 0.019878810433762602 1.3024247615255058e-05
neg log loss after epoch 120 is 0.018994200084621823 1.2825200439235684e-05
neg log loss after epoch 130 is 

In [89]:
#training accuracy
ans=[]
sft=Softmax(training_data,[tf,t0f])
for i in range(training_data.shape[0]):
    ans.append(np.argmax(sft[i]))
np.count_nonzero(ans==classes_train)/training_data.shape[0]

0.7037468030690537

In [90]:
#testing
classes_te = np.array(iden[classes_test])
test_data_copy = test_data.copy()
test_data_copy = np.matmul(np.array(test_data_copy),np.transpose(f_vector))

In [91]:
#testing accuracy
test_sft=Softmax(test_data_copy,[tf,t0f])
ans_test = []
for i in range(test_data_copy.shape[0]):
    ans_test.append(np.argmax(test_sft[i]))
np.count_nonzero(ans_test==classes_test)/test_data_copy.shape[0]

0.702463768115942

The accuracy can be improved by tuning our hyperparameters