In [None]:
import pandas as pd
import numpy as np
import scipy.stats as s
import os

In [None]:
base_path = "./DevanagariHandwrittenCharacterDataset/Train"

In [None]:
list_of_folders = os.listdir(base_path)

In [None]:
import matplotlib.pyplot as plt

In [None]:
def image_stretching(one_image_path):
    """Load one image file and flatten it into a single row.

    Parameters
    ----------
    one_image_path : path of the image file, passed straight to ``plt.imread``.

    Returns
    -------
    The pixel array reshaped to ``(1, 1024)``. The reshape raises if the
    image does not contain exactly 1024 values.
    """
    return plt.imread(one_image_path).reshape(1,1024)

In [None]:
stacked_up_images = []

# os.listdir returns entries in arbitrary order, while the labels built later
# are assigned purely by position (consecutive runs of rows per folder).
# Iterating in sorted order makes the folder -> label-index mapping, and the
# row order inside each folder, the same on every run and every machine.
for one_folder in sorted(list_of_folders):

    one_folder_path = os.path.join(base_path,one_folder)

    list_of_images_one_folder = sorted(os.listdir(one_folder_path))

    # Each image becomes one flattened row (see image_stretching).
    stacked_up_images.extend(
        image_stretching(os.path.join(one_folder_path,image_name))
        for image_name in list_of_images_one_folder
    )

In [None]:
len(stacked_up_images)

In [None]:
stacked_up_images = np.array(stacked_up_images)

In [None]:
stacked_up_images.shape

In [None]:
# Collapse everything after the first axis so there is one flat row per image.
# The row count comes from the array itself instead of a hard-coded 78200, so
# the cell keeps working if the number of images (or pixels per image) changes.
stacked_up_images = stacked_up_images.reshape(stacked_up_images.shape[0],-1)

In [None]:
training_data = pd.DataFrame(stacked_up_images)

In [None]:
# One (1700, 1) float column per class index 0..45, each filled with that
# class index; the columns are concatenated into a single vector afterwards.
labels = [np.full((1700,1),float(class_index)) for class_index in range(0,46)]

In [None]:
labels = np.concatenate(labels)

In [None]:
labels

In [None]:
labels.shape

In [None]:
training_data['labels'] = labels

In [None]:
training_data

In [None]:
training_data['labels']

In [None]:
labels = np.array(training_data['labels'])

In [None]:
labels = labels.reshape(labels.shape[0],1)

In [None]:
labels.shape

In [None]:
labels = np.uint8(labels)

In [None]:
labels

In [None]:
I = np.eye(46,46)

In [None]:
print(I)

In [None]:
# Use every integer label to select a row of the identity matrix I, which
# yields a one-hot target row per example. Because labels is a column
# (n, 1), the result carries an extra middle axis that is flattened next.
O = I[labels]

In [None]:
# Drop the extra axis introduced by indexing with a column of labels, leaving
# one one-hot row per example. Sizes are taken from O itself rather than the
# hard-coded (78200, 46), so the cell survives a change in dataset size.
O = O.reshape(O.shape[0],-1)

In [None]:
O.shape

In [None]:
O[0:1700,:]

In [None]:
# Rows of the second class: labels are laid out in runs of 1700 rows, so the
# second run is rows 1700..3399 inclusive, i.e. an exclusive slice end of 3400.
O[1700:3400,:]

In [None]:
O.shape

In [None]:
X = training_data.drop(['labels'],axis=1)

In [None]:
X = np.array(X)

In [None]:
X.shape

In [None]:
# Covariance matrix of the pixel features. rowvar=False tells np.cov that each
# column of X is a variable and each row is one observation (one image).
sigma = np.cov(X,rowvar=False)

In [None]:
def pca_reduced_data(whole_data_single_cov_mat,preserved_variance,training_examples=None):
    """Project the training examples onto the leading principal directions.

    The covariance matrix is factorised with ``np.linalg.svd``. Leading
    directions are kept, in the order returned by the SVD, until the running
    sum of their singular values divided by the total strictly exceeds
    ``preserved_variance``; if that never happens all directions are kept.

    Parameters
    ----------
    whole_data_single_cov_mat : square covariance matrix of the features.
    preserved_variance : fraction (e.g. 0.95) of the summed singular values
        that the retained directions must exceed.
    training_examples : optional data matrix with one example per row and as
        many columns as the covariance matrix. When omitted, the notebook-level
        variable ``X`` is used, which is what the original implementation
        always did.

    Returns
    -------
    (reduced_training_examples, eig_vecs) : the projected data and the matrix
        whose columns are the retained directions.

    Notes
    -----
    The examples are multiplied by the directions as-is; no mean is subtracted
    from them inside this function.
    """
    if training_examples is None:
        # Backward-compatible fallback to the notebook global.
        training_examples = X

    left_vectors, singular_values, _ = np.linalg.svd(whole_data_single_cov_mat)

    total = np.sum(singular_values)

    # Fraction covered by the first k directions for k = 0 .. len; the leading
    # zero represents "no directions kept yet".
    covered_fraction = np.concatenate(([0.0], np.cumsum(singular_values))) / total

    # Smallest k whose covered fraction exceeds the target; keep everything if
    # the target is never exceeded.
    exceeded = np.flatnonzero(covered_fraction > preserved_variance)
    n_components = int(exceeded[0]) if exceeded.size else singular_values.size

    eig_vecs = left_vectors[:,0:n_components]

    reduced_training_examples = np.matmul(training_examples,eig_vecs)

    return reduced_training_examples,eig_vecs

In [None]:
# Keep just enough leading directions for their summed singular values to
# exceed 95% of the total, and project the data onto them.
# NOTE(review): despite its name, `normalized_data` is the PCA projection
# returned by pca_reduced_data; no scaling is applied to it here.
normalized_data,eig_vecs_final = pca_reduced_data(sigma,0.95)

In [None]:
normalized_data.shape

In [None]:
# Number of classes: the width of the one-hot target matrix O.
M = O.shape[1]

# Number of training examples: O has one row per example.
N_train = O.shape[0]

# Number of features per example after the PCA projection.
N = normalized_data.shape[1]

In [None]:
M

In [None]:
N_train

In [None]:
N

In [None]:
def P(theta0,theta,normalized_data):
    """Softmax class probabilities for every example.

    Parameters
    ----------
    theta0 : bias row, broadcast against the (examples, classes) logits.
    theta : weight matrix of shape (features, classes).
    normalized_data : data matrix of shape (examples, features).

    Returns
    -------
    Array of shape (examples, classes) whose rows sum to 1.
    """
    logits = theta0 + np.matmul(normalized_data,theta)

    # Subtracting each row's maximum leaves the softmax value unchanged but
    # keeps np.exp from overflowing to inf (which would give inf/inf = nan).
    logits = logits - np.max(logits,axis=1,keepdims=True)

    U = np.exp(logits)

    # keepdims gives a column that broadcasts over the classes for any number
    # of examples, instead of assuming exactly 78200 rows.
    S = np.sum(U,axis=1,keepdims=True)

    return U/S

In [None]:
def neg_log_loss(O,P):
    """Mean negative log-likelihood (cross-entropy) over all examples.

    Parameters
    ----------
    O : target matrix of shape (examples, classes), e.g. one-hot rows.
    P : predicted probabilities with the same shape as ``O``.

    Returns
    -------
    The scalar ``-mean_i sum_k O[i, k] * log(P[i, k])``.
    """
    O = np.asarray(O,dtype=float)

    P = np.asarray(P,dtype=float)

    # Evaluate the logarithm only where the target is non-zero. Entries with a
    # zero target contribute nothing to the sum, and skipping them avoids
    # 0 * log(0) = nan when a non-target probability underflows to exactly 0.
    log_P = np.zeros_like(P)

    np.log(P,out=log_P,where=(O != 0))

    O_logP = log_P * O

    return -np.mean(np.sum(O_logP,axis=1))

In [None]:
def derivatives(normalized_data,P,O):
    """Average update directions for the bias and the weights.

    Both results are built from the residual ``O - P`` and averaged over the
    examples; the training loop adds them (scaled by the step size) to the
    current parameters.

    Parameters
    ----------
    normalized_data : data matrix of shape (examples, features).
    P : predicted probabilities of shape (examples, classes).
    O : target matrix of shape (examples, classes).

    Returns
    -------
    (del_theta0, del_theta) : a vector with one entry per class and a
        (features, classes) matrix.
    """
    # Take the example count from the data actually passed in, rather than
    # from the notebook-level N_train, so the function works on any batch.
    n_examples = normalized_data.shape[0]

    residual = O - P

    del_theta0 = (1/n_examples) * np.sum(residual,axis=0)

    del_theta = (1/n_examples) * np.matmul(normalized_data.T,residual)

    return del_theta0,del_theta

In [None]:
# Stop once the loss changes by less than this between consecutive steps.
tol = 10**(-6)

step_size = 1.1

# Safety cap: if the loss ever becomes nan the convergence test below can
# never succeed, so without a cap the loop would run forever.
max_iterations = 100000

# Locally seeded generator so the random starting point (and therefore the
# whole run) is reproducible after Restart & Run All.
rng = np.random.default_rng(0)

theta0_initial = rng.standard_normal((1,M))

theta_initial = rng.standard_normal((N,M))

iterations = []

neg_log_loss_history = []

# Forward pass and loss for the starting parameters. Inside the loop these are
# carried over from the previous step instead of being recomputed.
P_initial = P(theta0_initial,theta_initial,normalized_data)

neg_log_loss_initial = neg_log_loss(O,P_initial)

i = 0

while(True):

    del_theta0_initial, del_theta_initial = derivatives(normalized_data,P_initial,O)

    # derivatives() is built from (O - P), so the step is added.
    theta0_final = theta0_initial + step_size *(del_theta0_initial)

    theta_final = theta_initial + step_size *(del_theta_initial)

    P_final = P(theta0_final,theta_final,normalized_data)

    neg_log_loss_final = neg_log_loss(O,P_final)

    if abs(neg_log_loss_final - neg_log_loss_initial) < tol:

        break

    iterations.append(i)

    neg_log_loss_history.append(neg_log_loss_initial)

    print("iteration # =",i,"and neg log loss =",neg_log_loss_initial)

    i += 1

    # The parameters just computed become the starting point of the next step;
    # reuse their probabilities and loss rather than evaluating them again.
    theta0_initial = theta0_final

    theta_initial = theta_final

    P_initial = P_final

    neg_log_loss_initial = neg_log_loss_final

    if i >= max_iterations:

        print("stopped after",max_iterations,"iterations without reaching tol")

        break

In [None]:
P_trained = P(theta0_final,theta_final,normalized_data)

In [None]:
P_trained.shape

In [None]:
predicted_classes_train = np.argmax(P_trained,axis=1)

In [None]:
predicted_classes_train = predicted_classes_train.reshape(predicted_classes_train.shape[0],1)

In [None]:
actual_classes_train = np.argmax(O,axis=1)

In [None]:
actual_classes_train = actual_classes_train.reshape(actual_classes_train.shape[0],1)

In [None]:
# Number of training rows whose predicted class index equals the true one.
correct_count = np.count_nonzero(np.equal(predicted_classes_train,actual_classes_train))

In [None]:
correct_count