In [1]:
import numpy as np
import time
import pickle
import os
import matplotlib.pyplot as plt
import warnings
import pandas as pd

warnings.filterwarnings('ignore')

In [30]:
print("----------------Reading the Data-------------------------")
PATH = os.getcwd()
# Build explicit paths instead of os.chdir()-ing into the data directory and
# back: if a read fails halfway, the kernel's working directory is left
# untouched and the cell can simply be re-run.
data_dir = os.path.join(PATH, 'Alphabets')

X_train = pd.read_csv(os.path.join(data_dir, 'train.csv'), sep=',', header=None, index_col=False)
X_test = pd.read_csv(os.path.join(data_dir, 'test.csv'), sep=',', header=None, index_col=False)
# Shuffle the training rows. Shuffling the array returned by to_numpy() only
# affects the frame when that array happens to be a writable view, which is
# not guaranteed (and is read-only under pandas Copy-on-Write), so shuffle
# the DataFrame itself.
X_train = X_train.sample(frac=1).reset_index(drop=True)
# The last column of each file holds the class label.
train_class = X_train[X_train.columns[-1]]
test_actual_class = X_test[X_test.columns[-1]]

# Everything except the last column is an input feature.
X_train = X_train.drop(X_train.columns[-1], axis=1)
X_test = X_test.drop(X_test.columns[-1], axis=1)

print("----------------Data Reading completed-------------------")

# Scale the features by 1/255 (presumably 8-bit pixel intensities -- TODO confirm).
X_train = X_train/255
X_test = X_test/255
m = X_train.shape[0] # Number of Training Samples
n = X_train.shape[1] # Number of input features

print("The total number of training samples = {}".format(m))
print("The number of features = {}".format(n))

----------------Reading the Data-------------------------
----------------Data Reading completed-------------------
The total number of training samples = 13000
The number of features = 784


In [31]:
#To get the one hot encoding of each label
print("--------Perform 1-hot encoding of class labels------------")

# Encode the training labels first; their columns define which class each
# one-hot position stands for.
train_class_dummies = pd.get_dummies(train_class, dtype=float)
train_class_enc = train_class_dummies.to_numpy()
# Encode the test labels against the SAME columns. Encoding them independently
# would produce a narrower / shifted matrix whenever a class is absent from the
# test set, and every row comparison against the predictions would be wrong.
test_actual_class_enc = (
    pd.get_dummies(test_actual_class, dtype=float)
    .reindex(columns=train_class_dummies.columns, fill_value=0.0)
    .to_numpy()
)

--------Perform 1-hot encoding of class labels------------


In [32]:
# Prepend a column of ones (the intercept/bias input) to the training and
# test feature matrices; both become plain NumPy arrays from here on.
X_train = np.concatenate((np.ones((m, 1)), X_train.to_numpy()), axis=1)
X_test = np.concatenate((np.ones((len(X_test), 1)), X_test.to_numpy()), axis=1)

In [33]:
# Training hyper-parameters.
batch_size = 100  # number of samples in each mini-batch
lr = 0.1  # learning rate applied to every weight update
arch_test = [1, 5, 10, 50, 100]  # candidate hidden-layer widths (single hidden layer)
arch = [arch_test[3]]  # one hidden layer with arch_test[3] units
# Number of output classes: largest label value + 1
# (assumes labels are 0-based integers -- TODO confirm).
r = train_class.max() + 1

In [34]:
# Cut the training set into consecutive (inputs, one-hot labels) chunks of
# at most batch_size rows each.
mini_batch = [
    (X_train[lo:hi, :], train_class_enc[lo:hi])
    for lo, hi in ((s, s + batch_size) for s in range(0, m, batch_size))
]
print("The number of mini-batches formed is = {}".format(len(mini_batch)))

The number of mini-batches formed is = 130


In [35]:
#Theta Initialization 
#np.random.seed(1)
def theta_init(arch=[50], n_inputs=None, n_outputs=None):
    """Create randomly initialised weight matrices for a fully connected net.

    Parameters
    ----------
    arch : sequence of int
        Number of units in each hidden layer, in order. May be empty, in
        which case a single input->output matrix is returned. The default
        list is never mutated.
    n_inputs : int, optional
        Number of rows of the first matrix. Defaults to the notebook global
        ``n + 1`` (features plus the intercept column).
    n_outputs : int, optional
        Number of columns of the last matrix. Defaults to the notebook
        global ``r`` (number of classes).

    Returns
    -------
    list of numpy.ndarray
        ``len(arch) + 1`` matrices; matrix ``k`` has shape
        ``(size of layer k, size of layer k + 1)`` and entries drawn
        uniformly from [-1, 1).
    """
    # Fall back to the notebook-level sizes only when the caller did not
    # supply them, so existing calls keep working unchanged.
    if n_inputs is None:
        n_inputs = n + 1
    if n_outputs is None:
        n_outputs = r

    layer_sizes = [n_inputs] + list(arch) + [n_outputs]
    # One matrix per pair of consecutive layers, drawn in layer order so a
    # seeded run produces the same weights as before.
    return [
        2*np.random.random((rows, cols)) - 1
        for rows, cols in zip(layer_sizes[:-1], layer_sizes[1:])
    ]

In [36]:
def activation(x):
    """Logistic sigmoid, applied element-wise: 1 / (1 + exp(-x))."""
    denominator = 1 + np.exp(-x)
    return 1/denominator

In [37]:
def forward_prop(data, theta):
    """Run `data` through every layer and return all layer outputs.

    The returned list starts with `data` itself; each following entry is
    `activation(previous_entry . weights)` for the next matrix in `theta`,
    so the final entry is the network output.
    """
    layer_outputs = [data]
    for weights in theta:
        layer_outputs.append(activation(np.dot(layer_outputs[-1], weights)))
    return layer_outputs

In [38]:
def cost_total(X, theta, Y, m):
    """Squared-error cost of the network on (X, Y).

    Computes sum((Y - output)**2) / (2*m), where `output` is the last
    layer returned by forward_prop(X, theta).
    """
    output = forward_prop(X, theta)[-1]
    residual = Y - output
    return (1/(2*m))*np.sum(residual**2)

In [39]:
def calc_accuracy(data, theta, actual_class):
    """Return the classification accuracy (in percent) of the network.

    Parameters
    ----------
    data : numpy.ndarray
        Input rows fed to forward_prop.
    theta : list of numpy.ndarray
        Weight matrices of the network.
    actual_class : array-like
        One-hot label rows, one per row of `data`, with the same number of
        columns as the network output.

    Returns
    -------
    float
        Percentage of rows whose predicted one-hot vector equals the label row.

    Raises
    ------
    ValueError
        If the label matrix and the network output differ in shape; comparing
        them row by row would otherwise silently report 0% accuracy.
    """
    scores = forward_prop(data, theta)[-1]
    labels = np.asarray(actual_class)
    if scores.shape != labels.shape:
        raise ValueError(
            "label shape {} does not match network output shape {}".format(
                labels.shape, scores.shape))

    # Exactly one predicted class per row: the unit with the largest output
    # (argmax picks the first one on ties, so a tie can no longer produce a
    # multi-hot row that matches no label).
    predicted = np.zeros_like(scores)
    predicted[np.arange(scores.shape[0]), np.argmax(scores, axis=1)] = 1

    # A row counts as correct only if every position agrees with the label.
    hits = int(np.count_nonzero(np.all(predicted == labels, axis=1)))
    return (hits / data.shape[0]) * 100

In [40]:
# Per-run bookkeeping filled in by the training loop: five independent,
# initially empty lists.
epochs, train_accuracy, test_accuracy, train_time, costs = ([] for _ in range(5))

In [41]:
# Learning rate: scales every weight update in the training loop.
lr = 0.1

In [None]:
# Train one network per entry of arch_test with mini-batch gradient descent on
# the squared-error cost, then record epoch count, wall-clock time and the
# train/test accuracies in the bookkeeping lists.
arch_test=[100]
max_epochs = 10000  # safety cap so a run that never meets the tolerance still terminates
for i in range(len(arch_test)):
    # Two hidden layers of equal width, hence three weight matrices.
    theta = theta_init([arch_test[i], arch_test[i]])
    print(theta[0].shape, theta[1].shape, theta[2].shape)

    epoch = 0
    start = time.time()
    cost_init = cost_total(X_train, theta, train_class_enc, m)

    while epoch < max_epochs:
        print("Initial Cost for this epoch {} = {}".format(epoch, cost_init))

        for count, (X_b, Y_b) in enumerate(mini_batch):
            # Use the real batch length so a shorter final batch is scaled correctly.
            n_b = X_b.shape[0]
            fm = forward_prop(X_b, theta)
            delta = [None]*len(fm)

            # Progress report every 60 mini-batches.
            if (count % 60 == 0):
                print("Error = "+str(cost_total(X_b, theta, Y_b, n_b)))

            # Backward propagation: output-layer error first, then push it
            # back through each weight matrix; fm[l]*(1-fm[l]) is the sigmoid
            # derivative expressed through the layer output.
            for l in range(len(fm)-1, 0, -1):
                if (l == len(fm)-1):
                    delta[l] = ((1/n_b)*(Y_b - fm[l])*fm[l]*(1-fm[l]))
                else:
                    delta[l]=(np.dot(delta[l+1], theta[l].T)*fm[l]*(1-fm[l]))

            # delta already carries the (Y - output) sign, so the step is added.
            for t in range(len(theta)):
                theta[t] += lr*np.dot(fm[t].T, delta[t+1])

        # Keep every 10th epoch's starting cost for the loss plot.
        if epoch %10 == 0:
            costs.append(cost_init)
        epoch+=1 #Number of epochs

        cost_final = cost_total(X_train, theta, train_class_enc, m)

        print("Cost after {} epochs is = {}".format(epoch, cost_final))
        # Converged once the full-training-set cost changes by less than 1e-06.
        if (abs(cost_final-cost_init) < 1e-06):
            print("cost initial= {} , cost final={} , change in cost= {}".format(cost_init,cost_final, cost_final-cost_init))
            break
        cost_init = cost_final
    else:
        # Reached only when the epoch cap is hit without a break.
        print("Stopped after {} epochs without meeting the convergence tolerance".format(max_epochs))
    epochs.append(epoch)
    train_time.append(time.time()-start)
    train_accuracy.append(calc_accuracy(X_train, theta, train_class_enc))
    test_accuracy.append(calc_accuracy(X_test, theta, test_actual_class_enc))
    print("\n------------------------------------------------------------------------------")
    print("The stats for number of units in the hidden layer = {} are as below:".format(arch_test[i]))
    print("------------------------------------------------------------------------------")
    print("The number of epochs = {}".format(epochs[-1]))
    print("The training time = {}sec".format(train_time[-1]))
    print("The training accuracy is = {}%".format(train_accuracy[-1]))
    print("The test accuracy is = {}%".format(test_accuracy[-1]))
    print("------------------------------------------------------------------------------\n")

(785, 100) (100, 100) (100, 26)
Initial Cost for this epoch 0 = 4.907509036422232
Error = 4.929768060968566
Error = 1.909973083711403
Error = 1.8719101530381232
Cost after 1 epochs is = 1.819762811576304
Initial Cost for this epoch 1 = 1.819762811576304
Error = 1.8158952744639145
Error = 0.9671337138337412
Error = 0.49826478979325717
Cost after 2 epochs is = 0.4965906484809551
Initial Cost for this epoch 2 = 0.4965906484809551
Error = 0.49661686537226957
Error = 0.4968075526395006
Error = 0.48963428591897673
Cost after 3 epochs is = 0.4907493098157923
Initial Cost for this epoch 3 = 0.4907493098157923
Error = 0.49208493252361724
Error = 0.4941793960647315
Error = 0.48766100401298035
Cost after 4 epochs is = 0.48904085292325367
Initial Cost for this epoch 4 = 0.48904085292325367
Error = 0.4908077901229025
Error = 0.4928541018456798
Error = 0.4859190504145806
Cost after 5 epochs is = 0.48755214997250396
Initial Cost for this epoch 5 = 0.48755214997250396
Error = 0.4897313868851457
Error 

Cost after 46 epochs is = 0.3492391576660174
Initial Cost for this epoch 46 = 0.3492391576660174
Error = 0.34962022323508507
Error = 0.358385577127417
Error = 0.3254332280141338
Cost after 47 epochs is = 0.3465207206060776
Initial Cost for this epoch 47 = 0.3465207206060776
Error = 0.34683247911258563
Error = 0.3555411551678823
Error = 0.3230454386582314
Cost after 48 epochs is = 0.34382052650165995
Initial Cost for this epoch 48 = 0.34382052650165995
Error = 0.3441431864181191
Error = 0.352729353944448
Error = 0.32067852832070204
Cost after 49 epochs is = 0.3411336364682423
Initial Cost for this epoch 49 = 0.3411336364682423
Error = 0.3415312949656868
Error = 0.34994986307458853
Error = 0.3183254961176576
Cost after 50 epochs is = 0.3384582034161454
Initial Cost for this epoch 50 = 0.3384582034161454
Error = 0.33897586329761653
Error = 0.34720613796169
Error = 0.3159825263399798
Cost after 51 epochs is = 0.3357955442312997
Initial Cost for this epoch 51 = 0.3357955442312997
Error = 0.

Error = 0.2635513297566716
Error = 0.2342502366813892
Cost after 92 epochs is = 0.25344788086504855
Initial Cost for this epoch 92 = 0.25344788086504855
Error = 0.24378699248496236
Error = 0.26202031122288605
Error = 0.23275653483867387
Cost after 93 epochs is = 0.25200251717862826
Initial Cost for this epoch 93 = 0.25200251717862826
Error = 0.2418281686487233
Error = 0.2605244838798
Error = 0.23129482573474733
Cost after 94 epochs is = 0.2505913567341702
Initial Cost for this epoch 94 = 0.2505913567341702
Error = 0.23991908394080935
Error = 0.25906329120466937
Error = 0.22986349205166104
Cost after 95 epochs is = 0.24921327399743567
Initial Cost for this epoch 95 = 0.24921327399743567
Error = 0.2380603899126123
Error = 0.25763599036305607
Error = 0.22846108161360262
Cost after 96 epochs is = 0.24786707639918892
Initial Cost for this epoch 96 = 0.24786707639918892
Error = 0.2362522147591992
Error = 0.25624170821183034
Error = 0.22708632882663757
Cost after 97 epochs is = 0.246551541272

Error = 0.189564445831052
Cost after 137 epochs is = 0.2063000978172156
Initial Cost for this epoch 137 = 0.2063000978172156
Error = 0.19277361319220007
Error = 0.21234984405211826
Error = 0.18889054157552784
Cost after 138 epochs is = 0.20546040221069906
Initial Cost for this epoch 138 = 0.20546040221069906
Error = 0.19208806224634292
Error = 0.21140101298884
Error = 0.1882215369756324
Cost after 139 epochs is = 0.20463595247651792
Initial Cost for this epoch 139 = 0.20463595247651792
Error = 0.19140497290879602
Error = 0.21046299446595423
Error = 0.18755746606480236
Cost after 140 epochs is = 0.20382643489372756
Initial Cost for this epoch 140 = 0.20382643489372756
Error = 0.19072461568844726
Error = 0.20953520367277023
Error = 0.18689836953995667
Cost after 141 epochs is = 0.2030314421125206
Initial Cost for this epoch 141 = 0.2030314421125206
Error = 0.19004720246091006
Error = 0.20861706356917623
Error = 0.18624428596159245
Cost after 142 epochs is = 0.2022505105771394
Initial Cos

Error = 0.17529752042577884
Error = 0.16329868349003426
Cost after 182 epochs is = 0.17814941414673252
Initial Cost for this epoch 182 = 0.17814941414673252
Error = 0.1634689178631168
Error = 0.17439331008248107
Error = 0.16272731409691446
Cost after 183 epochs is = 0.17762900192812864
Initial Cost for this epoch 183 = 0.17762900192812864
Error = 0.16272667250600073
Error = 0.1734701570761577
Error = 0.1621451655216425
Cost after 184 epochs is = 0.17710554532657902
Initial Cost for this epoch 184 = 0.17710554532657902
Error = 0.1619622430524892
Error = 0.17252754054093403
Error = 0.16155051247827484
Cost after 185 epochs is = 0.1765782231364188
Initial Cost for this epoch 185 = 0.1765782231364188
Error = 0.16117355969117161
Error = 0.17156576341368962
Error = 0.16094168263692868
Cost after 186 epochs is = 0.17604627907493386
Initial Cost for this epoch 186 = 0.17604627907493386
Error = 0.16035909257650005
Error = 0.1705861237619746
Error = 0.16031717023619305
Cost after 187 epochs is =

Error = 0.1357357111847412
Cost after 227 epochs is = 0.15450166124623355
Initial Cost for this epoch 227 = 0.15450166124623355
Error = 0.1328422178341475
Error = 0.13876267303309675
Error = 0.13532937795577524
Cost after 228 epochs is = 0.15407576560814865
Initial Cost for this epoch 228 = 0.15407576560814865
Error = 0.13243044116499106
Error = 0.13825476532865325
Error = 0.13493050698683492
Cost after 229 epochs is = 0.15365455506068726
Initial Cost for this epoch 229 = 0.15365455506068726
Error = 0.13202163698009334
Error = 0.1377547391885349
Error = 0.13453877510447676
Cost after 230 epochs is = 0.1532379252930502
Initial Cost for this epoch 230 = 0.1532379252930502
Error = 0.1316156010247924
Error = 0.13726225161322078
Error = 0.13415387177319563
Cost after 231 epochs is = 0.15282577607241118
Initial Cost for this epoch 231 = 0.15282577607241118
Error = 0.13121215789174606
Error = 0.1367769789648686
Error = 0.13377549897370933
Cost after 232 epochs is = 0.1524180109435455
Initial 

Error = 0.12115643379077018
Error = 0.12186652793003638
Cost after 272 epochs is = 0.13885220169727566
Initial Cost for this epoch 272 = 0.13885220169727566
Error = 0.116481721785719
Error = 0.12083187289471133
Error = 0.12161912158651127
Cost after 273 epochs is = 0.13856524865474565
Initial Cost for this epoch 273 = 0.13856524865474565
Error = 0.1161678244858604
Error = 0.12050979762738603
Error = 0.12137336729312749
Cost after 274 epochs is = 0.13828024839475223
Initial Cost for this epoch 274 = 0.13828024839475223
Error = 0.11585607784063122
Error = 0.12019017979096176
Error = 0.1211292337776451
Cost after 275 epochs is = 0.1379971704910981
Initial Cost for this epoch 275 = 0.1379971704910981
Error = 0.11554646975564144
Error = 0.11987299208674713
Error = 0.12088669098845813
Cost after 276 epochs is = 0.13771598527686285
Initial Cost for this epoch 276 = 0.13771598527686285
Error = 0.11523898763233564
Error = 0.11955820804223613
Error = 0.12064571002152223
Cost after 277 epochs is 

In [24]:
# Plot the recorded training loss against the epoch at which it was sampled.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_title("Loss with Epochs")
skip = 5           # leave out the first samples, where the loss dwarfs the rest
sample_every = 10  # the training loop stores one cost value per 10 epochs
# Convert sample positions to epoch numbers so the axis matches its label.
# NOTE(review): this assumes `costs` holds a single training run -- reset it
# before re-training, otherwise samples from several runs are concatenated.
x = sample_every*np.arange(skip, len(costs))
ax.plot(x, costs[skip:], marker='o', label='Train Loss')
ax.set_xlabel("number of epochs")
ax.set_ylabel("Train Loss")

plt.legend()
#plt.savefig("accuracy_HiddenUnit_val20per.png", dpi=1000, bbox_inches='tight')
plt.show()

0.5

In [19]:
# Test-set accuracy (%) appended once per training run.
# NOTE(review): the saved output lists more entries than the single-entry
# arch_test loop in this notebook would produce -- presumably left over from
# earlier runs in the same kernel; verify with Restart & Run All.
test_accuracy

[88.75384615384615,
 3.8461538461538463,
 3.8461538461538463,
 3.8461538461538463,
 91.13846153846154]

In [20]:
# Wall-clock training time in seconds, appended once per training run.
train_time

[1574.8899838924408,
 9.588012456893921,
 3.52205491065979,
 3.0311527252197266,
 1853.354641675949]

In [21]:
# Training-set accuracy (%) appended once per training run.
train_accuracy

[97.09230769230768,
 3.8461538461538463,
 3.8461538461538463,
 3.8461538461538463,
 97.99230769230769]