# MNIST classification

This notebook explores MNIST classification performance using an object-oriented deep neural network that I implemented, with softmax regression for the multiclass output.

In [1]:
from neural_network import NeuralNetwork
import numpy as np
import struct

# read in mnist dataset
def read_idx(filename):
    '''
    Read an IDX-format file (the format the MNIST files are distributed in).

    File layout: two zero bytes, one data-type code byte, one byte holding the
    number of dimensions, then one big-endian uint32 size per dimension,
    followed by the array contents.

    Arguments:
    filename -- path of the IDX file to read

    Returns:
    2D array with the first IDX dimension moved to the columns, i.e. shape
    (product of the remaining dimensions, size of the first dimension).
    A 1D file of m entries comes back with shape (1, m).

    Raises:
    ValueError -- if the header does not start with two zero bytes or the
                  data-type code is not a known IDX type
    '''
    # IDX data-type codes mapped to numpy dtypes (multi-byte types are big-endian).
    idx_dtypes = {
        0x08: np.dtype(np.uint8),
        0x09: np.dtype(np.int8),
        0x0B: np.dtype('>i2'),
        0x0C: np.dtype('>i4'),
        0x0D: np.dtype('>f4'),
        0x0E: np.dtype('>f8'),
    }

    with open(filename, 'rb') as f:
        zero, data_type, dims = struct.unpack('>HBB', f.read(4))
        if zero != 0:
            raise ValueError("{} is not an IDX file (bad magic number)".format(filename))
        if data_type not in idx_dtypes:
            raise ValueError("{}: unsupported IDX data type 0x{:02X}".format(filename, data_type))

        shape = tuple(struct.unpack('>I', f.read(4))[0] for _ in range(dims))

        # np.fromfile continues from the current file position and returns a
        # writable array; it replaces the deprecated binary-mode np.fromstring.
        array = np.fromfile(f, dtype=idx_dtypes[data_type]).reshape(shape)

    # Flatten every example and put one example per column.
    return array.reshape(array.shape[0], -1).T
        
def convert_to_multiclass(Y, num_classes = 10):
    '''
    One-hot encode a row of integer class labels.

    Convert Y from shape (1 x m) to shape (num_classes x m): column j is all
    zeros except for a 1 in the row given by the label Y[0, j].

    Arguments:
    Y -- array of shape (1, m) holding integer class labels
    num_classes -- number of rows in the result (defaults to 10, one per digit)

    Returns:
    float array of shape (num_classes, m); an empty Y gives shape (num_classes, 0)

    Raises:
    IndexError -- if a label is out of range for num_classes or is not an integer
    '''
    labels = np.asarray(Y)[0, :]
    num_examples = labels.shape[0]

    multiclass = np.zeros((num_classes, num_examples))
    # Set one entry per column in a single indexing step instead of looping
    # over every example in Python.
    multiclass[labels, np.arange(num_examples)] = 1
    return multiclass


def show_numbers(data, start_idx, end_idx):
    '''
    Display the images stored in a range of columns of data, one figure each.

    Arguments:
    data -- array with one flattened square image per column; values are
            multiplied by 255 before display, so they are presumably expected
            to be scaled to [0, 1]
    start_idx -- index of the first column to show (inclusive)
    end_idx -- index at which to stop (exclusive)
    '''
    # Imported here because the notebook's import cell does not bring in
    # pyplot; without this the function fails with a NameError on a fresh kernel.
    import matplotlib.pyplot as plt

    # Use a plain int for the side length: np.int8 cannot represent sides
    # longer than 127 pixels.
    side = int(np.sqrt(data.shape[0]))
    for each in range(start_idx, end_idx):
        pixels = (data[:, each].reshape(side, side) * 255).astype(np.uint8)
        plt.imshow(pixels, cmap=plt.cm.binary)
        plt.show()
        
def model(train_X, test_X, train_Y, test_Y, layer_dims, init_method = 'standard', learning_rate = 0.05, 
          batch_size = 64, num_epochs = 50, optimizer = 'gd', lambd = 0, keep_prob = 1, 
          beta1 = 0.9, beta2 = 0.999, epsilon = 10**-8, print_int = 1, print_costs = True):
    '''
    Convenience wrapper: build a NeuralNetwork, train it, report accuracy.

    Arguments:
    train_X, train_Y -- data passed to NeuralNetwork.train and then evaluated as "Train set"
    test_X, test_Y -- data evaluated after training and reported as "Dev set"
    layer_dims, init_method -- forwarded to the NeuralNetwork constructor
    all remaining keyword arguments -- forwarded unchanged to NeuralNetwork.train

    Returns:
    the trained NeuralNetwork instance
    '''
    network = NeuralNetwork(layer_dims, init_method=init_method)

    # Everything the training call needs beyond the data itself.
    training_options = dict(
        learning_rate=learning_rate,
        batch_size=batch_size,
        num_epochs=num_epochs,
        optimizer=optimizer,
        lambd=lambd,
        keep_prob=keep_prob,
        beta1=beta1,
        beta2=beta2,
        epsilon=epsilon,
        print_int=print_int,
        print_costs=print_costs,
    )
    network.train(train_X, train_Y, **training_options)

    # Report accuracy on the training data first, then on the held-out data.
    evaluation_sets = (
        (train_X, train_Y, "Train set"),
        (test_X, test_Y, "Dev set"),
    )
    for features, targets, label in evaluation_sets:
        predictions = network.predict(features)
        NeuralNetwork.print_accuracy(predictions, targets, dataset_name=label)

    return network

In [2]:
# load (read_idx returns arrays with one example per column)
train_X = read_idx('dataset/train_images_ubyte')
train_Y = read_idx('dataset/train_labels_ubyte')
test_X = read_idx('dataset/test_images_ubyte')
test_Y = read_idx('dataset/test_labels_ubyte')

# normalize: scale the training pixels to [0, 1] and apply the *same* factor to
# the test pixels, so both splits go through an identical transform instead of
# each being divided by its own maximum
pixel_scale = np.max(train_X)
train_X = train_X / pixel_scale
test_X = test_X / pixel_scale

print("Train shape:\nX: {}\nY: {}".format(train_X.shape,train_Y.shape))

  # Remove the CWD from sys.path while we load stuff.


Train shape:
X: (784, 60000)
Y: (1, 60000)


In [3]:
# One-hot encode the label rows so each example gets a full column of class
# indicators (the form the softmax output is compared against).
multiclass_train_Y, multiclass_test_Y = (
    convert_to_multiclass(labels) for labels in (train_Y, test_Y)
)

# Sanity check: the first two raw labels next to their encoded columns.
print(train_Y[:, :2])
print(multiclass_train_Y[:, :2])

[[5 0]]
[[0. 1.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [1. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]]


In [4]:
# Sizes of the input and output layers, taken from the row counts of the data.
n_x, n_y = train_X.shape[0], multiclass_train_Y.shape[0]

### Single layer NN (logistic regression)

In [5]:
# Baseline: the input layer feeds the output layer directly (no hidden layer).
layer_dims = [n_x, n_y]
nn = model(
    train_X=train_X,
    test_X=test_X,
    train_Y=multiclass_train_Y,
    test_Y=multiclass_test_Y,
    layer_dims=layer_dims,
)

Cost after epoch 0/50: 5.936088549185273
Cost after epoch 1/50: 9.097407274554952
Cost after epoch 2/50: 11.063471526602306
Cost after epoch 3/50: 14.338950909919895
Cost after epoch 4/50: 13.00460203378837
Cost after epoch 5/50: 11.211778738255283
Cost after epoch 6/50: 5.204176630521559
Cost after epoch 7/50: 4.739740649963984
Cost after epoch 8/50: 14.2458075121803
Cost after epoch 9/50: 10.316136264787444
Cost after epoch 10/50: 8.797722249407478
Cost after epoch 11/50: 12.773882787790978
Cost after epoch 12/50: 4.049367174258591
Cost after epoch 13/50: 2.8786409988723802
Cost after epoch 14/50: 10.131550730605678
Cost after epoch 15/50: 1.956743233520785
Cost after epoch 16/50: 12.926262204400317
Cost after epoch 17/50: 6.918485318397735
Cost after epoch 18/50: 9.353000633089614
Cost after epoch 19/50: 2.720676530688244
Cost after epoch 20/50: 8.073585055481978
Cost after epoch 21/50: 9.676306537325704
Cost after epoch 22/50: 7.254928848911033
Cost after epoch 23/50: 8.46622061504

#### Overall, there is not much overfitting here. The model just needs to perform better on the training data (high bias), so it needs either a better architecture or longer training.

### 2 layer NN with 100 hidden units (HU)

In [6]:
# One hidden layer of 100 units; all other settings stay at model()'s defaults.
layer_dims = [n_x, 100, n_y]
nn = model(
    train_X=train_X,
    test_X=test_X,
    train_Y=multiclass_train_Y,
    test_Y=multiclass_test_Y,
    layer_dims=layer_dims,
)

Cost after epoch 0/50: 9.613306056424634
Cost after epoch 1/50: 9.122927167419721
Cost after epoch 2/50: 4.202536282923826
Cost after epoch 3/50: 5.623462360216738
Cost after epoch 4/50: 9.467037999129062
Cost after epoch 5/50: 9.599000766280955
Cost after epoch 6/50: 8.834463333758139
Cost after epoch 7/50: 4.91717113681793
Cost after epoch 8/50: 1.5495119669034985
Cost after epoch 9/50: 1.1868075312243227
Cost after epoch 10/50: 2.2973349809640675
Cost after epoch 11/50: 5.91045718531282
Cost after epoch 12/50: 4.512645788659789
Cost after epoch 13/50: 0.48005315508658775
Cost after epoch 14/50: 2.64028532640309
Cost after epoch 15/50: 6.110973503386994
Cost after epoch 16/50: 3.57921696787658
Cost after epoch 17/50: 3.819206034867924
Cost after epoch 18/50: 0.7187427159359527
Cost after epoch 19/50: 1.0820061337734148
Cost after epoch 20/50: 0.7136652579810604
Cost after epoch 21/50: 5.217090761735295
Cost after epoch 22/50: 0.8545827937499285
Cost after epoch 23/50: 2.1070698245105

#### Wow, great performance on train set (bias much improved with only 0.27% error).  Let's try some more architectures

### 2 layer NN with 300 HU

In [7]:
# One hidden layer of 300 units; all other settings stay at model()'s defaults.
layer_dims = [n_x, 300, n_y]
nn = model(
    train_X=train_X,
    test_X=test_X,
    train_Y=multiclass_train_Y,
    test_Y=multiclass_test_Y,
    layer_dims=layer_dims,
)

Cost after epoch 0/50: 5.9681816138079675
Cost after epoch 1/50: 5.167716966832673
Cost after epoch 2/50: 4.910781525010577
Cost after epoch 3/50: 6.7333971309139145
Cost after epoch 4/50: 5.749005001595094
Cost after epoch 5/50: 2.2129575228460556
Cost after epoch 6/50: 10.706185930849177
Cost after epoch 7/50: 8.130948564518759
Cost after epoch 8/50: 4.099771939347459
Cost after epoch 9/50: 7.383392178413406
Cost after epoch 10/50: 3.1406043334121367
Cost after epoch 11/50: 8.737075335510475
Cost after epoch 12/50: 3.075514307247179
Cost after epoch 13/50: 3.0473505046829827
Cost after epoch 14/50: 1.8790954958433765
Cost after epoch 15/50: 5.055506990998065
Cost after epoch 16/50: 1.2284294284579094
Cost after epoch 17/50: 1.5942712468122633
Cost after epoch 18/50: 4.884040039531151
Cost after epoch 19/50: 0.8086234076014627
Cost after epoch 20/50: 1.054636923441886
Cost after epoch 21/50: 0.30792123527533855
Cost after epoch 22/50: 0.6441207033131798
Cost after epoch 23/50: 0.59611

### 2 layer NN with 800 HU

In [8]:
# One hidden layer of 800 units; all other settings stay at model()'s defaults.
layer_dims = [n_x, 800, n_y]
nn = model(
    train_X=train_X,
    test_X=test_X,
    train_Y=multiclass_train_Y,
    test_Y=multiclass_test_Y,
    layer_dims=layer_dims,
)

Cost after epoch 0/50: 8.355840472283738
Cost after epoch 1/50: 15.92123096217971
Cost after epoch 2/50: 3.2095645730185067
Cost after epoch 3/50: 9.606212319937553
Cost after epoch 4/50: 6.152624362678135
Cost after epoch 5/50: 6.646742401001461
Cost after epoch 6/50: 0.9709668343366863
Cost after epoch 7/50: 2.502584622632905
Cost after epoch 8/50: 11.411141494152067
Cost after epoch 9/50: 0.649530298894871
Cost after epoch 10/50: 0.6172386289189543
Cost after epoch 11/50: 3.1409180978432234
Cost after epoch 12/50: 4.197036391659313
Cost after epoch 13/50: 2.210493375539399
Cost after epoch 14/50: 6.746504895788266
Cost after epoch 15/50: 0.7528228353754426
Cost after epoch 16/50: 2.994275552379687
Cost after epoch 17/50: 0.2813141425831936
Cost after epoch 18/50: 2.3465754379492334
Cost after epoch 19/50: 0.33658043677449967
Cost after epoch 20/50: 0.6976421799186874
Cost after epoch 21/50: 0.8575875387408904
Cost after epoch 22/50: 0.9272635980002919
Cost after epoch 23/50: 0.36200

### 3 layer NN with 300 + 100 HU

In [10]:
# Two hidden layers (300 then 100 units); all other settings stay at model()'s defaults.
layer_dims = [n_x, 300, 100, n_y]
nn = model(
    train_X=train_X,
    test_X=test_X,
    train_Y=multiclass_train_Y,
    test_Y=multiclass_test_Y,
    layer_dims=layer_dims,
)

Cost after epoch 0/50: 17.720276486414523
Cost after epoch 1/50: 14.573733221394559
Cost after epoch 2/50: 5.112767288863813
Cost after epoch 3/50: 1.8586629864132553
Cost after epoch 4/50: 1.9696872883423233
Cost after epoch 5/50: 6.126213833778041
Cost after epoch 6/50: 2.1240206099920096
Cost after epoch 7/50: 3.6507795609068827
Cost after epoch 8/50: 1.0813468429669422
Cost after epoch 9/50: 0.8124234402030417
Cost after epoch 10/50: 0.159510154099897
Cost after epoch 11/50: 0.38548194017194104
Cost after epoch 12/50: 0.42184306773344743
Cost after epoch 13/50: 1.2377491430995766
Cost after epoch 14/50: 0.8190509697253914
Cost after epoch 15/50: 3.9692684583066833
Cost after epoch 16/50: 0.47191151961582706
Cost after epoch 17/50: 0.031214338619493076
Cost after epoch 18/50: 0.5683466958490055
Cost after epoch 19/50: 0.19429074689752446
Cost after epoch 20/50: 0.35153023382126813
Cost after epoch 21/50: 0.5498093350646287
Cost after epoch 22/50: 0.27093805031621543
Cost after epoch

#### As expected, increasing the complexity of the model has only a marginal impact on results, because training set performance is already high (low bias).  Regularization or a larger dataset may help decrease variance

In [6]:
# Same 300 + 100 architecture, now with keep_prob lowered from its default of 1 to 0.86.
layer_dims = [n_x, 300, 100, n_y]
nn = model(
    train_X=train_X,
    test_X=test_X,
    train_Y=multiclass_train_Y,
    test_Y=multiclass_test_Y,
    layer_dims=layer_dims,
    keep_prob=0.86,
)

Cost after epoch 0/50: 23.886216685942053
Cost after epoch 1/50: 9.493746210987526
Cost after epoch 2/50: 3.2902235864504283
Cost after epoch 3/50: 13.409679400243219
Cost after epoch 4/50: 4.204159404731235
Cost after epoch 5/50: 3.262105649480446
Cost after epoch 6/50: 8.75454453833418
Cost after epoch 7/50: 3.2318109479499633
Cost after epoch 8/50: 0.5196074391467999
Cost after epoch 9/50: 0.8689797812053517
Cost after epoch 10/50: 2.7606319330859255
Cost after epoch 11/50: 0.38333601430563513
Cost after epoch 12/50: 3.5748539669921264
Cost after epoch 13/50: 0.9446851629748904
Cost after epoch 14/50: 0.2925058475744569
Cost after epoch 15/50: 0.4304359239002483
Cost after epoch 16/50: 3.7826303534955987
Cost after epoch 17/50: 5.107105330585888
Cost after epoch 18/50: 2.838112141378349
Cost after epoch 19/50: 1.5327663293918716
Cost after epoch 20/50: 1.1780137814518812
Cost after epoch 21/50: 0.16899866271616332
Cost after epoch 22/50: 0.4191272519296817
Cost after epoch 23/50: 0.

#### Implementing dropout did help a bit with variance, but ~1.7% error still remains

In [7]:
# keep_prob 0.86 again, trained for 75 epochs instead of the default 50.
layer_dims = [n_x, 300, 100, n_y]
nn = model(
    train_X=train_X,
    test_X=test_X,
    train_Y=multiclass_train_Y,
    test_Y=multiclass_test_Y,
    layer_dims=layer_dims,
    keep_prob=0.86,
    num_epochs=75,
)

Cost after epoch 0/75: 31.2507897579779
Cost after epoch 1/75: 5.059678218265086
Cost after epoch 2/75: 15.821475747774056
Cost after epoch 3/75: 5.54629267642279
Cost after epoch 4/75: 6.829270574295787
Cost after epoch 5/75: 4.68392157691227
Cost after epoch 6/75: 4.150069575619378
Cost after epoch 7/75: 1.6395259951866563
Cost after epoch 8/75: 2.6854260583030882
Cost after epoch 9/75: 0.49685811774902466
Cost after epoch 10/75: 0.644892067476362
Cost after epoch 11/75: 0.905927163431669
Cost after epoch 12/75: 2.273519096402857
Cost after epoch 13/75: 1.607673099061223
Cost after epoch 14/75: 0.41108275640082476
Cost after epoch 15/75: 0.10916540213573109
Cost after epoch 16/75: 4.923254298303088
Cost after epoch 17/75: 0.996521911878915
Cost after epoch 18/75: 0.8032113119605925
Cost after epoch 19/75: 0.6390303280450816
Cost after epoch 20/75: 1.595349489893568
Cost after epoch 21/75: 0.33053776718040595
Cost after epoch 22/75: 0.31318445559721975
Cost after epoch 23/75: 0.288859

#### As expected, training longer doesn't increase test set accuracy, just helps with training set accuracy (bias)

In [8]:
# 75 epochs with keep_prob lowered further to 0.8.
layer_dims = [n_x, 300, 100, n_y]
nn = model(
    train_X=train_X,
    test_X=test_X,
    train_Y=multiclass_train_Y,
    test_Y=multiclass_test_Y,
    layer_dims=layer_dims,
    keep_prob=0.8,
    num_epochs=75,
)

Cost after epoch 0/75: 20.15402986198987
Cost after epoch 1/75: 16.440718884310872
Cost after epoch 2/75: 8.018488134126489
Cost after epoch 3/75: 7.775998882833512
Cost after epoch 4/75: 8.613347989262147
Cost after epoch 5/75: 3.8848817233226525
Cost after epoch 6/75: 10.060491676417541
Cost after epoch 7/75: 1.3442142676765443
Cost after epoch 8/75: 0.9925948917234688
Cost after epoch 9/75: 2.889430850940807
Cost after epoch 10/75: 2.361045013228947
Cost after epoch 11/75: 0.619129487966805
Cost after epoch 12/75: 3.442135100869809
Cost after epoch 13/75: 1.5171638258531632
Cost after epoch 14/75: 1.2714932913607493
Cost after epoch 15/75: 1.2551629890496117
Cost after epoch 16/75: 3.533371102528295
Cost after epoch 17/75: 0.5523354932224976
Cost after epoch 18/75: 0.06850334660501091
Cost after epoch 19/75: 0.2717212086192526
Cost after epoch 20/75: 0.09779773990452532
Cost after epoch 21/75: 3.818322000287265
Cost after epoch 22/75: 3.223984156353758
Cost after epoch 23/75: 0.9205

In [9]:
# 75 epochs with keep_prob lowered to 0.75.
layer_dims = [n_x, 300, 100, n_y]
nn = model(
    train_X=train_X,
    test_X=test_X,
    train_Y=multiclass_train_Y,
    test_Y=multiclass_test_Y,
    layer_dims=layer_dims,
    keep_prob=0.75,
    num_epochs=75,
)

Cost after epoch 0/75: 22.13693810347857
Cost after epoch 1/75: 4.763329552626246
Cost after epoch 2/75: 16.85698516231806
Cost after epoch 3/75: 6.910404867869886
Cost after epoch 4/75: 7.94646148312212
Cost after epoch 5/75: 3.032975935694297
Cost after epoch 6/75: 2.42873022868035
Cost after epoch 7/75: 0.41456336957269985
Cost after epoch 8/75: 3.357159836246548
Cost after epoch 9/75: 4.081731826530864
Cost after epoch 10/75: 1.281177455293153
Cost after epoch 11/75: 1.1536128761562714
Cost after epoch 12/75: 2.845972892579802
Cost after epoch 13/75: 1.3767412331179272
Cost after epoch 14/75: 0.9112793873471084
Cost after epoch 15/75: 0.948507090830297
Cost after epoch 16/75: 2.8626467697763744
Cost after epoch 17/75: 0.17840984403903049
Cost after epoch 18/75: 0.34367814906936767
Cost after epoch 19/75: 0.7466404576304722
Cost after epoch 20/75: 0.4263991080027515
Cost after epoch 21/75: 0.9307242713926515
Cost after epoch 22/75: 1.8381438680425553
Cost after epoch 23/75: 0.080237

In [10]:
# keep_prob 0.75 with the batch size doubled to 128 and 37 epochs.
layer_dims = [n_x, 300, 100, n_y]
nn = model(
    train_X=train_X,
    test_X=test_X,
    train_Y=multiclass_train_Y,
    test_Y=multiclass_test_Y,
    layer_dims=layer_dims,
    keep_prob=0.75,
    batch_size=128,
    num_epochs=37,
)

Cost after epoch 0/37: 203.5151948487828
Cost after epoch 1/37: 76.39290870857435
Cost after epoch 2/37: 40.0208873439112
Cost after epoch 3/37: 34.78961540631101
Cost after epoch 4/37: 41.09089147519268
Cost after epoch 5/37: 36.47115061149712
Cost after epoch 6/37: 20.66196768176234
Cost after epoch 7/37: 11.153884174768306
Cost after epoch 8/37: 15.462021001360597
Cost after epoch 9/37: 18.278632665918877
Cost after epoch 10/37: 13.185858254407712
Cost after epoch 11/37: 18.051065876934935
Cost after epoch 12/37: 12.175705859663669
Cost after epoch 13/37: 11.334603234757923
Cost after epoch 14/37: 20.898612133583015
Cost after epoch 15/37: 14.824241252447022
Cost after epoch 16/37: 17.237217800571607
Cost after epoch 17/37: 6.396205699789428
Cost after epoch 18/37: 1.6812831155223495
Cost after epoch 19/37: 8.785620366818643
Cost after epoch 20/37: 5.011284165980015
Cost after epoch 21/37: 10.581264323631496
Cost after epoch 22/37: 6.87705624077591
Cost after epoch 23/37: 13.6522439

In [11]:
# keep_prob 0.75, batch size 128, trained for 50 epochs.
layer_dims = [n_x, 300, 100, n_y]
nn = model(
    train_X=train_X,
    test_X=test_X,
    train_Y=multiclass_train_Y,
    test_Y=multiclass_test_Y,
    layer_dims=layer_dims,
    keep_prob=0.75,
    batch_size=128,
    num_epochs=50,
)

Cost after epoch 0/50: 210.47232090236122
Cost after epoch 1/50: 58.97995453324618
Cost after epoch 2/50: 43.38719882607248
Cost after epoch 3/50: 48.59046654779985
Cost after epoch 4/50: 25.902830990420192
Cost after epoch 5/50: 29.866048288304167
Cost after epoch 6/50: 30.950213567578672
Cost after epoch 7/50: 17.947395803467177
Cost after epoch 8/50: 26.748775832610498
Cost after epoch 9/50: 13.285733854529784
Cost after epoch 10/50: 13.38115527143611
Cost after epoch 11/50: 24.86286485451406
Cost after epoch 12/50: 13.59771934457896
Cost after epoch 13/50: 17.582518884369758
Cost after epoch 14/50: 7.550901486900445
Cost after epoch 15/50: 4.189104793006425
Cost after epoch 16/50: 6.774542834122201
Cost after epoch 17/50: 14.194787838163009
Cost after epoch 18/50: 18.561558823206113
Cost after epoch 19/50: 12.851504505394999
Cost after epoch 20/50: 14.596407557065463
Cost after epoch 21/50: 13.366168307025799
Cost after epoch 22/50: 11.218298874522127
Cost after epoch 23/50: 8.2909

In [12]:
# keep_prob raised to 0.87, batch size 128, trained for 37 epochs.
layer_dims = [n_x, 300, 100, n_y]
nn = model(
    train_X=train_X,
    test_X=test_X,
    train_Y=multiclass_train_Y,
    test_Y=multiclass_test_Y,
    layer_dims=layer_dims,
    keep_prob=0.87,
    batch_size=128,
    num_epochs=37,
)

Cost after epoch 0/37: 203.8845111629553
Cost after epoch 1/37: 55.11491449293612
Cost after epoch 2/37: 39.29792575843126
Cost after epoch 3/37: 29.57543719540113
Cost after epoch 4/37: 26.194112053401135
Cost after epoch 5/37: 24.133014253324717
Cost after epoch 6/37: 44.453729727877615
Cost after epoch 7/37: 19.094352516295018
Cost after epoch 8/37: 11.97385603357225
Cost after epoch 9/37: 16.984007177760002
Cost after epoch 10/37: 10.20011424228687
Cost after epoch 11/37: 14.29889992860059
Cost after epoch 12/37: 13.551035497108867
Cost after epoch 13/37: 18.658543107219085
Cost after epoch 14/37: 23.575710334188507
Cost after epoch 15/37: 5.546935206953768
Cost after epoch 16/37: 14.263577324972918
Cost after epoch 17/37: 5.224933380646222
Cost after epoch 18/37: 7.746769108004111
Cost after epoch 19/37: 5.004515061956633
Cost after epoch 20/37: 17.456548066328537
Cost after epoch 21/37: 3.9790181762683874
Cost after epoch 22/37: 7.870317945969533
Cost after epoch 23/37: 3.2518162

In [14]:
# keep_prob left at its default; lambd set to 0.3 instead, batch size 128, 150 epochs.
layer_dims = [n_x, 300, 100, n_y]
nn = model(
    train_X=train_X,
    test_X=test_X,
    train_Y=multiclass_train_Y,
    test_Y=multiclass_test_Y,
    layer_dims=layer_dims,
    lambd=0.3,
    batch_size=128,
    num_epochs=150,
)

Cost after epoch 0/150: 213.79688149126272
Cost after epoch 1/150: 79.95075608862615
Cost after epoch 2/150: 55.54115945304653
Cost after epoch 3/150: 30.40679367888955
Cost after epoch 4/150: 28.561731171274527
Cost after epoch 5/150: 24.74528333774684
Cost after epoch 6/150: 21.69897342435509
Cost after epoch 7/150: 14.791370370938782
Cost after epoch 8/150: 22.381045820850144
Cost after epoch 9/150: 18.750002662121755
Cost after epoch 10/150: 15.498528648222976
Cost after epoch 11/150: 16.361811124966742
Cost after epoch 12/150: 12.286154507150286
Cost after epoch 13/150: 10.875641676150467
Cost after epoch 14/150: 15.934584838758907
Cost after epoch 15/150: 12.722277438337763
Cost after epoch 16/150: 22.70959996007737
Cost after epoch 17/150: 4.2779946111607945
Cost after epoch 18/150: 17.450133146230925
Cost after epoch 19/150: 6.41669659035071
Cost after epoch 20/150: 13.974217098606568
Cost after epoch 21/150: 9.486221211105288
Cost after epoch 22/150: 9.265094044436506
Cost aft

In [15]:
# keep_prob 0.75, batch size 128, trained for 150 epochs.
layer_dims = [n_x, 300, 100, n_y]
nn = model(
    train_X=train_X,
    test_X=test_X,
    train_Y=multiclass_train_Y,
    test_Y=multiclass_test_Y,
    layer_dims=layer_dims,
    keep_prob=0.75,
    batch_size=128,
    num_epochs=150,
)

Cost after epoch 0/150: 191.11575436035528
Cost after epoch 1/150: 92.11838829522749
Cost after epoch 2/150: 49.885929987116576
Cost after epoch 3/150: 37.484357301833235
Cost after epoch 4/150: 31.83322663631082
Cost after epoch 5/150: 22.738887172532255
Cost after epoch 6/150: 23.48309120718109
Cost after epoch 7/150: 15.00506157841925
Cost after epoch 8/150: 15.218995931054694
Cost after epoch 9/150: 15.117365569495622
Cost after epoch 10/150: 13.301341190653122
Cost after epoch 11/150: 11.953372145995454
Cost after epoch 12/150: 7.4061628494026985
Cost after epoch 13/150: 5.965059713565438
Cost after epoch 14/150: 12.644399338728073
Cost after epoch 15/150: 3.504732551698198
Cost after epoch 16/150: 17.727368296317746
Cost after epoch 17/150: 11.489685123199488
Cost after epoch 18/150: 8.422821333252054
Cost after epoch 19/150: 10.85133035869328
Cost after epoch 20/150: 6.649805097667295
Cost after epoch 21/150: 10.59615983246277
Cost after epoch 22/150: 8.356858500804199
Cost afte