In [1]:
import argparse
import numpy as np
import json
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

In [2]:
digits = 10

test = np.load('test.npz')
train = np.load('train.npz')

y_test = test['label']
x_test = test['image']
y_train = train['label']
x_train = train['image']

x_train = x_train.reshape(-1, x_train.shape[1] * x_train.shape[2])
x_test = x_test.reshape(-1, x_test.shape[1] * x_test.shape[2])
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

X = np.vstack((x_train, x_test))
y = np.vstack((y_train, y_test))

examples = y.shape[0]
y = y.reshape(1, examples)
Y_new = np.eye(digits)[y.astype('int32')]
Y_new = Y_new.T.reshape(digits, examples)
  #print(y)

m = x_train.shape[0]
X_train, X_test = X[:m].T, X[m:].T
Y_train, Y_test = Y_new[:, :m], Y_new[:, m:]

shuffle_index = np.random.permutation(m)
X_train, Y_train = X_train[:, shuffle_index], Y_train[:, shuffle_index]

In [3]:
def sigmoid(z):
    s = 1. / (1. + np.exp(-z))
    return s


In [4]:
def sigmoid_gradient(z):
#     # To prevent from overflow
#     z = np.clip(z, 1e-15, 1 - 1e-15)
    s = sigmoid(z) * (1 - sigmoid(z))
    return s


In [5]:
def cross_entropy(Y, Y_hat):
    L_sum = np.sum(np.multiply(Y, np.log(Y_hat)))
    m = Y.shape[1]
    L = -(1./m) * L_sum

    return L

In [6]:
def cross_entropy_gradient(Y, Y_hat):
    L = Y_hat - Y
    return L

In [7]:
def softmax(z):
    s = np.exp(z) / np.sum(np.exp(z), axis=0)
    return s

In [8]:
def evaluation(predicts, golds):
    correct = 0
    total = len(predicts)
    assert len(predicts) == len(golds)
    for predict, gold in zip(predicts, golds):
        if predict == gold:
            correct += 1
    accurancy = correct / total
    return accurancy

In [9]:
class Layer:
    def __init__(self, input_, output):
        self.input = input_
        self.output = output  # number of layer node
        self.W = np.random.randn(self.output, self.input) * np.sqrt(1. / self.input)
        self.b = np.zeros((self.output, 1)) * np.sqrt(1. / self.input)
        
    def forward(self, last_layer):
        self.last_layer = last_layer
        layer_output = np.matmul(self.W, self.last_layer) + self.b
#         layer_output = sigmoid(layer_output_temp)
        return layer_output
    
    def back_propagation(self, CE_gradientorgradient, m_batch, learning_rate):
        W_temp = self.W
        W_gradient = (1. / m_batch) * np.matmul(CE_gradientorgradient, self.last_layer.T)
        b_gradient = (1. / m_batch) * np.sum(CE_gradientorgradient, axis=1, keepdims=True)
        self.W_new = self.W - learning_rate * W_gradient
        self.b_new = self.b - learning_rate * b_gradient
        self.W = self.W_new
        self.b = self.b_new
        gradient_temp = np.matmul(W_temp.T, CE_gradientorgradient)
        return gradient_temp
        
    
    

In [10]:
hiddenlayer1 = Layer(784, 400)
hiddenlayer2 = Layer(400, 400)
outputlayer = Layer(400, 10)

In [11]:
def SGD_train_epoch(X_train, Y_train, batch_size = 64, epoch = 10, learning_rate = 0.03):
    for i in range(epoch):
       
        # shuffle training set
        permutation = np.random.permutation(X_train.shape[1])
        X_train_shuffled = X_train[:, permutation]
        Y_train_shuffled = Y_train[:, permutation]
    
        batch_num = len(X_train) // batch_size
        predicts = []
        golds = []
        predicts_test = []
        golds_test = []
        
        for j in range(batch_num):
            begin = j * batch_size
            end = min(begin + batch_size, X_train.shape[1] - 1)
            X = X_train[:, begin:end]
            Y = Y_train[:, begin:end]
            m_batch = end - begin
            
            output1_temp = hiddenlayer1.forward(X)
            output1 = sigmoid(output1_temp)
            output2_temp = hiddenlayer2.forward(output1)
            output2 = sigmoid(output2_temp)
            y_hat_temp = outputlayer.forward(output2)
            y_hat = softmax(y_hat_temp)
            #print(y_hat)
            
            predicts += np.argmax(y_hat, axis = 0).tolist()
            golds += np.argmax(Y, axis = 0).tolist()
            
            
            loss = cross_entropy(Y, y_hat)
            gradient = cross_entropy_gradient(Y, y_hat)
            
            back_output1 = outputlayer.back_propagation(gradient, m_batch, learning_rate)
            back_output2_temp = sigmoid_gradient(output2_temp) * back_output1
            back_output2 = hiddenlayer2.back_propagation(back_output2_temp, m_batch, learning_rate)
            back_output3_temp = sigmoid_gradient(output1_temp) * back_output2
            back_output3 = hiddenlayer1.back_propagation(back_output3_temp, m_batch, learning_rate)
            
            #---------test data-----------
            
            
            output1_temp = hiddenlayer1.forward(X_test)
            output1 = sigmoid(output1_temp)
            output2_temp = hiddenlayer2.forward(output1)
            output2 = sigmoid(output2_temp)
            y_hat_temp = outputlayer.forward(output2)
            y_hat = softmax(y_hat_temp)
#             print(y_hat.shape)
            #print(y_hat)
#             print(Y_test.shape)
            predicts_test += np.argmax(y_hat, axis=0).tolist()
            golds_test += np.argmax(Y_test, axis=0).tolist()
            
            
            loss_test = cross_entropy(Y_test, y_hat)
            
        print('Epoch : ', i + 1, 'training_loss = ', loss, 'test_loss = ', loss_test, 'train_accur = ', evaluation(predicts, golds), 'test_accur = ', evaluation(predicts_test, golds_test))
            
            


In [12]:
SGD_train_epoch(X_train, Y_train, batch_size = 64, epoch = 1000, learning_rate = 0.01)

Epoch :  1 training_loss =  2.3157105331529597 test_loss =  2.2923375338066214 train_accur =  0.11848958333333333 test_accur =  0.13570561719833565
Epoch :  2 training_loss =  2.246985622304252 test_loss =  2.246816531120839 train_accur =  0.22526041666666666 test_accur =  0.18660425335182618
Epoch :  3 training_loss =  2.20975154126718 test_loss =  2.2223439869743635 train_accur =  0.23177083333333334 test_accur =  0.19913025889967637
Epoch :  4 training_loss =  2.179867829612064 test_loss =  2.20036003543622 train_accur =  0.24479166666666666 test_accur =  0.21208969024503005
Epoch :  5 training_loss =  2.1545609267310883 test_loss =  2.1776446236693245 train_accur =  0.265625 test_accur =  0.22659500693481277
Epoch :  6 training_loss =  2.1288969656303323 test_loss =  2.155657838780315 train_accur =  0.29296875 test_accur =  0.24354195561719832
Epoch :  7 training_loss =  2.104601244657802 test_loss =  2.1343042379132053 train_accur =  0.3229166666666667 test_accur =  0.261427993527

Epoch :  58 training_loss =  1.0167767020800276 test_loss =  1.2048859741142548 train_accur =  0.875 test_accur =  0.7413603791030976
Epoch :  59 training_loss =  1.0011870888293823 test_loss =  1.192068212749601 train_accur =  0.8802083333333334 test_accur =  0.7438886962552012
Epoch :  60 training_loss =  0.9858799673348386 test_loss =  1.1790851329976746 train_accur =  0.8815104166666666 test_accur =  0.7459691400832178
Epoch :  61 training_loss =  0.9714486670079439 test_loss =  1.166901486403053 train_accur =  0.8828125 test_accur =  0.748136269070735
Epoch :  62 training_loss =  0.9573588904149427 test_loss =  1.1547232206957727 train_accur =  0.8880208333333334 test_accur =  0.7499855524734166
Epoch :  63 training_loss =  0.9446966980492532 test_loss =  1.1429974488728567 train_accur =  0.8893229166666666 test_accur =  0.7517770457697642
Epoch :  64 training_loss =  0.9308842279483178 test_loss =  1.1311852577394383 train_accur =  0.890625 test_accur =  0.7538141470180305
Epoch 

Epoch :  116 training_loss =  0.49687370389564267 test_loss =  0.7729182380742229 train_accur =  0.9283854166666666 test_accur =  0.8004652103559871
Epoch :  117 training_loss =  0.49198859690804164 test_loss =  0.7691338873295916 train_accur =  0.9283854166666666 test_accur =  0.8006674757281553
Epoch :  118 training_loss =  0.4869838388564155 test_loss =  0.7654407874091701 train_accur =  0.9283854166666666 test_accur =  0.8011875866851595
Epoch :  119 training_loss =  0.48186028076980647 test_loss =  0.7618471808198817 train_accur =  0.9296875 test_accur =  0.801490984743412
Epoch :  120 training_loss =  0.47729734764152576 test_loss =  0.758347622803246 train_accur =  0.9296875 test_accur =  0.8020255432269995
Epoch :  121 training_loss =  0.47250272835760987 test_loss =  0.7548618742452348 train_accur =  0.93359375 test_accur =  0.8028346047156727
Epoch :  122 training_loss =  0.4681136178741182 test_loss =  0.7513921725695862 train_accur =  0.9348958333333334 test_accur =  0.8030

Epoch :  173 training_loss =  0.3095961371967615 test_loss =  0.6315941338538648 train_accur =  0.94921875 test_accur =  0.8160829865926953
Epoch :  174 training_loss =  0.30755468427674737 test_loss =  0.6300653517624253 train_accur =  0.94921875 test_accur =  0.8162130143319464
Epoch :  175 training_loss =  0.30534029635157095 test_loss =  0.62847761467616 train_accur =  0.94921875 test_accur =  0.8163719371243643
Epoch :  176 training_loss =  0.3032824216231974 test_loss =  0.6270593202932431 train_accur =  0.94921875 test_accur =  0.8164008321775312
Epoch :  177 training_loss =  0.30110016341478574 test_loss =  0.6255003748272393 train_accur =  0.94921875 test_accur =  0.8166030975496995
Epoch :  178 training_loss =  0.29897786905907564 test_loss =  0.6240424105580296 train_accur =  0.94921875 test_accur =  0.8167042302357836
Epoch :  179 training_loss =  0.29696131934379977 test_loss =  0.6226435116162009 train_accur =  0.94921875 test_accur =  0.8167909153952844
Epoch :  180 trai

Epoch :  230 training_loss =  0.21975266119190692 test_loss =  0.5671788330099161 train_accur =  0.96484375 test_accur =  0.8229455617198336
Epoch :  231 training_loss =  0.21857583200706215 test_loss =  0.5663766435943901 train_accur =  0.96484375 test_accur =  0.8229889042995839
Epoch :  232 training_loss =  0.21742970983162102 test_loss =  0.5655820879284776 train_accur =  0.9661458333333334 test_accur =  0.8231622746185853
Epoch :  233 training_loss =  0.21669540721907138 test_loss =  0.5647787039634442 train_accur =  0.9661458333333334 test_accur =  0.823220064724919
Epoch :  234 training_loss =  0.21514490709581166 test_loss =  0.5640781712769928 train_accur =  0.9661458333333334 test_accur =  0.8234367776236708
Epoch :  235 training_loss =  0.21391058577389155 test_loss =  0.5632852014678792 train_accur =  0.9661458333333334 test_accur =  0.823509015256588
Epoch :  236 training_loss =  0.21283967682666008 test_loss =  0.5625176429270372 train_accur =  0.9661458333333334 test_acc

Epoch :  287 training_loss =  0.16323237028566445 test_loss =  0.5316076023541894 train_accur =  0.97265625 test_accur =  0.8254160887656034
Epoch :  288 training_loss =  0.16223410914803918 test_loss =  0.5311478814427653 train_accur =  0.97265625 test_accur =  0.8256183541377716
Epoch :  289 training_loss =  0.1618012861697724 test_loss =  0.5305810918073472 train_accur =  0.97265625 test_accur =  0.8254594313453537
Epoch :  290 training_loss =  0.16045817617794464 test_loss =  0.5300752198050821 train_accur =  0.97265625 test_accur =  0.825661696717522
Epoch :  291 training_loss =  0.15974215874122027 test_loss =  0.5296028078539154 train_accur =  0.97265625 test_accur =  0.825791724456773
Epoch :  292 training_loss =  0.15907425687260154 test_loss =  0.529163233710972 train_accur =  0.97265625 test_accur =  0.8259073046694406
Epoch :  293 training_loss =  0.15837123449325882 test_loss =  0.5286975824421852 train_accur =  0.97265625 test_accur =  0.8260084373555248
Epoch :  294 trai

Epoch :  344 training_loss =  0.1288750879466272 test_loss =  0.5101138204745327 train_accur =  0.9817708333333334 test_accur =  0.8284211742949606
Epoch :  345 training_loss =  0.12822197919609468 test_loss =  0.5094592324481269 train_accur =  0.9830729166666666 test_accur =  0.8285512020342117
Epoch :  346 training_loss =  0.1277233501725179 test_loss =  0.5091482158165046 train_accur =  0.984375 test_accur =  0.8286523347202959
Epoch :  347 training_loss =  0.1272178658588114 test_loss =  0.508863337844746 train_accur =  0.9856770833333334 test_accur =  0.8287390198797966
Epoch :  348 training_loss =  0.12672001948096145 test_loss =  0.5085940627648619 train_accur =  0.9856770833333334 test_accur =  0.82875346740638
Epoch :  349 training_loss =  0.126234502626304 test_loss =  0.5083104726920168 train_accur =  0.9856770833333334 test_accur =  0.8287679149329634
Epoch :  350 training_loss =  0.12577185144527886 test_loss =  0.5080214124687561 train_accur =  0.9856770833333334 test_acc

Epoch :  400 training_loss =  0.10323354795417393 test_loss =  0.49623461991465473 train_accur =  0.9895833333333334 test_accur =  0.8306605409153953
Epoch :  401 training_loss =  0.1028494748963171 test_loss =  0.4960241371877655 train_accur =  0.9895833333333334 test_accur =  0.8307327785483125
Epoch :  402 training_loss =  0.10249301215069133 test_loss =  0.4958380735759679 train_accur =  0.9895833333333334 test_accur =  0.8307761211280629
Epoch :  403 training_loss =  0.10214458459018921 test_loss =  0.4956425877541129 train_accur =  0.9895833333333334 test_accur =  0.8308194637078132
Epoch :  404 training_loss =  0.10178873662282713 test_loss =  0.4954477099145283 train_accur =  0.9895833333333334 test_accur =  0.8309639389736477
Epoch :  405 training_loss =  0.10142978906005742 test_loss =  0.4952540626616389 train_accur =  0.9895833333333334 test_accur =  0.831166204345816
Epoch :  406 training_loss =  0.10107043240661741 test_loss =  0.4950658420361414 train_accur =  0.98958333

Epoch :  456 training_loss =  0.08589542016623486 test_loss =  0.48710407157376556 train_accur =  0.9921875 test_accur =  0.8317730004623208
Epoch :  457 training_loss =  0.08564192798279159 test_loss =  0.48703239198762016 train_accur =  0.9921875 test_accur =  0.8318452380952381
Epoch :  458 training_loss =  0.08537409633246333 test_loss =  0.48682917876580556 train_accur =  0.9921875 test_accur =  0.8318596856218216
Epoch :  459 training_loss =  0.08506390152933524 test_loss =  0.4867540368408986 train_accur =  0.9921875 test_accur =  0.8319319232547387
Epoch :  460 training_loss =  0.08475955549950183 test_loss =  0.4866306179607879 train_accur =  0.9921875 test_accur =  0.832004160887656
Epoch :  461 training_loss =  0.08449949041771329 test_loss =  0.4865346056995603 train_accur =  0.9921875 test_accur =  0.8320619509939898
Epoch :  462 training_loss =  0.08424791111999441 test_loss =  0.4863797100963739 train_accur =  0.9921875 test_accur =  0.8321486361534906
Epoch :  463 train

Epoch :  512 training_loss =  0.07318225753094622 test_loss =  0.4801326195425453 train_accur =  0.9947916666666666 test_accur =  0.833420018492834
Epoch :  513 training_loss =  0.07298878400855618 test_loss =  0.4799992003011782 train_accur =  0.9947916666666666 test_accur =  0.8334344660194175
Epoch :  514 training_loss =  0.07279518711047897 test_loss =  0.47987611254773754 train_accur =  0.9947916666666666 test_accur =  0.8335211511789181
Epoch :  515 training_loss =  0.07260072141905699 test_loss =  0.4797520435338714 train_accur =  0.9947916666666666 test_accur =  0.8335644937586685
Epoch :  516 training_loss =  0.07240385373587749 test_loss =  0.4796452668521742 train_accur =  0.9947916666666666 test_accur =  0.8336656264447526
Epoch :  517 training_loss =  0.07220919154521854 test_loss =  0.479544730411491 train_accur =  0.9947916666666666 test_accur =  0.8338245492371706
Epoch :  518 training_loss =  0.0720156177652937 test_loss =  0.47944545189680554 train_accur =  0.99479166

Epoch :  569 training_loss =  0.06267086828597053 test_loss =  0.47487681589819924 train_accur =  0.99609375 test_accur =  0.8343591077207582
Epoch :  570 training_loss =  0.062495973193060446 test_loss =  0.47480346145576535 train_accur =  0.99609375 test_accur =  0.8343735552473417
Epoch :  571 training_loss =  0.062332091544701054 test_loss =  0.474736518495351 train_accur =  0.99609375 test_accur =  0.8343735552473417
Epoch :  572 training_loss =  0.062192182404756824 test_loss =  0.4746679182204719 train_accur =  0.99609375 test_accur =  0.834416897827092
Epoch :  573 training_loss =  0.062033527698105204 test_loss =  0.47460596270001926 train_accur =  0.99609375 test_accur =  0.8346047156726768
Epoch :  574 training_loss =  0.0618752691269943 test_loss =  0.4745698183910703 train_accur =  0.99609375 test_accur =  0.8346336107258437
Epoch :  575 training_loss =  0.06172003452473278 test_loss =  0.47453934115347923 train_accur =  0.99609375 test_accur =  0.83457582061951
Epoch :  5

Epoch :  626 training_loss =  0.05380995891573939 test_loss =  0.47151599171950265 train_accur =  0.9973958333333334 test_accur =  0.8350092464170135
Epoch :  627 training_loss =  0.053680454754710824 test_loss =  0.4714751251805989 train_accur =  0.9973958333333334 test_accur =  0.8349370087840962
Epoch :  628 training_loss =  0.05355256636715815 test_loss =  0.47143320797355465 train_accur =  0.9973958333333334 test_accur =  0.834864771151179
Epoch :  629 training_loss =  0.053426317919346114 test_loss =  0.47138889495548236 train_accur =  0.9973958333333334 test_accur =  0.8348792186777624
Epoch :  630 training_loss =  0.05330219395519183 test_loss =  0.47134230165148505 train_accur =  0.9973958333333334 test_accur =  0.834864771151179
Epoch :  631 training_loss =  0.05318182980469929 test_loss =  0.47129633102446433 train_accur =  0.9973958333333334 test_accur =  0.8348069810448451
Epoch :  632 training_loss =  0.053063354488506714 test_loss =  0.47126481180924135 train_accur =  0.

Epoch :  681 training_loss =  0.04758522171636685 test_loss =  0.4691930260360904 train_accur =  0.9973958333333334 test_accur =  0.8350814840499307
Epoch :  682 training_loss =  0.047475466307892 test_loss =  0.46931080890147703 train_accur =  0.9973958333333334 test_accur =  0.8350381414701803
Epoch :  683 training_loss =  0.04736876046816753 test_loss =  0.4693185469882767 train_accur =  0.9973958333333334 test_accur =  0.8350814840499307
Epoch :  684 training_loss =  0.04726350185605452 test_loss =  0.4691265431472488 train_accur =  0.9973958333333334 test_accur =  0.8351392741562644
Epoch :  685 training_loss =  0.047157977768239365 test_loss =  0.469037680108261 train_accur =  0.9973958333333334 test_accur =  0.8351681692094314
Epoch :  686 training_loss =  0.047051103700165756 test_loss =  0.46898170220370644 train_accur =  0.9973958333333334 test_accur =  0.8351392741562644
Epoch :  687 training_loss =  0.046939826139275255 test_loss =  0.4689350624320719 train_accur =  0.99739

Epoch :  736 training_loss =  0.04214210112155463 test_loss =  0.467355093347777 train_accur =  0.9973958333333334 test_accur =  0.8358183079056866
Epoch :  737 training_loss =  0.042057130900908506 test_loss =  0.4673343142885077 train_accur =  0.9973958333333334 test_accur =  0.8358905455386038
Epoch :  738 training_loss =  0.04197145154337537 test_loss =  0.4673123098429361 train_accur =  0.9973958333333334 test_accur =  0.8358905455386038
Epoch :  739 training_loss =  0.04188727331460346 test_loss =  0.4672886425996311 train_accur =  0.9973958333333334 test_accur =  0.8358760980120203
Epoch :  740 training_loss =  0.041803765855837755 test_loss =  0.4672642848350743 train_accur =  0.9973958333333334 test_accur =  0.8357894128525196
Epoch :  741 training_loss =  0.04172058826096059 test_loss =  0.46723999843148584 train_accur =  0.9973958333333334 test_accur =  0.835702727693019
Epoch :  742 training_loss =  0.04163747775881274 test_loss =  0.46721644884698327 train_accur =  0.99739

Epoch :  791 training_loss =  0.03773979266564515 test_loss =  0.4663385784689366 train_accur =  0.9973958333333334 test_accur =  0.8352693018955155
Epoch :  792 training_loss =  0.037666583712464376 test_loss =  0.4663228989160282 train_accur =  0.9973958333333334 test_accur =  0.835254854368932
Epoch :  793 training_loss =  0.03759633636976807 test_loss =  0.4663115265520548 train_accur =  0.9973958333333334 test_accur =  0.8353270920018493
Epoch :  794 training_loss =  0.037529002207900994 test_loss =  0.46630510252929736 train_accur =  0.9973958333333334 test_accur =  0.8353559870550162
Epoch :  795 training_loss =  0.03746457294071434 test_loss =  0.4663096394705143 train_accur =  0.9973958333333334 test_accur =  0.8353704345815997
Epoch :  796 training_loss =  0.03739680055456636 test_loss =  0.46631498021582224 train_accur =  0.9973958333333334 test_accur =  0.8353993296347665
Epoch :  797 training_loss =  0.0373287411828586 test_loss =  0.46628886716239953 train_accur =  0.9973

Epoch :  846 training_loss =  0.03389080448495707 test_loss =  0.4655742183131607 train_accur =  0.9986979166666666 test_accur =  0.83583275543227
Epoch :  847 training_loss =  0.03383494172897028 test_loss =  0.4655509654025777 train_accur =  0.9986979166666666 test_accur =  0.83583275543227
Epoch :  848 training_loss =  0.03377991754045194 test_loss =  0.4655287364204206 train_accur =  0.9986979166666666 test_accur =  0.8359049930651873
Epoch :  849 training_loss =  0.03372537346117161 test_loss =  0.46550755187607423 train_accur =  0.9986979166666666 test_accur =  0.8359049930651873
Epoch :  850 training_loss =  0.033671778116085946 test_loss =  0.46548528887018376 train_accur =  0.9986979166666666 test_accur =  0.8359194405917707
Epoch :  851 training_loss =  0.033618374676412495 test_loss =  0.46545888502327865 train_accur =  0.9986979166666666 test_accur =  0.8359049930651873
Epoch :  852 training_loss =  0.033563512620585 test_loss =  0.4654288111479937 train_accur =  0.99869791

Epoch :  907 training_loss =  0.030699201266934668 test_loss =  0.46524178362656277 train_accur =  1.0 test_accur =  0.8370896902450301
Epoch :  908 training_loss =  0.03065133133184902 test_loss =  0.4652393472357153 train_accur =  1.0 test_accur =  0.8371763754045307
Epoch :  909 training_loss =  0.030603652757323095 test_loss =  0.4652385711237516 train_accur =  1.0 test_accur =  0.8373497457235322
Epoch :  910 training_loss =  0.03055621289685974 test_loss =  0.46523889641860017 train_accur =  1.0 test_accur =  0.8374075358298659
Epoch :  911 training_loss =  0.03050894226204743 test_loss =  0.4652354894817512 train_accur =  1.0 test_accur =  0.8374219833564494
Epoch :  912 training_loss =  0.030461705214812642 test_loss =  0.46522879047543847 train_accur =  1.0 test_accur =  0.8374797734627831
Epoch :  913 training_loss =  0.030414496101642242 test_loss =  0.4652257733386834 train_accur =  1.0 test_accur =  0.8375231160425335
Epoch :  914 training_loss =  0.030367153131997615 test

Epoch :  968 training_loss =  0.02773389480932189 test_loss =  0.4651112981281309 train_accur =  1.0 test_accur =  0.8366418169209431
Epoch :  969 training_loss =  0.027693860381987574 test_loss =  0.46510820613212894 train_accur =  1.0 test_accur =  0.8365840268146093
Epoch :  970 training_loss =  0.027654254921425134 test_loss =  0.4651110668822527 train_accur =  1.0 test_accur =  0.8365840268146093
Epoch :  971 training_loss =  0.027614805402806752 test_loss =  0.46511894738318316 train_accur =  1.0 test_accur =  0.8365984743411928
Epoch :  972 training_loss =  0.02757544492408557 test_loss =  0.46513020827324664 train_accur =  1.0 test_accur =  0.8365984743411928
Epoch :  973 training_loss =  0.027536213091911333 test_loss =  0.4651450754705373 train_accur =  1.0 test_accur =  0.8365840268146093
Epoch :  974 training_loss =  0.027497121811368548 test_loss =  0.46516732639060937 train_accur =  1.0 test_accur =  0.8365695792880259
Epoch :  975 training_loss =  0.027458244324092176 te

In [13]:
# parameters = {"W1": np.random.randn(400, 784) * np.sqrt(1. / 784),
#               "b1": np.zeros((400, 1)) * np.sqrt(1. / 784),
#               "W2": np.random.randn(400, 400) * np.sqrt(1. / 400),
#               "b2": np.zeros((400, 1)) * np.sqrt(1. / 400),
#               "W3": np.random.randn(digits, 400) * np.sqrt(1. / 400),
#               "b3": np.zeros((digits, 1)) * np.sqrt(1. / 400)}


In [14]:
# def forward(X, parameters):
#     inoutput = {}
#     inoutput["hiddenlayer1_output_temp"] = np.matmul(parameters["W1"], X) + parameters["b1"]
#     inoutput["hiddenlayer1_output"] = sigmoid(inoutput["hiddenlayer1_output_temp"])
    
#     inoutput["hiddenlayer2_output_temp"] = np.matmul(parameters["W2"], inoutput["hiddenlayer1_output"]) + parameters["b2"]
#     inoutput["hiddenlayer2_output"] = sigmoid(inoutput["hiddenlayer2_output_temp"])
    
#     inoutput["outputlayer_output_temp"] = np.matmul(parameters["W3"], inoutput["hiddenlayer2_output"]) + parameters["b3"]
#     inoutput["outputlayer_output"] = softmax(inoutput["outputlayer_output_temp"])
#     #inoutput["outputlayer_output"] = y_hat

#     return inoutput

In [15]:
# def back_propagation(X, Y, parameters, inoutput, m_batch):
#     CE_gradient = inoutput["outputlayer_output"] - Y

#     W3_gradient = (1. / m_batch) * np.matmul(CE_gradient, inoutput["hiddenlayer2_output"].T)
#     b3_gradient = (1. / m_batch) * np.sum(CE_gradient, axis=1, keepdims=True)

#     # ---

#     outputlayer_backward_output = np.matmul(parameters["W3"].T, CE_gradient)
#     hiddenlayer2_backward_input = outputlayer_backward_output * sigmoid_gradient(inoutput["hiddenlayer2_output_temp"])

#     W2_gradient = (1. / m_batch) * np.matmul(hiddenlayer2_backward_input, inoutput["hiddenlayer1_output"].T)
#     b2_gradient = (1. / m_batch) * np.sum(hiddenlayer2_backward_input, axis=1, keepdims=True)

#     # ---

#     hiddenlayer2_backward_output = np.matmul(parameters["W2"].T, hiddenlayer2_backward_input)
#     hiddenlayer1_backward_input = hiddenlayer2_backward_output * sigmoid_gradient(inoutput["hiddenlayer1_output_temp"])

#     W1_gradient = (1. / m_batch) * np.matmul(hiddenlayer1_backward_input, X.T)
#     b1_gradient = (1. / m_batch) * np.sum(hiddenlayer1_backward_input, axis=1, keepdims=True)

#     Wb_gradients = {"W1_gradient": W1_gradient, "b1_gradient": b1_gradient, "W2_gradient": W2_gradient,
#                      "b2_gradient": b2_gradient, "W3_gradient": W3_gradient, "b3_gradient": b3_gradient}

#     return Wb_gradients

In [16]:
# if __name__ == "__main__":
#     epoch = 3
#     batch_size = 64
#     TrainError = []
#     TestError = []
#     for i in range(epoch):
       
#         # shuffle training set
# #         permutation = np.random.permutation(X_train.shape[1])
# #         X_train_shuffled = X_train[:, permutation]
# #         Y_train_shuffled = Y_train[:, permutation]
    
#         batch_num = len(X_train) // batch_size
#         predicts = []
#         golds = []
#         predicts_test = []
#         golds_test = []
#         learning_rate = 0.03
        
        
#         for j in range(batch_num):
#             begin = j * batch_size
#             end = min(begin + batch_size, X_train.shape[1] - 1)
#             X = X_train[:, begin:end]
#             Y = Y_train[:, begin:end]
#             m_batch = end - begin
            
#             inoutput = forward(X, parameters)
#             Wb_gradients = back_propagation(X, Y, parameters, inoutput, m_batch)
            
#             W1_gradient = Wb_gradients["W1_gradient"]
#             b1_gradient = Wb_gradients["b1_gradient"]
#             W2_gradient = Wb_gradients["W2_gradient"]
#             b2_gradient = Wb_gradients["b2_gradient"]
#             W3_gradient = Wb_gradients["W3_gradient"]
#             b3_gradient = Wb_gradients["b3_gradient"]
            
#             parameters["W1"] = parameters["W1"] - learning_rate * W1_gradient
#             parameters["b1"] = parameters["b1"] - learning_rate * b1_gradient
#             parameters["W2"] = parameters["W2"] - learning_rate * W2_gradient
#             parameters["b2"] = parameters["b2"] - learning_rate * b2_gradient
#             parameters["W3"] = parameters["W3"] - learning_rate * W3_gradient
#             parameters["b3"] = parameters["b3"] - learning_rate * b3_gradient
            
            
          
#         inoutput = forward(X_train, parameters)
#         #print(Y.shape)
        
#         #print(inoutput["outputlayer_output"].shape)
#         train_loss = cross_entropy(Y_train, inoutput["outputlayer_output"])

#         predicts += np.argmax(inoutput["outputlayer_output"], axis=0).tolist()
#         golds += np.argmax(Y_train, axis=0).tolist()
        
#         inoutput = forward(X_test, parameters)
#         #print(Y.shape)
        
#         #print(inoutput["outputlayer_output"].shape)
#         test_loss = cross_entropy(Y_test, inoutput["outputlayer_output"])

#         predicts_test += np.argmax(inoutput["outputlayer_output"], axis=0).tolist()
#         golds_test += np.argmax(Y_test, axis=0).tolist()
        
#         print("Epoch {}: training loss = {},  test loss = {}, Train_accur = {},Test_accur = {}".format(
#             i + 1, train_loss, test_loss, evaluation(predicts, golds), evaluation(predicts_test, golds_test)))

#         TrainError.append(1 - evaluation(predicts, golds))
#         TestError.append(1 - evaluation(predicts_test, golds_test))
        
        
# #         new_x_axis = np.arange(0,500, 5)
# #         fig, ax = plt.subplots(1, 1)
# #         print(TrainError.shape)
# #         print(new_x_axis.shape)
# #         ax.plot(new_x_axis, TrainError)
              
        
# #     with open("Train_error_rate.json", mode="w") as stream:
# #         json.dump(TrainError, stream)

# #     with open("Test_error_rate.json", mode="w") as stream:
# #         json.dump(TestError, stream)