In [173]:
%matplotlib inline
import matplotlib.pyplot as plt

import numpy as np

from sklearn import datasets
from sklearn import model_selection

import sys

#load mnist datasets
mnist = datasets.fetch_mldata("MNIST original", data_home=".")

In [210]:
#input data & label preprocessing
X_train, X_test, y_train, y_test = \
    model_selection.train_test_split(mnist.data, mnist.target, test_size = 1/7)

X_train = X_train / 255.
X_test = X_test/ 255.

#add bias
b_train = np.ones((len(X_train), 1))
b_test = np.ones((len(X_test), 1))
X_train = np.append(X_train, b_train, axis = 1)
X_test = np.append(X_test, b_test, axis = 1)

#convert to one-hot label
Y_train = [[0 if i != y_train[j] else 1 for i in range(10)] for j in range(len(y_train))]
Y_test = [[0 if i != y_test[j] else 1 for i in range(10)] for j in range(len(y_test))]
Y_train = np.array(Y_train)
Y_test = np.array(Y_test)

In [265]:
#weight init (-1 ~ 1)
weight = (1.0 - (-1.0)) * np.random.rand(3, 28*28+1, 28*28+1) -1

#define learning rate
lr = 0.001

#define mu for momentum
mu = 0.8

#define alpha, g for RMSProp
alpha = 0.9
g = np.zeros((3, 28*28+1, 28*28+1))

#define epoch
epoch = 20

#define batch size
batch_size = 200

In [107]:
#fully connected layer
def myFCLayer(output_unit, input_data, layer_no):
    w = weight[layer_no, 0:output_unit, 0:len(input_data)]
    z = np.dot(w, input_data)

    return z

In [97]:
#activation
def myActivation(z, activation):
    if activation == "ReLU":
        y = myReLU(z)
        
    elif activation == "softmax":
        y = mySoftmax(len(z), z)
        
    return y

In [232]:
#activation function - ReLU
def myReLU(z):
    y = np.where(z > 0, z, 0)
    
    return y

#activation function - softmax function
"""def mySoftmax(unit_no, input_data):
    e = np.e
    y = []
    for i in range(unit_no):
        s = 0
        for j in range(unit_no):
            s += e ** (input_data[j] - max(input_data))
        y.append((e ** (input_data[i] - max(input_data))) / s)

    return y"""
def mySoftmax(unit_no, input_data):
    c = np.max(input_data)
    exp_a = np.exp(input_data - c)
    sum = np.sum(exp_a)
    y = exp_a / sum
    
    return y

In [262]:
#drop out layer
def myDropOut(input_data, late):
    index = np.random.randint(0, int(len(input_data) * late))
    domask = np.ones(len(input_data))
    domask[index] = 0
    
    return domask


In [77]:
#calculate accuracy
def myCalcAcc(est, label_data):
    est = np.array(est)
    label_data = np.array(label_data)
    
    a = np.argmax(est, axis=1) - np.argmax(label_data, axis=1)
    acc = np.sum(a == 0)
    acc = acc / label_data.shape[0]
    
    return acc

In [240]:
#loss function - Cross Entropy
def myCrossEntropy(est, t):
    E = 0
    for i in range(len(t)):
        Ei = 0
        for j in range(len(t[0])):
            Ei -= t[i][j] * np.log(est[i][j] + 1e-8)
        E += Ei
    
    return E

In [129]:
#calculate δ for CrossEntropy + SoftMax
def myCalcDeltaOut(est, label_data):
    d = est - label_data
    
    return d

def myCalcDelta(delta, step, z):
    z = np.array(z)
    
    wd = np.dot(delta, weight[-step, 0:len(delta[0]), 0:len(z[0])])
    fz = np.where(z > 0, 1, 0)
    d = wd * fz
    
    return d

In [153]:
def myCalcDE(delta, x):
    DE = np.empty((0, len(x[0])))
    for i in range(len(delta[0])):
        nw = np.array([np.dot(delta[:,i], x)])
        DE = np.append(DE, nw, axis=0)

    return DE

In [169]:
#RMSprop
def myWeightUpdate_RMSProp(DE, layer_no):
    for i in range(len(DE)):
        g_t = g[layer_no - 1, 0:len(DE), 0:len(DE[0])]
        w = weight[layer_no - 1, 0:len(DE), 0:len(DE[0])]
        
        g_t[i] = (alpha * g_t[i]) + (1 - alpha) * (DE[i] ** 2)
        w[i] -= (lr / (np.sqrt(g_t[i]) + 1e-8)) * DE[i]
        
        g[layer_no -1, 0:len(DE), 0:len(DE[0])] = g_t
        weight[layer_no - 1, 0:len(DE), 0:len(DE[0])] = w

In [67]:
#Get Mini Batch
def myGetBatch(X, Y, size):
    Y = np.array(Y)
    rnd_index = np.arange(X.shape[0])
    np.random.shuffle(rnd_index)
    X_shuf = X[rnd_index, :]
    Y_shuf = Y[rnd_index, :]
    
    for i in range(0, X.shape[0], size):
        X_batch = X_shuf[i : i + size]
        Y_batch = Y_shuf[i : i + size]
        
        yield X_batch, Y_batch

In [269]:
#main
t_loss = []
test_acc_list = []
test_loss_list = []

for ep in range(epoch):
    train_acc = 0
    train_E = 0
    cnt = 0
    
    #training
    for X_batch, Y_batch in myGetBatch(X_train, Y_train, batch_size):
        est = []
        z1list = []
        x2list = []
        z2list = []
        x3list = []
        z3list = []
        domask1 = []
        domask2 = []
        
        for x1 in X_batch:
            #FC1
            z1 = myFCLayer(512, x1, 0)
            z1 = np.append(z1, 1)           #add bias
            z1list.append(z1)
            x2 = myActivation(z1, "ReLU")
            
            #DO1
            dm1 = myDropOut(x2, 0.2)
            domask1.append(dm1)
            x2 *= dm1
            x2list.append(x2)
            
            #FC2
            z2 = myFCLayer(512, x2, 1)
            z2 = np.append(z2, 1)           #add bias
            z2list.append(z2)
            x3 = myActivation(z2, "ReLU")
            
            #DO2
            dm2 = myDropOut(x3, 0.2)
            domask2.append(dm2)
            x3 *= dm2
            x3list.append(x3)
            
            #FC3
            z3 = myFCLayer(10, x3, 2)
            z3list.append(z3)
            z3 = np.array(z3)
            y = myActivation(z3, "softmax")
            est.append(y)
        
        train_acc += myCalcAcc(est, Y_batch)            
        train_E += myCrossEntropy(est, Y_batch)
        
        #back propagation
        d3 = myCalcDeltaOut(est, Y_batch)
        d2 = myCalcDelta(d3, 1, z2list)
        d1 = myCalcDelta(d2, 2, z1list)
        
        DE3 = myCalcDE(d3, x3list)
        DE2 = myCalcDE(d2, x2list)
        DE1 = myCalcDE(d1, X_batch)
        
        myWeightUpdate_RMSProp(DE3, 3)
        myWeightUpdate_RMSProp(DE2, 2)
        myWeightUpdate_RMSProp(DE1, 1)
        
        cnt += len(X_batch)
        sys.stdout.write("\r%d/%d" % (cnt, len(X_train)))
        
    train_acc = train_acc / (X_train.shape[0] / batch_size)
    train_E = train_E / X_train.shape[0]
    
    
    #testing
    test_est = []
    test_z1list = []
    test_x2list = []
    test_z2list = []
    test_x3list = []
    test_z3list = []
    
    for test_x1 in X_test:
        test_z1 = myFCLayer(512, test_x1, 0)
        test_z1 = np.append(test_z1, 1)           #add bias
        test_z1list.append(test_z1)
        test_x2 = myActivation(test_z1, "ReLU")
        test_x2list.append(test_x2)
        
        test_z2 = myFCLayer(512, test_x2, 1)
        test_z2 = np.append(test_z2, 1)           #add bias
        test_z2list.append(test_z2)
        test_x3 = myActivation(test_z2, "ReLU")
        test_x3list.append(test_x3)
        
        test_z3 = myFCLayer(10, test_x3, 2)
        test_z3list.append(test_z3)
        test_z3 = np.array(test_z3)
        test_y = myActivation(test_z3, "softmax")
        test_est.append(test_y)
        
    test_acc = myCalcAcc(test_est, Y_test)
    test_acc_list.append(test_acc)
    test_E = myCrossEntropy(test_est, Y_test) / X_test.shape[0]
    test_loss_list.append(test_E)
        
    print("  train_acc = " + str(train_acc), end = "")
    print("  train_loss = " + str(train_E), end = "")
    print("  test_acc = " + str(test_acc), end = "")
    print("  test_loss = " + str(test_E))
    
plt.plot(range(epoch), test_acc_list)
plt.show()

200/60000

400/60000

600/60000

800/60000

1000/60000

1200/60000

1400/60000

1600/60000

1800/60000

2000/60000

2200/60000

2400/60000

2600/60000

2800/60000

3000/60000

3200/60000

3400/60000

3600/60000

3800/60000

4000/60000

4200/60000

4400/60000

4600/60000

4800/60000

5000/60000

5200/60000

5400/60000

5600/60000

5800/60000

6000/60000

6200/60000

6400/60000

6600/60000

6800/60000

7000/60000

7200/60000

7400/60000

7600/60000

7800/60000

8000/60000

8200/60000

8400/60000

8600/60000

8800/60000

9000/60000

9200/60000

9400/60000

9600/60000

9800/60000

10000/60000

10200/60000

10400/60000

10600/60000

10800/60000

11000/60000

11200/60000

11400/60000

11600/60000

11800/60000

12000/60000

12200/60000

12400/60000

12600/60000

12800/60000

13000/60000

13200/60000

13400/60000

13600/60000

13800/60000

14000/60000

14200/60000

14400/60000

14600/60000

14800/60000

15000/60000

15200/60000

15400/60000

15600/60000

15800/60000

16000/60000

16200/60000

16400/60000

16600/60000

16800/60000

17000/60000

17200/60000

17400/60000

17600/60000

17800/60000

18000/60000

18200/60000

18400/60000

18600/60000

18800/60000

19000/60000

19200/60000

19400/60000

19600/60000

19800/60000

20000/60000

20200/60000

20400/60000

20600/60000

20800/60000

21000/60000

21200/60000

21400/60000

21600/60000

21800/60000

22000/60000

22200/60000

22400/60000

22600/60000

22800/60000

23000/60000

23200/60000

23400/60000

23600/60000

23800/60000

24000/60000

24200/60000

24400/60000

24600/60000

24800/60000

25000/60000

25200/60000

25400/60000

25600/60000

25800/60000

26000/60000

26200/60000

26400/60000

26600/60000

26800/60000

27000/60000

27200/60000

27400/60000

27600/60000

27800/60000

28000/60000

28200/60000

28400/60000

28600/60000

28800/60000

29000/60000

29200/60000

29400/60000

29600/60000

29800/60000

30000/60000

30200/60000

30400/60000

30600/60000

30800/60000

31000/60000

31200/60000

31400/60000

31600/60000

31800/60000

32000/60000

32200/60000

32400/60000

32600/60000

32800/60000

33000/60000

33200/60000

33400/60000

33600/60000

33800/60000

34000/60000

34200/60000

34400/60000

34600/60000

34800/60000

35000/60000

35200/60000

35400/60000

35600/60000

35800/60000

36000/60000

36200/60000

36400/60000

36600/60000

36800/60000

37000/60000

37200/60000

37400/60000

37600/60000

37800/60000

38000/60000

38200/60000

38400/60000

38600/60000

38800/60000

39000/60000

39200/60000

39400/60000

39600/60000

39800/60000

40000/60000

40200/60000

40400/60000

40600/60000

40800/60000

41000/60000

41200/60000

41400/60000

41600/60000

41800/60000

42000/60000

42200/60000

42400/60000

42600/60000

42800/60000

43000/60000

43200/60000

43400/60000

43600/60000

43800/60000

44000/60000

44200/60000

44400/60000

44600/60000

44800/60000

45000/60000

45200/60000

45400/60000

45600/60000

45800/60000

46000/60000

46200/60000

46400/60000

46600/60000

46800/60000

47000/60000

47200/60000

47400/60000

47600/60000

47800/60000

48000/60000

48200/60000

48400/60000

48600/60000

48800/60000

49000/60000

49200/60000

49400/60000

49600/60000

49800/60000

50000/60000

50200/60000

50400/60000

50600/60000

50800/60000

51000/60000

51200/60000

51400/60000

51600/60000

51800/60000

52000/60000

52200/60000

52400/60000

52600/60000

52800/60000

53000/60000

53200/60000

53400/60000

53600/60000

53800/60000

54000/60000

54200/60000

54400/60000

54600/60000

54800/60000

55000/60000

55200/60000

55400/60000

55600/60000

55800/60000

56000/60000

56200/60000

56400/60000

56600/60000

56800/60000

57000/60000

57200/60000

57400/60000

57600/60000

57800/60000

58000/60000

58200/60000

58400/60000

58600/60000

58800/60000

59000/60000

59200/60000

59400/60000

59600/60000

59800/60000

60000/60000

  train_acc = 0.8238999999999996  train_loss = 3.1061290184747956  test_acc = 0.8985  test_loss = 17528.166635873193


200/60000

400/60000

600/60000

800/60000

1000/60000

1200/60000

1400/60000

1600/60000

1800/60000

2000/60000

2200/60000

2400/60000

2600/60000

2800/60000

3000/60000

3200/60000

3400/60000

3600/60000

3800/60000

4000/60000

4200/60000

4400/60000

4600/60000

4800/60000

5000/60000

5200/60000

5400/60000

5600/60000

5800/60000

6000/60000

6200/60000

6400/60000

6600/60000

6800/60000

7000/60000

7200/60000

7400/60000

7600/60000

7800/60000

8000/60000

8200/60000

8400/60000

8600/60000

8800/60000

9000/60000

9200/60000

9400/60000

9600/60000

9800/60000

10000/60000

10200/60000

10400/60000

10600/60000

10800/60000

11000/60000

11200/60000

11400/60000

11600/60000

11800/60000

12000/60000

12200/60000

12400/60000

12600/60000

12800/60000

13000/60000

13200/60000

13400/60000

KeyboardInterrupt: 