# Dataset Load

In [45]:
from tensorflow.keras import datasets
import pandas as pd
import numpy as np
from tensorflow.keras.utils import to_categorical
import random
import matplotlib.pyplot as plt

#load & split data
data  = datasets.mnist.load_data() #((data, label)_train, (data, label)_test)
(x_train, y_train),(x_test,y_test) = data
x_train = x_train.astype(np.float16)
y_train = y_train.astype(np.float16)
x_test = x_test.astype(np.float16)
y_test = y_test.astype(np.float16)

In [46]:
idx = []

# 0~9에 해당하는 y_test 내의 인덱스 저장 -> idx
for id in range(10):
    for i in range(y_test.shape[0]):
        if y_test[i] == id:
            idx.append(i)
            break

# Preprocessing

In [47]:
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)
#plt.imshow(x_train[10])

(60000, 28, 28) (60000,) (10000, 28, 28) (10000,)


In [48]:
#차원 변경 28*28 -> 784
x_train = np.reshape(x_train, (60000,784)).T
x_test = np.reshape(x_test, (10000,784)).T


#연속성 제거 one-hot
y_train = to_categorical(y_train).reshape(60000,10).T
y_test= to_categorical(y_test)


In [49]:
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)
print(y_train[:,3])

(784, 60000) (10, 60000) (784, 10000) (10000, 10)
[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]


# Activaion ftn

In [153]:
#Relu -googling/softmax
class Relu:
    def __init__(self):
        self.mask = None
      
    def forward(self,x):
        self.mask = (x <= 0)
        out = x.copy()    
        out[self.mask] = 0 
        return out
    
    def backward(self, dout):
        self.mask[self.mask > 0] = 1
        return self.mask * dout

#softmax
class Softmax: #input 10*60000
    def __ini__(self):
        self.exp_a = ""
        self.sum_exp_a = ""
    
    def forward(self,a):
        c = np.max(a, axis=0).reshape(1,-1)
        c_array = np.tile(c, reps=[10,1])
        self.exp_a = np.exp(a-c_array)
        self.sum_exp_a = np.reciprocal(np.tile(self.exp_a.sum(axis=0).reshape(1,-1), reps=[10,1]))
        y=np.multiply(self.exp_a,self.sum_exp_a)
        return y
    
    def backward(self,a):
        c = np.max(a, axis=0).reshape(1,-1)
        c_array = np.tile(c, reps=[10,1])
        self.exp_a = np.exp(a-c_array)
        self.sum_exp_a = np.reciprocal(np.tile(self.exp_a.sum(axis=0).reshape(1,-1), reps=[10,1]))
        round = self.sum_exp_a - (self.sum_exp_a **2)
        return round

# Modeling & Training

In [154]:
W_1 = np.random.rand(300,784)
b_1 = np.random.rand(300,1) 
W_2 = np.random.rand(10,300) * 0.002
b_2 = np.random.rand(10,1) * 0.002

In [161]:
#epoch function
class Epoch:
    def __init__(self):
        self.Z= ""
        self.Y= ""
        self.E= ""
        
    def forward(self,X):
        self.Z = Relu1.forward(np.matmul(W_1,X)+b_1) #784*60k -> 300*60k
        self.Y = Softmax1.forward(np.matmul(W_2,self.Z)+b_2) #300*60k -> 10*60k
        self.E = np.sum((self.Y-y_train)*(self.Y-y_train)) * 0.5 # scalar
        print('Error= ',self.E)
        return self.E

    def backward(self):
        delta = self.Y-y_train
        #b2
        grad_b2 = -np.vdot(delta, Softmax1.backward(np.matmul(W_2,self.Z)+b_2))
        #W2
        tmp_Z = np.copy(self.Z)
        tmp_Z = tmp_Z.reshape(300,-1).T
        tmp = delta * Softmax1.backward(np.matmul(W_2,self.Z)+b_2) #10*60k
        
        grad_W2 = tmp[:,0]*tmp_Z[:,0].T
        for i in range(1, 60000):
        grad_W2 = np.stack(grad_W2, tmp[:,i]*tmp_Z[:,i].T,axis=2)
        #   w*1  * z1,...z300
        #의도
        #w11 ... w1300 z1, z2 ,... z300 -> S1*z1, S1*z2, ... S1300*z300 (tmp[:,0]이 S1,...S10)
        #w21 ... w2300 
        #...
        #w101 ... w10300
        
        #실제: Z가 6만개나 있음
        grad_W2 = -np.sum(tmp_Z * tmp)
        #b1
        tmp_W_2 = np.copy(W_2)
        tmp_W_2 = tmp_W_2.reshape(-1,300).T
        tmp2 = delta * Softmax1.backward(np.matmul(W_2,self.Z)+b_2)
        #print(np.sum(tmp2**2)==0)
        grad_b1 = -np.sum(np.matmul(tmp_W_2,tmp2) * Relu1.backward(np.matmul(W_1,X)+b_1)) #300*60k ->scalar
        #W1
        tmp_X = np.copy(X)
        tmp_X = tmp_X.reshape(784,-1).T
        grad_W1 = -np.sum(np.matmul(np.matmul(tmp_W_2,tmp2) * Relu1.backward(np.matmul(W_1,X)+b_1),tmp_X))
        return grad_b2,grad_W2,grad_b1,grad_W1

# Training

In [162]:
print('processing')
learningrate = 0.3
batch = np.arange(60000)

#[epoch1]
np.random.shuffle(batch)
X = x_train.T[batch].T
y_train = y_train.T[batch].T
Relu1 = Relu()
Softmax1 = Softmax()
Epoch1 = Epoch()
Epoch1.forward(X)
theta = Epoch1.backward()
b_1 -= learningrate * theta[2]
b_2 -= learningrate * theta[0]
W_1 -= learningrate * theta[3]
W_2 -= learningrate * theta[1]
for i in range(4):
    print(theta[i])
#[epoch2]

processing
Error=  54081.99999582856
(10, 60000)


ValueError: operands could not be broadcast together with shapes (60000,300) (10,60000) 

In [160]:
theta

(-1.2176098616780567e-21, -4.1379774348235665e-16, -0.0, -0.0)

In [41]:
#연습장
Relu_test = Relu()
a = np.array([[-1,-2,-3],[1,2,3]])
print(Relu_test.forward(a))
print(Relu_test.backward(a))
print(Epoch.Y)

[[0 0 0]
 [1 2 3]]
[[-1 -2 -3]
 [ 0  0  0]]


AttributeError: type object 'Epoch' has no attribute 'Y'

In [229]:
batch = np.arange(5)
np.random.shuffle(batch)
a = np.arange(20).reshape(-1,4)[batch].reshape(4,-1)
print(a.T)

[[ 4  1 14 19]
 [ 5  2 15  8]
 [ 6  3 16  9]
 [ 7 12 17 10]
 [ 0 13 18 11]]


In [232]:
a.T

array([[ 4,  1, 14, 19],
       [ 5,  2, 15,  8],
       [ 6,  3, 16,  9],
       [ 7, 12, 17, 10],
       [ 0, 13, 18, 11]])