# Dataset Load

In [19]:
from tensorflow.keras import datasets
import pandas as pd
import numpy as np
from tensorflow.keras.utils import to_categorical
import random

#load & split data
data  = datasets.mnist.load_data() #((data, label)_train, (data, label)_test)
(x_train, y_train),(x_test,y_test) = data

In [20]:
idx = []

# 0~9에 해당하는 y_test 내의 인덱스 저장 -> idx
for id in range(10):
    for i in range(y_test.shape[0]):
        if y_test[i] == id:
            idx.append(i)
            break

# Preprocessing

In [21]:
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

(60000, 28, 28) (60000,) (10000, 28, 28) (10000,)


In [22]:
#차원 변경 28*28 -> 784
x_train = np.reshape(x_train, (784,60000))
x_test = np.reshape(x_test, (784,10000))


#연속성 제거 one-hot
y_train = to_categorical(y_train).reshape(10,60000)
y_test= to_categorical(y_test).reshape(10,10000)

# Activaion ftn

In [23]:
#Relu -googling/softmax
class Relu:
    def __init__(self):
        self.mask = None
      
    def forward(self,x):
        self.mask = (x <= 0)
        out = x.copy()    
        out[self.mask] = 0 
        return out
    
    def backward(self,dout):
        dout[self.mask] = 0
        dx = dout 
        return dx

#softmax
class Softmax:
    def __ini__(self):
        self.exp_a = ""
        self.sum_exp_a = ""
    
    def forward(self,a):
        c = np.max(a, axis=0).reshape(1,-1)
        c_array = np.tile(c, reps=[10,1])
        self.exp_a = np.exp(a-c_array)
        self.sum_exp_a = np.reciprocal(np.tile(self.exp_a.sum(axis=0).reshape(1,-1), reps=[10,1]))
        y=np.multiply(self.exp_a,self.sum_exp_a)
        return y
    
    def backward(self,a):
        round = self.sum_exp_a - (self.sum_exp_a **2)
        return round

# Modeling & Training

In [24]:
W_1 = np.random.rand(300,784)
b_1 = np.random.rand(300,1) 
W_2 = np.random.rand(10,300) 
b_2 = np.random.rand(10,1)

In [25]:
#epoch function
class Epoch:
    def __init__(self):
        self.Z= ""
        self.Y= ""
        self.E= ""
        
    def forward(self,X):
        self.Z = Relu.forward(np.matmul(W_1,X)+b_1) #784*60k -> 300*60k
        self.Y = Softmax.forward(np.matmul(W_2,self.Z)+b_2) #300*60k -> 10*60k
        self.E = np.sum((self.Y-y_train)*(self.Y-y_train)) * 0.5 # scalar
        return self.E

    def backward(self):
        learning_rate = 0.05
        delta = self.Y-y_train
        #b2
        grad_b2 = -np.vdot(delta, Softmax.backward(np.matmul(W_2,self.Z)+b_2))
        #W2
        tmp_Z = np.copy(self.Z)
        tmp_Z = tmp_Z.reshape(-1,300)
        tmp = np.sum(delta * Softmax.backward(np.matmul(W_2,self.Z)+b_2),axis=0).reshape(-1,1) ##### 이게 문제!
        #print(np.sum(tmp**2)==0)
        grad_W2 = -np.sum(tmp_Z * tmp)
        #b1
        tmp_W_2 = np.copy(W_2)
        tmp_W_2 = tmp_W_2.reshape(300,-1)
        tmp2 = delta * Softmax.backward(np.matmul(W_2,self.Z)+b_2) ##### 이게 문제!
        #print(np.sum(tmp2**2)==0)
        grad_b1 = -np.sum(np.matmul(tmp_W_2,tmp2) * Relu.backward(np.matmul(W_1,X)+b_1)) #300*60k ->scalar
        #W1
        tmp_X = np.copy(X)
        tmp_X = tmp_X.reshape(-1,784)
        grad_W1 = -np.sum(np.matmul(np.matmul(tmp_W_2,tmp2) * Relu.backward(np.matmul(W_1,X)+b_1),tmp_X))
        return grad_b2,grad_W2,grad_b1,grad_W1

# Backpropagation

In [26]:
print('processing')
learningrate = 0.05
#[epoch1]
X = x_train
np.random.shuffle(X)
Relu = Relu()
Softmax = Softmax()
Epoch = Epoch()
Epoch.forward(X)
print(b_1,b_2,W_1,W_2)
b_1 -= learningrate * Epoch.backward()[2]
b_2 -= learningrate * Epoch.backward()[0]
W_1 -= learningrate * Epoch.backward()[3]
W_2 -= learningrate * Epoch.backward()[1]
print(b_1,b_2,W_1,W_2)

processing
[[0.4052682 ]
 [0.50148786]
 [0.37119066]
 [0.03124872]
 [0.56229772]
 [0.90662182]
 [0.11239901]
 [0.98509087]
 [0.89469281]
 [0.89476334]
 [0.96832889]
 [0.95200997]
 [0.34002832]
 [0.9865415 ]
 [0.41087366]
 [0.00800447]
 [0.86011237]
 [0.4192075 ]
 [0.92188345]
 [0.50872288]
 [0.93950605]
 [0.20339709]
 [0.2821457 ]
 [0.34518522]
 [0.38500589]
 [0.35174303]
 [0.60405743]
 [0.62997808]
 [0.039343  ]
 [0.33311432]
 [0.64220273]
 [0.69763057]
 [0.16862729]
 [0.48065492]
 [0.43828819]
 [0.24242951]
 [0.03773937]
 [0.20902847]
 [0.03348429]
 [0.04153771]
 [0.81221205]
 [0.46245128]
 [0.49555077]
 [0.83592035]
 [0.15985599]
 [0.8592081 ]
 [0.09541742]
 [0.72300697]
 [0.39451122]
 [0.20339306]
 [0.21560529]
 [0.28185783]
 [0.9862001 ]
 [0.72230499]
 [0.62846012]
 [0.284905  ]
 [0.70357484]
 [0.42036312]
 [0.54317394]
 [0.20441908]
 [0.8894325 ]
 [0.56894364]
 [0.44794019]
 [0.99441901]
 [0.73095868]
 [0.05812478]
 [0.95892212]
 [0.64974379]
 [0.44408306]
 [0.33717374]
 [0.36625

In [27]:
# W_1, W_2, b_1, b_2
learning rate=0.01
h=0.0001
E(W_1+h)-E(W_1-h) / 2h

SyntaxError: invalid syntax (Temp/ipykernel_8644/3486009183.py, line 2)

In [155]:
#연습장
a = np.ones([5,10]) * 5
a

array([[5., 5., 5., 5., 5., 5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5., 5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5., 5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5., 5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5., 5., 5., 5., 5., 5.]])