## 오차역전파법(Backpropagation)

In [429]:
class AddNode:
    """Addition node of a computational graph.

    ``forward`` returns ``x + y`` and remembers both operands;
    ``backward`` passes the upstream gradient through to both inputs.
    """

    def __init__(self):
        # Operands of the most recent forward call. Initialised to None
        # (as MulNode does) so the attributes exist before the first forward.
        self.x = None
        self.y = None

    def forward(self, x, y):
        """Store the operands and return their sum."""
        self.x = x
        self.y = y
        return self.x + self.y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``.

        The local derivative of ``x + y`` is 1 for both inputs, so each
        gradient is ``dout * 1``.
        """
        dx = dout * 1
        dy = dout * 1
        return dx, dy
    
class MulNode:
    """Multiplication node of a computational graph.

    ``forward`` returns ``x * y`` and remembers both operands;
    ``backward`` needs them because each input's gradient is the
    upstream gradient times the *other* operand.
    """

    def __init__(self):
        # Operands of the most recent forward call.
        self.x = self.y = None

    def forward(self, x, y):
        """Store the operands and return their product."""
        self.x, self.y = x, y
        product = self.x * self.y
        return product

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``."""
        grad_x = self.y * dout
        grad_y = self.x * dout
        return grad_x, grad_y
        

In [430]:
# Inputs of the worked example. `apple` / `banana` are multiplied by their
# `*_num` counterparts in the cells below.
apple = 100
apple_num = 2
banana = 300
banana_num = 5
# Multiplier applied to a purchase price (1.1 = the price plus 10%).
tax = 1.1

In [431]:
### Flow (rough draft, left commented out; the working version is built step by step in the cells below)
# ### forward
# apple_buy_result = apple_buy.forward(apple_price,apple_num)
# banana_buy_result = banana_buy.forward(banana_price,banana_num)
# apple_total = MulNode(apple_buy_result,tax)
# apple_total_price = apple_total.forward(apple_buy_result,tax)
# banana_total = MulNode(banana_buy,tax)
# banana_total_price = banana_total.forward(banana_buy_result,tax)

# total_price = AddNode()
# total_price_result = total_price.forward(apple_total_price,banana_total_price)
# total_price_result

# ### backward
# total_price.backward(total_price_result)
# print(apple_total_price)
# print(banana_total_price)

In [432]:
# mul_apple_layer = MulNode()
# mul_banana_layer = MulNode()

# apple_price = mul_apple_layer.forward(apple_price, apple_num)
# banana_price = mul_banana_layer.forward(banana_price, banana_num)

# apple_total_price = mul_apple_layer.forward(apple_price,tax)
# banana_total_price = mul_banana_layer.forward(banana_price,tax)

# dout = 1
# dx, dy = mul_apple_layer.backward(dout)

In [433]:
# One multiplication node per step of the apple graph:
# apple * apple_num first, then that result * tax.
mul_apple_layer = MulNode()
mul_apple_tax_layer = MulNode()

In [434]:
apple_price = mul_apple_layer.forward(apple, apple_num)

In [435]:
apple_total_price = mul_apple_tax_layer.forward(apple_price, tax)

In [436]:
apple_total_price

220.00000000000003

In [437]:
# Seed the backward pass with an upstream gradient of 1.
dout = 1
# backward returns the gradients w.r.t. the two forward operands in order
# (apple_price, tax): the first value is tax * dout, the second is
# apple_price * dout. Despite its name, `dapple` is the gradient w.r.t.
# apple_price, not w.r.t. the unit price `apple`.
dapple, dtax = mul_apple_tax_layer.backward(dout)

In [438]:
print(dapple)
print(dtax)

1.1
200


In [439]:
# One multiplication node per step of the banana graph:
# banana * banana_num first, then that result * tax.
mul_banana_layer = MulNode()
mul_banana_tax_layer = MulNode()

In [440]:
# Forward pass of the banana graph: unit value times count, then times tax.
banana_price = mul_banana_layer.forward(banana,banana_num)
banana_total_price = mul_banana_tax_layer.forward(banana_price,tax)

In [441]:
# Seed the backward pass with an upstream gradient of 1.
dout = 1
# Gradients w.r.t. the forward operands (banana_price, tax), in that order.
# Note that `dtax` is rebound here, replacing the value from the apple graph.
dbanana, dtax = mul_banana_tax_layer.backward(dout)

In [442]:
print(dbanana)
print(dtax)

1.1
1500


In [443]:
total_layer = AddNode()

In [444]:
total_layer.forward(apple_total_price,banana_total_price)

1870.0000000000002

In [445]:
total_layer.backward(dout=1)

(1, 1)

In [446]:
print(total_layer.x)
print(total_layer.y)

220.00000000000003
1650.0000000000002


In [447]:
# Fresh nodes for the combined graph evaluated in the following cells:
# (apple * apple_num + banana * banana_num) * tax.
# `mul_apple_layer` and `mul_banana_layer` are rebound to new instances here.
mul_apple_layer = MulNode()
mul_banana_layer = MulNode()
add_apple_banana_layer = AddNode()
mul_tax_layer = MulNode()

In [448]:
# First stage of the forward pass: per-item subtotals (value * count).
apple_price = mul_apple_layer.forward(apple, apple_num)
banana_price = mul_banana_layer.forward(banana, banana_num)

In [449]:
all_price = add_apple_banana_layer.forward(apple_price, banana_price)

In [450]:
total_price = mul_tax_layer.forward(all_price,tax)

In [451]:
print(f'apple_price = {apple_price}')
print(f'banana_price = {banana_price}')
print(f'all_price = {all_price}')
print(f'total_price = {total_price}')


apple_price = 200
banana_price = 1500
all_price = 1700
total_price = 1870.0000000000002


In [452]:
### 역전파(backward)

In [453]:
# Seed the backward pass with an upstream gradient of 1.
dout = 1
# Backward through the tax multiplication; the results are the gradients
# w.r.t. its forward operands (all_price, tax), in that order.
dall_price,dtax = mul_tax_layer.backward(dout)

In [454]:
dall_price

1.1

In [455]:
dapple_price, dbanana_price = add_apple_banana_layer.backward(dall_price)

In [456]:
dapple_price

1.1

In [457]:
dapple, dapple_num = mul_apple_layer.backward(dapple_price)


In [458]:
print(dapple)
print(dapple_num)

2.2
110.00000000000001


In [459]:
dbanana, dbanana_num = mul_banana_layer.backward(dbanana_price)

In [460]:
print(dbanana)
print(dbanana_num)

5.5
330.0


## Relu, Sigmoid, Affine, Loss

In [461]:
import numpy as np

def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def relu(x):
    """Return max(0, x) element-wise (rectified linear unit)."""
    rectified = np.maximum(0, x)
    return rectified

def softmax(x):
    """Return the softmax of ``x`` along its last axis.

    For a 2-D ``(batch, classes)`` array this normalises every row, as
    before; a 1-D score vector is now accepted as well.

    The maximum along the axis is subtracted first so that ``np.exp``
    cannot overflow for large scores; this does not change the result.
    """
    x = np.asarray(x)
    shifted = x - np.max(x, axis=-1, keepdims=True)
    # Evaluate the exponent once and reuse it for numerator and denominator.
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)

def categorical_crossentropy(y, t):
    """Return the mean of ``-t * log(y)`` over every element.

    Args:
        y: predicted probabilities.
        t: targets with the same shape as ``y`` (e.g. one-hot rows).

    Note that the mean is taken over all elements, so for a 2-D input the
    sum is divided by ``rows * columns``, not by the number of rows.
    """
    eps = 1e-12
    # Floor the probabilities before the log: log(0) is -inf, and
    # 0 * -inf is nan, which would poison the whole mean.
    safe_y = np.maximum(np.asarray(y), eps)
    return np.mean(-np.asarray(t) * np.log(safe_y))

In [462]:
### Relu, Sigmoid, Affine, Loss

class Relu:
    """ReLU layer: zeroes non-positive inputs and blocks their gradient."""

    def __init__(self):
        # Boolean array marking the inputs that were <= 0 in the last forward.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every entry <= 0 replaced by 0."""
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """Return ``dout`` with the gradient zeroed where the input was <= 0.

        The result is a new array; the caller's ``dout`` is left untouched
        (it used to be overwritten in place).
        """
        dx = np.array(dout)  # np.array copies by default
        dx[self.mask] = 0
        return dx
    
class Sigmoid:
    """Sigmoid layer; keeps its output because the gradient is built from it."""

    def __init__(self):
        # Output of the most recent forward call.
        self.out = None

    def forward(self, x):
        """Apply ``sigmoid`` to ``x``, remember the result and return it."""
        self.out = sigmoid(x)
        return self.out

    def backward(self, dout):
        """Return ``(1 - out) * out * dout`` for the upstream gradient."""
        activated = self.out
        local_grad = (1 - activated) * activated
        return local_grad * dout
    
class Affine:
    """Affine (fully connected) layer computing ``x . W + b``."""

    def __init__(self, W, b):
        self.W, self.b = W, b
        # Input of the last forward call and its shape, kept for backward.
        self.x = None
        self.origin_shape = None
        # Parameter gradients produced by the last backward call.
        self.dW = self.db = None

    def forward(self, x):
        """Remember ``x`` (and its shape) and return ``np.dot(x, W) + b``."""
        self.origin_shape = x.shape
        self.x = x
        return np.dot(x, self.W) + self.b

    def backward(self, dout):
        """Return ``(dx, dW, db)`` for the upstream gradient ``dout``.

        ``dW`` and ``db`` are also stored on the instance; ``dx`` is
        reshaped to the shape of the input seen by ``forward``.
        """
        grad_input = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        # The bias is added to every row, so its gradient sums over axis 0.
        self.db = np.sum(dout, axis=0)
        return grad_input.reshape(self.origin_shape), self.dW, self.db
    
class Loss:
    """Loss layer: cross-entropy in forward, ``y - t`` in backward."""

    def __init__(self):
        # Last loss value plus the predictions/targets it was computed from.
        self.loss = self.y = self.t = None

    def forward(self, y, t):
        """Store ``y`` and ``t`` and return ``categorical_crossentropy(y, t)``."""
        self.y, self.t = y, t
        self.loss = categorical_crossentropy(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        """Return ``(y - t) * dout``.

        NOTE(review): the result is not divided by the batch size or the
        element count, while ``categorical_crossentropy`` averages over all
        elements; confirm the scale is intended.
        """
        residual = self.y - self.t
        return residual * dout

In [463]:
### Loss

In [464]:
y = softmax(np.random.randn(10,3))

In [465]:
t = np.random.randn(10,3)

In [485]:
x = np.argmax(t, axis=1)

array([0, 1, 1, 0, 2, 0, 0, 1, 0, 0], dtype=int64)

In [467]:
t = np.zeros((10,3))

In [468]:
# One-hot encode: `x` holds one class index per row (the argmax computed in
# an earlier cell) and `t` was reset to zeros, so set t[row, class] = 1.
for i,k in enumerate(x):
    t[i,k] = 1

In [469]:
t

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.]])

In [470]:
loss = Loss()

In [471]:
loss.forward(y,t)

0.3092250740422319

In [472]:
loss.backward()

array([[-0.60193667,  0.46131013,  0.14062654],
       [ 0.25801639, -0.31100402,  0.05298763],
       [ 0.26164856, -0.5011283 ,  0.23947974],
       [-0.93653697,  0.8562314 ,  0.08030557],
       [ 0.36332935,  0.11718473, -0.48051408],
       [-0.55249862,  0.19700154,  0.35549708],
       [-0.72137032,  0.56866095,  0.15270937],
       [ 0.23173544, -0.45011806,  0.21838263],
       [-0.31850913,  0.08170915,  0.23679998],
       [-0.5560295 ,  0.17538724,  0.38064226]])

In [473]:
### Affine
x = np.random.randn(5,3)

In [474]:
x.shape

(5, 3)

In [475]:
Affine1 = Affine(np.random.randn(5,3), np.zeros(3))

In [476]:
Affine1.forward(np.random.randn(10,5))

array([[ 0.9952255 ,  1.31638928,  0.10974955],
       [ 1.43652352,  0.7541536 ,  0.50820024],
       [-0.7675275 , -0.44622255,  1.27726307],
       [ 0.0483651 , -1.64967221,  0.67961135],
       [-0.57849289,  0.86040119,  0.3778074 ],
       [ 1.60507065,  0.33549679,  2.68918334],
       [ 2.41308522, -0.3694054 ,  1.88481149],
       [ 0.87798884, -0.54419171, -1.7786682 ],
       [-0.46924229,  0.50414078,  0.13709583],
       [ 1.31418245, -1.60724165,  0.63895824]])

In [477]:
Affine1.backward(np.random.randn(10,3))

(array([[-0.62502815, -0.05009918,  0.14206121,  0.62231826, -0.74339735],
        [ 2.70601115, -1.52977035, -2.33826405,  1.32331269, -0.81715925],
        [-0.93929663,  2.19405454,  3.95566913,  0.21639372, -0.01395006],
        [-0.83665236,  0.48166431,  1.75257699,  2.62798755, -2.53922974],
        [ 0.77755775, -0.01917469, -0.11807045, -0.17035672,  0.3535316 ],
        [-0.35474639,  0.63306353,  1.1182717 , -0.01554209,  0.0461038 ],
        [-0.13581947, -1.85539419, -3.80525844, -1.65932618,  1.12980908],
        [ 0.96207648,  0.51033318,  1.18524053,  0.96935777, -0.54404444],
        [-0.16881486, -1.44629388, -3.14322558, -1.83007102,  1.35803914],
        [-0.27767976,  1.16139286,  2.3122343 ,  0.79157993, -0.5734986 ]]),
 array([[-0.30809947,  0.38710442,  3.99936561],
        [-1.33086661, -1.88619164, -2.55838973],
        [-0.56397364,  1.93015819,  3.97202808],
        [ 1.87112698,  1.25131227,  0.51974032],
        [ 0.46275545,  1.0998927 ,  6.37753685]]),
 

In [478]:
### Relu
# Sample data for the Relu demo below: `x` is fed to forward and `y` is
# passed to backward as the upstream gradient. Both names are rebound here.
x = np.array([[-1,0],[1,2],[2,0]])
y = np.array([[7,6],[-2,5],[4,1]])

In [479]:
# Boolean-mask assignment: overwrite every entry <= 0 with 0, in place.
x[x <= 0] = 0
# Echo the modified array.
x

array([[0, 0],
       [1, 2],
       [2, 0]])

In [480]:
# Run the Relu layer forward; this records the mask (x <= 0) that the
# following backward call uses.
relu_d = Relu()
relu_d.forward(x)

array([[0, 0],
       [1, 2],
       [2, 0]])

In [481]:
relu_d.backward(y)

array([[ 0,  0],
       [-2,  5],
       [ 4,  0]])

In [482]:
### Sigmoid
xx = Sigmoid()

In [483]:
xx.forward(np.random.randn(2,3))

array([[0.72676713, 0.35400786, 0.26124522],
       [0.46120929, 0.22076657, 0.27524621]])

In [484]:
xx.backward(np.random.randn(2,3))

array([[-0.13148143,  0.55731909, -0.13566062],
       [-0.22497639,  0.10944083, -0.30710277]])

In [486]:
## Class

In [None]:
### 내일 코드 다시 시작
class Network:
    """Small fully connected network built from Affine/activation layers.

    Each ``add`` call appends an ``Affine`` layer and an activation layer to
    ``self.layers`` under the keys ``"Affine<i>"`` and ``"activation<i>"``
    (``i`` starts at 1). The ``"softmax"`` activation is represented by a
    ``Loss`` layer: ``predict`` skips it and returns raw scores, while
    ``loss`` applies ``softmax`` itself and then the cross-entropy.
    """

    def __init__(self, weight_decay):
        # Layer name -> layer object, in insertion (= forward) order.
        self.layers = {}
        # The original code used both `self.layer` and `self.layers`;
        # both names are bound to the same dict so either keeps working.
        self.layer = self.layers
        # NOTE(review): stored but not applied to the loss or the updates yet.
        self.weight_decay = weight_decay
        # Loss layer created on demand when the last layer is not "softmax".
        self._criterion = None

    def add(self, x1, x2, activation):
        """Append an ``Affine(x1 -> x2)`` layer followed by an activation.

        Args:
            x1: number of input features of the new layer.
            x2: number of output features of the new layer.
            activation: ``"sigmoid"``, ``"relu"`` or ``"softmax"``.

        Raises:
            KeyError: if ``activation`` is not one of the names above.
        """
        activation_dict = {
            "sigmoid": Sigmoid,
            "relu": Relu,
            "softmax": Loss,
        }
        # Resolve the activation first so an unknown name leaves the
        # network unchanged.
        activation_cls = activation_dict[activation]
        w = np.random.randn(x1, x2)
        b = np.zeros(x2)
        # Every add() stores two entries, so the 1-based index of the new
        # pair is half the current entry count plus one.
        index = len(self.layers) // 2 + 1
        self.layers['Affine' + str(index)] = Affine(w, b)
        self.layers['activation' + str(index)] = activation_cls()

    def _loss_layer(self):
        """Return the Loss layer used for training, creating one if needed."""
        stack = list(self.layers.values())
        if stack and isinstance(stack[-1], Loss):
            return stack[-1]
        if self._criterion is None:
            self._criterion = Loss()
        return self._criterion

    def predict(self, x):
        """Run ``x`` through every layer except Loss layers; return the scores."""
        y = np.array(x)  # work on a copy, as the original did with x.copy()
        for layer in self.layers.values():
            if isinstance(layer, Loss):
                # A Loss layer needs targets; softmax is applied in loss().
                continue
            y = layer.forward(y)
        return y

    def loss(self, x, t):
        """Return the cross-entropy of ``softmax(predict(x))`` against ``t``.

        The value is also stored in ``self.err``.
        """
        y = softmax(self.predict(x))
        # Loss.forward expects (prediction, target) in that order.
        self.err = self._loss_layer().forward(y, t)
        return self.err

    def accuracy(self, x, t):
        """Return the fraction of rows whose argmax matches that of ``t``.

        The value is also stored in ``self.acc``.
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        self.acc = np.sum(y == t) / t.size
        return self.acc

    def gradient(self, x, t):
        """Do one gradient-descent step on ``(x, t)`` using backpropagation.

        Reads the learning rate from ``self.lr`` (set by ``fit``) and updates
        the ``W`` and ``b`` arrays of every Affine layer in place.
        ``self.err`` holds the loss measured before the update.
        """
        lr = self.lr
        # The forward pass caches what each layer needs for backward.
        self.loss(x, t)
        criterion = self._loss_layer()
        # The loss is a mean over every element of y, while Loss.backward
        # returns the unscaled (y - t) * dout. Seeding with 1 / size makes
        # the gradient match the reported loss for targets whose rows sum to 1.
        dout = criterion.backward(1.0 / criterion.y.size)
        for layer in reversed(list(self.layers.values())):
            if isinstance(layer, Loss):
                continue
            if isinstance(layer, Affine):
                # Affine.backward returns (dx, dW, db); dx was computed with
                # the old weights, so updating afterwards is safe.
                dout, dW, db = layer.backward(dout)
                layer.W -= lr * dW
                layer.b -= lr * db
            else:
                dout = layer.backward(dout)

    def fit(self, x, t, epochs, lr):
        """Train for ``epochs`` full-batch steps with learning rate ``lr``.

        Per-epoch loss and accuracy are collected in
        ``self.history["loss"]`` and ``self.history["accuracy"]``; a progress
        line is printed every 100 epochs.
        """
        self.lr = lr
        self.history = {}
        loss = []
        accuracy = []
        for epoch in range(epochs):
            self.gradient(x, t)
            acc = self.accuracy(x, t)  # evaluate once, reuse for the log line
            loss.append(self.err)
            accuracy.append(acc)
            if epoch % 100 == 0:
                print(f'loss : {self.err} === accuracy : {acc}')
        self.history["loss"] = loss
        self.history["accuracy"] = accuracy