In [1]:
import numpy as np

## 5.4 단순한 계층 구현하기

### 5.4.1 곱셈 계층

In [2]:
class MulLayer:
    """Multiplication node of a computational graph.

    The forward pass multiplies its two inputs; the backward pass scales the
    upstream gradient by the *other* input of each operand.
    """

    def __init__(self):
        # Forward-pass operands, cached because backward() needs them.
        self.x, self.y = None, None

    def forward(self, x, y):
        """Remember both operands and return their product ``x * y``."""
        self.x, self.y = x, y
        return x * y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``.

        d(x*y)/dx is y and d(x*y)/dy is x, so the cached operands are swapped.
        """
        return dout * self.y, dout * self.x

In [3]:
# Worked example: price of `apple_num` apples at `apple` each, with tax applied.
apple = 100
apple_num = 2
tax = 1.1

# Layers: one multiplication for (unit price x count), one for (subtotal x tax).
mul_apple_layer = MulLayer()
mul_tax_layer = MulLayer()

# Forward pass: evaluate the graph from inputs to the final price.
apple_price = mul_apple_layer.forward(apple, apple_num)
price = mul_tax_layer.forward(apple_price, tax)

print(price)

# Backward pass: start from d(price)/d(price) = 1 and walk the layers in
# the reverse order of the forward pass.
dprice = 1
dapple_price, dtax = mul_tax_layer.backward(dprice)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

print(dapple, dapple_num, dtax)

220.00000000000003
2.2 110.00000000000001 200


### 5.4.2 덧셈 계층

In [4]:
class AddLayer:
    """Addition node of a computational graph.

    Unlike a multiplication node, the backward pass does not depend on the
    forward inputs, so nothing is cached on the instance.
    """

    def __init__(self):
        """No state to initialise: the operands are not needed by backward()."""

    def forward(self, x, y):
        """Return the sum ``x + y`` (the operands are not stored)."""
        return x + y

    def backward(self, dout):
        """Return ``(dx, dy)``; both equal the upstream gradient times one."""
        # d(x+y)/dx = d(x+y)/dy = 1, so the gradient passes through unchanged.
        return dout * 1, dout * 1

In [5]:
# Worked example: apples and oranges are priced separately, summed, then taxed.
apple = 100
apple_num = 2
orange = 150
orange_num = 3
tax = 1.1

# Layers: two multiplications for the per-fruit subtotals, one addition to
# combine them, and a final multiplication for the tax.
mul_apple_layer = MulLayer()
mul_orange_layer = MulLayer()
add_apple_orange_layer = AddLayer()
mul_tax_layer = MulLayer()

# Forward pass: per-fruit subtotals first.
apple_price = mul_apple_layer.forward(apple, apple_num)
orange_price = mul_orange_layer.forward(orange, orange_num)

total_price = add_apple_orange_layer.forward(apple_price, orange_price)

final_total_price = mul_tax_layer.forward(total_price, tax)

print(final_total_price)

# Backward pass: seed with 1 and visit the layers in reverse forward order.
dfinal_total_price = 1

dtotal_price, dtax = mul_tax_layer.backward(dfinal_total_price)

dapple_price, dorange_price = add_apple_orange_layer.backward(dtotal_price)

dorange, dorange_num = mul_orange_layer.backward(dorange_price)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

# Note the print order: dapple_num comes before dapple here.
print(dapple_num, dapple, dorange, dorange_num, dtax)

715.0000000000001
110.00000000000001 2.2 3.3000000000000003 165.0 650


## 5.5 활성화 함수 계층 구현하기

### 5.5.1 ReLU 계층

In [6]:
class Relu:
    """ReLU activation layer: ``max(x, 0)`` applied element-wise.

    ``forward`` expects a NumPy array (it relies on ``.copy()`` and boolean
    mask assignment) and remembers which elements were non-positive so that
    ``backward`` can zero the gradient at those positions.
    """

    def __init__(self):
        # Boolean array, True where the last forward input was <= 0.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every element <= 0 replaced by 0."""
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """Return the gradient w.r.t. the forward input.

        The upstream gradient passes through where the input was positive and
        is zeroed elsewhere. The masking is done on a copy so the caller's
        ``dout`` array is left untouched (the previous implementation
        overwrote it in place).
        """
        dx = dout.copy()
        dx[self.mask] = 0
        return dx

### 5.5.2 Sigmoid 계층

In [7]:
class Sigmoid:
    """Sigmoid activation layer: ``1 / (1 + exp(-x))``.

    The forward output is cached on ``self.out`` because the derivative of the
    sigmoid can be written purely in terms of its output: ``out * (1 - out)``.
    """

    def __init__(self):
        # Fixed: the constructor was misspelled ``__iniit__`` (so it never ran)
        # and initialised ``self.y`` instead of the ``self.out`` that the
        # other methods actually use.
        self.out = None

    def forward(self, x):
        """Apply the sigmoid to ``x``, cache the result and return it."""
        out = 1 / (1 + np.exp(-x))
        self.out = out

        return out

    def backward(self, dout):
        """Return ``dout * out * (1 - out)`` using the cached forward output.

        Fixed: the method previously lacked the ``self`` parameter, so calling
        it on an instance raised ``TypeError``.
        """
        dx = dout * (1. - self.out) * self.out
        return dx

## 5.6 Affine/Softmax 계층 구현하기

### 5.6.1 Affine 계층

In [10]:
# Random operands for the affine experiments below:
# X has shape (2,), W has shape (2, 3) and B has shape (3,).
# NOTE(review): no random seed is set in the visible cells, so the printed
# values further down will differ on a re-run.
X = np.random.rand(2)
W = np.random.rand(2,3)
B = np.random.rand(3)

In [14]:
# Affine transform X.W + B; with X (2,), W (2, 3) and B (3,) the result has shape (3,).
Y = np.dot(X, W) + B

In [37]:
# 1-D X (2,) times 2-D W (2, 3): the length-2 axis is summed over, giving shape (3,).
np.dot(X, W)

array([0.93046963, 0.66103671, 0.67929215])

In [39]:
# W.T is (3, 2); dotting it with the 1-D X (2,) sums over the same axis,
# so the values match np.dot(X, W).
np.dot(W.T, X)

array([0.93046963, 0.66103671, 0.67929215])

In [40]:
# Transposing a 1-D array is a no-op, so this is the same as np.dot(X, W).
np.dot(X.T, W)

array([0.93046963, 0.66103671, 0.67929215])

### 5.6.2 배치용 Affine 계층

In [42]:
class Affine:
    """Affine (fully connected) layer computing ``x . W + b``.

    ``backward`` sums ``dout`` over axis 0 for the bias gradient and uses
    ``x.T . dout`` for the weight gradient, so it is written for 2-D batched
    inputs. NOTE(review): presumably ``x`` is (batch, in_features) and
    ``dout`` is (batch, out_features); confirm against callers.
    """

    def __init__(self, W, b):
        self.W = W
        self.b = b
        # Fixed: this was ``self.X`` (capital X) while forward/backward use
        # ``self.x``, so the attribute did not exist until forward() ran.
        self.x = None
        # Parameter gradients, filled in by backward().
        self.dW = None
        self.db = None

    def forward(self, x):
        """Cache ``x`` for the backward pass and return ``x . W + b``."""
        self.x = x
        out = np.dot(x, self.W) + self.b
        return out

    def backward(self, dout):
        """Store ``dW`` and ``db`` on the instance and return ``dx``.

        dx = dout . W^T, dW = x^T . dout, db = sum of dout over axis 0.
        """
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)
        return dx

In [51]:
# Quick smoke test of the Affine layer with the random X, W, B from earlier.
a = Affine(W, B)
a.forward(X)
# NOTE(review): dout is the scalar 1 here, so dW comes out with X's shape
# (a 2-element array) and db is the scalar 1, not the shapes of W and B.
# Presumably backward() is meant to receive a dout shaped like the forward
# output; confirm before relying on this cell's output.
a.backward(1)
a.dW, a.db

(array([0.95060619, 0.12366168]), 1)

### 5.6.3 Softmax-with-Loss 계층

In [91]:
import os

# Import helpers from the book's repository checkout by temporarily switching
# the working directory into it.
# NOTE(review): this presumably relies on the current directory being on
# sys.path; confirm for the kernel in use.
cwd = os.getcwd()
os.chdir('deep-learning-from-scratch-master')
try:
    from common.functions import softmax, cross_entropy_error
finally:
    # Always restore the original working directory, even when the import
    # fails; otherwise later relative paths in the notebook silently break.
    os.chdir(cwd)

In [85]:
import inspect
# getsourcelines returns (list_of_source_lines, starting_line_number);
# index 0 keeps only the list of lines so the cell displays softmax's source.
inspect.getsourcelines(softmax)[0]

['def softmax(x):\n',
 '    if x.ndim == 2:\n',
 '        x = x.T\n',
 '        x = x - np.max(x, axis=0)\n',
 '        y = np.exp(x) / np.sum(np.exp(x), axis=0)\n',
 '        return y.T \n',
 '\n',
 '    x = x - np.max(x) # 오버플로 대책\n',
 '    return np.exp(x) / np.sum(np.exp(x))\n']

In [89]:
# Sanity check: the softmax outputs should sum to 1, up to floating-point
# rounding.
A = np.array([1., 2., 3.])
P = softmax(A)
np.sum(P)

0.9999999999999999

In [94]:
class SoftmaxWithLoss:
    """Softmax activation fused with a cross-entropy loss.

    ``forward`` delegates to the imported ``softmax`` and
    ``cross_entropy_error`` helpers; ``backward`` returns the combined
    gradient ``(y - t) / batch_size``.
    """

    def __init__(self):
        self.loss = None  # loss value from the last forward pass
        self.y = None     # softmax output from the last forward pass
        # Fixed: an unused ``self.x`` was initialised here while ``self.t``,
        # which forward() sets and backward() reads, was not.
        self.t = None     # targets from the last forward pass

    def forward(self, x, t):
        """Cache ``t`` and ``softmax(x)``, then return the cross-entropy loss."""
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss w.r.t. the forward input ``x``.

        ``dout`` is accepted for interface symmetry with the other layers but
        is not used, exactly as before.

        The batch size is the leading dimension of a 2-D ``y``; a 1-D ``y``
        is treated as a single sample. Previously ``t.shape[0]`` was used,
        which for one 1-D one-hot sample is the number of classes.
        NOTE(review): assumes ``cross_entropy_error`` also treats a 1-D input
        as one sample; confirm against its source.

        Targets may be one-hot (same size as ``y``) or integer class indices
        (one per sample).
        """
        y, t = self.y, self.t
        batch_size = y.shape[0] if y.ndim > 1 else 1
        if t.size == y.size:
            # One-hot targets: the gradient is simply the difference.
            dx = (y - t) / batch_size
        else:
            # Class-index targets: subtract 1 at each sample's true class.
            dx = y.reshape(batch_size, -1).copy()
            dx[np.arange(batch_size), t.reshape(-1)] -= 1
            dx = dx.reshape(y.shape) / batch_size
        return dx

## 5.7 오차역전파법 구현하기

### 5.7.2 오차역전파법을 적용한 신경망 구현하기