# 5장 역전파

## 단순한 계층 구현하기

### 곱셈 계층

In [3]:
class Mullayer:
    """Multiplication node of a computational graph.

    ``forward`` multiplies its two inputs and remembers them; ``backward``
    uses the remembered inputs to convert the upstream gradient into one
    gradient per input.
    """

    def __init__(self):
        # Operands of the most recent forward() call (None until then).
        self.x = self.y = None

    def forward(self, x, y):
        """Remember both operands and return their product."""
        self.x, self.y = x, y
        return x * y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``.

        The derivative of a product with respect to one operand is the
        other operand, hence the swapped factors.
        """
        return dout * self.y, dout * self.x

In [4]:
# Inputs: unit price, quantity and tax factor.
apple, apple_num, tax = 100, 2, 1.1

# One multiplication layer per product in the graph.
mul_apple_layer, mul_tax_layer = Mullayer(), Mullayer()

# Forward pass: (apple * apple_num) * tax
apple_price = mul_apple_layer.forward(apple, apple_num)
price = mul_tax_layer.forward(apple_price, tax)

print(price)

220.00000000000003


In [5]:
# Backward pass: walk the layers in the reverse order of the forward pass,
# starting from d(price)/d(price) = 1.
dprice = 1
dapple_price, dtax = mul_tax_layer.backward(dprice)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

print(dapple, dapple_num, dtax)

2.2 110.00000000000001 200


### 덧셈 계층

In [6]:
# Scratch check: 100 times 2 (same numbers as apple and apple_num above).
100 * 2

200

In [11]:
class Addlayer:
    """Addition node of a computational graph."""

    def __init__(self):
        # Addition needs no cached inputs for its backward pass.
        pass

    def forward(self, x, y):
        """Return the sum of the two inputs."""
        return x + y

    def backward(self, dout):
        """Return ``(dx, dy)``; both equal the upstream gradient ``dout``.

        The derivative of ``x + y`` with respect to either operand is 1.
        """
        return dout * 1, dout * 1

In [10]:
# Scratch check: 200 times the 1.1 tax factor; the result shows binary
# floating-point rounding rather than an exact 220.
200 * 1.1

220.00000000000003

In [19]:
# Inputs: unit price and quantity per fruit, plus the tax factor.
apple, apple_num = 100, 2
orange, orange_num = 150, 3
tax = 1.1

# Layers of the graph: two products, their sum, then the tax product.
mul_apple_layer, mul_orange_layer = Mullayer(), Mullayer()
add_layer = Addlayer()
mul_tax_layer = Mullayer()

In [20]:
# Forward pass: (apple * apple_num + orange * orange_num) * tax
apple_price = mul_apple_layer.forward(apple, apple_num)
orange_price = mul_orange_layer.forward(orange, orange_num)
all_price = add_layer.forward(apple_price, orange_price)
price = mul_tax_layer.forward(all_price, tax)

print(price)

715.0000000000001


In [21]:
# Backward pass: visit the layers in the reverse order of the forward pass,
# starting from d(price)/d(price) = 1.
dprice = 1
dall_price, dtax = mul_tax_layer.backward(dprice)
dapple_price, dorange_price = add_layer.backward(dall_price)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)
dorange, dorange_num = mul_orange_layer.backward(dorange_price)

print(dapple, dapple_num, dorange, dorange_num, dtax)

2.2 110.00000000000001 3.3000000000000003 165.0 650


## 활성화 함수 계층 구현

### ReLU

In [23]:
class Relu:
    """ReLU activation layer operating on array inputs.

    ``forward`` zeroes every element that is ``<= 0`` and remembers where
    those elements were; ``backward`` blocks the gradient at the same
    positions.
    """

    def __init__(self):
        # Boolean mask of the positions where the last input was <= 0.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with all non-positive entries set to 0.

        ``x`` must support ``.copy()`` and boolean-mask assignment (e.g. a
        NumPy array); it is not modified.
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """Return the gradient w.r.t. the input of the last ``forward``.

        The upstream gradient passes through where the input was positive
        and is zero elsewhere.
        """
        # Work on a copy so the caller's upstream gradient is left intact;
        # masking ``dout`` in place would corrupt it for any other consumer.
        dx = dout.copy()
        dx[self.mask] = 0
        return dx

In [24]:
import numpy as np

In [25]:
# Small 2x2 sample containing both positive and negative values.
x = np.array([[1.0,-0.5],[-2.0,3.0]])
print(x)

[[ 1.  -0.5]
 [-2.   3. ]]


In [26]:
# Boolean mask marking the entries that are <= 0; indexing with it
# returns exactly those entries as a flat array.
mask = (x<=0)
print(x[mask])

[-0.5 -2. ]


### Sigmoid

In [28]:
class Sigmoid:
    """Sigmoid activation layer: ``y = 1 / (1 + exp(-x))``."""

    def __init__(self):
        # Output of the last forward(); reused by backward().
        self.out = None

    def forward(self, x):
        """Apply the sigmoid to ``x``, cache the result and return it."""
        self.out = 1 / (1 + np.exp(-x))
        return self.out

    def backward(self, dout):
        """Return ``dout * (1 - y) * y`` where ``y`` is the cached output."""
        y = self.out
        return dout * (1.0 - y) * y

### Affine

In [34]:
# Random sample data: a 1-D input with 2 features, a 2x3 weight matrix
# and a bias with 3 entries.
X = np.random.rand(2)
W = np.random.rand(2,3)
B = np.random.rand(3)

# Show the shapes so the matrix-product dimensions can be checked.
display(X.shape)
display(W.shape)
display(B.shape)

(2,)

(2, 3)

(3,)

In [36]:
# Affine transform: (2,) . (2, 3) + (3,) gives a result of shape (3,).
Y = np.dot(X,W) + B
Y

array([0.48632955, 0.1419793 , 0.83704809])

In [39]:
class Affine:
    """Affine (fully connected) layer computing ``x . W + b``.

    Parameters
    ----------
    W : array
        Weight matrix; ``forward`` computes ``np.dot(x, W)``.
    b : array
        Bias added to the product.

    After ``backward`` the parameter gradients are available as ``dW`` and
    ``db``.
    """

    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None   # input of the last forward()
        self.dW = None  # gradient w.r.t. W, set by backward()
        self.db = None  # gradient w.r.t. b, set by backward()

    def forward(self, x):
        """Cache ``x`` and return ``np.dot(x, W) + b``."""
        self.x = x
        out = np.dot(x, self.W) + self.b
        return out

    def backward(self, dout):
        """Store ``dW``/``db`` and return the gradient w.r.t. the input.

        ``dout`` is the upstream gradient with the shape of the forward
        output. Both a batch (2-D) and a single 1-D sample are supported.
        """
        dx = np.dot(dout, self.W.T)
        # A single 1-D sample is promoted to a batch of one so the
        # parameter gradients keep the shapes of W and b; without this,
        # x.T is a no-op on 1-D input and the dot product fails, and the
        # axis-0 sum would collapse db to a scalar. 2-D inputs pass through
        # atleast_2d unchanged.
        x2d = np.atleast_2d(self.x)
        dout2d = np.atleast_2d(dout)
        self.dW = np.dot(x2d.T, dout2d)
        self.db = np.sum(dout2d, axis=0)
        return dx

### Softmax-with-Loss

In [40]:
class SoftmaxWithLoss:
    """Softmax activation combined with a cross-entropy loss.

    ``forward`` relies on ``softmax`` and ``cross_entropy_error``, which
    must be available in the enclosing namespace (they are not defined in
    this class).
    """

    def __init__(self):
        self.loss = None  # loss value of the last forward()
        self.y = None     # softmax output of the last forward()
        self.t = None     # target labels of the last forward()
        self.x = None     # unused; kept so existing attribute access still works

    def forward(self, x, t):
        """Store ``t`` and ``softmax(x)``, then return the loss."""
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss w.r.t. the input of ``forward``.

        The result is ``dout * (y - t) / batch_size`` where ``batch_size``
        is ``t.shape[0]``. Because ``t`` is subtracted from ``y`` directly,
        this presumably expects ``t`` to be one-hot with the same shape as
        ``y`` -- TODO confirm against the callers.
        """
        batch_size = self.t.shape[0]
        # Scale by the upstream gradient; with the default dout=1 this is
        # the plain (y - t) / batch_size.
        dx = dout * (self.y - self.t) / batch_size
        return dx

## 오차역전파법 구현하기

# 6장 학습 관련 기술들

## 매개변수 갱신

### 확률적 경사하강법

In [42]:
class SGD:
    """Stochastic gradient descent: ``param -= lr * grad``."""

    def __init__(self, lr=0.01):
        self.lr = lr  # learning rate

    def update(self, params, grads):
        """Update every entry of ``params`` using the same key in ``grads``.

        ``params`` is modified; nothing is returned.
        """
        step = self.lr
        for name in params:
            params[name] -= step * grads[name]

### 모멘텀

In [46]:
class Momentum:
    """SGD with momentum.

    For each parameter a velocity ``v`` is kept and updated as
    ``v = momentum * v - lr * grad``; the parameter then moves by ``v``.
    """

    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr              # learning rate
        self.momentum = momentum  # decay factor applied to the velocity
        self.v = None             # per-parameter velocities, created lazily

    def update(self, params, grads):
        """Update every entry of ``params`` using the same key in ``grads``.

        ``params`` is modified; nothing is returned.
        """
        if self.v is None:
            # First call: start every velocity at zero, shaped like its
            # parameter.
            self.v = {key: np.zeros_like(val) for key, val in params.items()}

        # This loop must run on every call, not only when the velocities
        # are first created.
        for key in params:
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]

    # Earlier name of this method, kept so existing callers keep working.
    upgrade = update

### AdaGrad

In [47]:
class AdaGrad:
    """AdaGrad optimizer.

    Accumulates the squared gradient of each parameter in ``h`` and divides
    each step by ``sqrt(h)``, so the step for a parameter shrinks as its
    accumulated squared gradient grows.
    """

    def __init__(self, lr=0.01):
        self.lr = lr   # base learning rate
        self.h = None  # per-parameter sums of squared gradients, created lazily

    def update(self, params, grads):
        """Update every entry of ``params`` using the same key in ``grads``.

        ``params`` is modified; nothing is returned.
        """
        if self.h is None:
            # First call: start every accumulator at zero, shaped like its
            # parameter.
            self.h = {key: np.zeros_like(val) for key, val in params.items()}

        for key in params:
            self.h[key] += grads[key] * grads[key]
            # The small constant avoids division by zero while h is still 0.
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)

### Adam

## 가중치의 초깃값

# 7장 CNN

In [49]:
# Random 4-D array of shape (10, 1, 28, 28).
# NOTE(review): presumably (batch, channel, height, width) -- confirm.
x = np.random.rand(10,1,28,28)
x.shape

(10, 1, 28, 28)

In [50]:
# Indexing the first axis drops it, leaving shape (1, 28, 28).
x[0].shape

(1, 28, 28)