In [3]:
import numpy as np
import import_ipynb
from common.functions import softmax, cross_entropy_error

class MatMul:
    """Layer that multiplies its input by a weight matrix: ``out = x @ W``.

    ``params`` and ``grads`` are single-element lists so the layer exposes
    the same interface as the other layers in this notebook.
    """

    def __init__(self, W):
        """Store the weight ``W`` and allocate a same-shaped gradient buffer."""
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        # Input of the most recent forward pass; needed to compute dW.
        self.x = None

    def forward(self, x):
        """Return ``np.matmul(x, W)`` and remember ``x`` for the backward pass.

        Assumes ``x`` is 2-D (batch, in_features), since ``backward``
        transposes it with ``.T`` -- TODO confirm against callers.
        """
        (weight,) = self.params
        product = np.matmul(x, weight)
        self.x = x
        return product

    def backward(self, dout):
        """Propagate ``dout`` back through the product.

        Writes the weight gradient into ``self.grads[0]`` and returns the
        gradient with respect to the input.
        """
        (weight,) = self.params
        grad_input = np.matmul(dout, weight.T)
        grad_weight = np.matmul(self.x.T, dout)
        # Ellipsis assignment copies the values into the existing buffer,
        # so external references to grads[0] keep pointing at live data.
        self.grads[0][...] = grad_weight
        return grad_input
    
    

importing Jupyter notebook from functions.ipynb


In [22]:
class Sigmoid:
    """Element-wise logistic sigmoid activation layer.

    The layer has no trainable parameters, so ``params`` and ``grads``
    are empty lists.
    """

    def __init__(self):
        """Create the layer with no parameters and no cached output."""
        self.params = []
        self.grads = []
        # Output of the most recent forward pass; reused by backward.
        self.out = None

    def forward(self, x):
        """Return ``1 / (1 + exp(-x))`` and cache it for the backward pass."""
        self.out = 1 / (1 + np.exp(-x))
        return self.out

    def backward(self, dout):
        """Return ``dout`` scaled by the sigmoid derivative ``y * (1 - y)``.

        ``y`` is the output cached by the last ``forward`` call.
        """
        one_minus_out = 1.0 - self.out
        return dout * one_minus_out * self.out

In [25]:
class Affine:
    """Fully connected layer computing ``out = x . W + b``."""

    def __init__(self, W, b):
        """Store weight ``W`` and bias ``b`` with matching gradient buffers."""
        self.params = [W, b]
        self.grads = [np.zeros_like(W), np.zeros_like(b)]
        # Input of the most recent forward pass; needed to compute dW.
        self.x = None

    def forward(self, x):
        """Return ``np.dot(x, W) + b`` and remember ``x`` for backward.

        Assumes ``x`` is 2-D (batch, in_features), since ``backward``
        transposes it with ``.T`` -- TODO confirm against callers.
        """
        weight, bias = self.params
        affine_out = np.dot(x, weight) + bias
        self.x = x
        return affine_out

    def backward(self, dout):
        """Propagate ``dout`` backwards.

        Fills ``self.grads`` in place with the weight and bias gradients
        and returns the gradient with respect to the input.
        """
        weight, _bias = self.params
        grad_input = np.dot(dout, weight.T)
        grad_weight = np.dot(self.x.T, dout)
        # The bias is added to every row, so its gradient is the sum of
        # dout over axis 0.
        grad_bias = np.sum(dout, axis=0)

        # Copy into the existing buffers rather than rebinding them.
        weight_buf, bias_buf = self.grads
        weight_buf[...] = grad_weight
        bias_buf[...] = grad_bias
        return grad_input

In [None]:
class SoftmaxWithLoss:
    """Softmax followed by cross-entropy loss, fused into one layer.

    Relies on ``softmax`` and ``cross_entropy_error`` imported from
    ``common.functions``.
    """

    def __init__(self):
        """Create the layer; it has no trainable parameters."""
        self.params = []
        self.grads = []
        self.y = None  # softmax output of the last forward pass
        self.t = None  # target labels of the last forward pass

    def forward(self, x, t):
        """Return the cross-entropy loss of ``softmax(x)`` against ``t``.

        If ``t`` has as many elements as the softmax output it is treated
        as one-hot encoded and converted to class indices via
        ``argmax(axis=1)`` before being stored and used.
        """
        self.t = t
        probs = softmax(x)
        self.y = probs

        # One-hot targets are reduced to the index of the correct class.
        labels_are_one_hot = t.size == probs.size
        if labels_are_one_hot:
            self.t = t.argmax(axis=1)

        return cross_entropy_error(probs, self.t)

    def backward(self, dout=1):
        """Return the gradient of the loss with respect to the input ``x``.

        Computes ``(y - onehot(t)) * dout / batch_size`` from the values
        cached by ``forward``; the batch size is ``self.t.shape[0]``.
        """
        n_samples = self.t.shape[0]
        sample_rows = np.arange(n_samples)

        grad = self.y.copy()
        # Subtract 1 at each sample's correct-class position.
        grad[sample_rows, self.t] -= 1
        grad *= dout
        return grad / n_samples
    
        

In [2]:
class Embedding:
    """Lookup layer that selects rows of a weight matrix by index."""

    def __init__(self, W):
        """Store the weight ``W`` and allocate a same-shaped gradient buffer."""
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        # Indices used in the most recent forward pass.
        self.idx = None

    def forward(self, idx):
        """Return ``W[idx]`` and remember ``idx`` for the backward pass."""
        W, = self.params
        self.idx = idx
        out = W[idx]

        return out

    def backward(self, dout):
        """Accumulate ``dout`` into the gradient rows selected by ``idx``.

        The gradient buffer ``self.grads[0]`` is reset to zero and then
        updated in place. Returns ``None``: this layer does not pass a
        gradient further back.
        """
        # Unpack the gradient ARRAY out of the one-element list. Binding the
        # list itself (``dW = self.grads``) makes ``dW[...] = 0`` raise
        # TypeError, because lists cannot be indexed with Ellipsis.
        dW, = self.grads
        dW[...] = 0

        # Plain assignment (``dW[self.idx] = dout``) would be wrong when idx
        # contains duplicates: later rows would overwrite earlier ones
        # instead of summing. np.add.at(A, idx, B) adds B into the rows of A
        # at idx, accumulating correctly for repeated indices.
        np.add.at(dW, self.idx, dout)

        return None

In [None]:
class EmbeddingDot:
    """Embedding lookup followed by a row-wise dot product with ``h``.

    ``params`` and ``grads`` are the very same list objects as those of
    the wrapped ``Embedding`` layer.
    """

    def __init__(self, W):
        """Wrap ``W`` in an ``Embedding`` layer and share its params/grads."""
        embedding = Embedding(W)
        self.embed = embedding
        self.params = embedding.params
        self.grads = embedding.grads
        # (h, looked-up rows) saved by the most recent forward pass.
        self.cache = None

    def forward(self, h, idx):
        """Return, per row, the dot product of ``h`` and the embedding at ``idx``.

        The looked-up rows are multiplied element-wise with ``h`` and summed
        over axis 1. ``(h, looked-up rows)`` is stored in ``self.cache``.
        """
        selected_rows = self.embed.forward(idx)
        scores = np.sum(selected_rows * h, axis=1)

        self.cache = (h, selected_rows)

        return scores