In [1]:
import numpy as np

In [2]:
class ReLU:
    """Rectified linear unit layer.

    ``forward`` replaces every element that is <= 0 with 0 and remembers
    where those elements were; ``backward`` blocks the gradient at the
    same positions.
    """

    def __init__(self):
        # Boolean array marking where the last forward input was <= 0.
        # None until forward() has been called.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with all non-positive entries set to 0.

        Args:
            x: NumPy array of activations. It is not modified.

        Returns:
            A new array of the same shape as ``x``.
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """Propagate the upstream gradient through the layer.

        Args:
            dout: Upstream gradient, same shape as the forward input.
                It is not modified.

        Returns:
            A new array equal to ``dout`` except that positions where the
            forward input was <= 0 are 0.
        """
        # Work on a copy: zeroing in place would silently corrupt the
        # caller's array (which may be reused by other layers or checks).
        dx = dout.copy()
        dx[self.mask] = 0
        return dx

In [3]:
# Small 2x2 input mixing positive and negative entries.
x = np.array([[1.0, -0.5],[-2.0, 3.0]])

x_relu = ReLU()

# Forward pass: entries <= 0 become 0, positive entries pass through unchanged.
x_relu_for = x_relu.forward(x)

print(x_relu_for)

[[1. 0.]
 [0. 3.]]


In [4]:
# Reuses x itself as the upstream gradient just to exercise backward();
# positions where the forward input was <= 0 come back as 0.
x_relu_back = x_relu.backward(x)

In [5]:
# Show the gradient returned by ReLU.backward.
print(x_relu_back)

[[1. 0.]
 [0. 3.]]


In [6]:
class Sigmoid:
    """Logistic sigmoid layer that caches its activation for the backward pass."""

    def __init__(self):
        # Activation produced by the most recent forward(); None before that.
        self.out = None

    def forward(self, x):
        """Evaluate 1 / (1 + exp(-x)) element-wise, cache it and return it."""
        self.out = 1/(1+np.exp(-x))
        return self.out

    def backward(self, dout):
        """Scale the upstream gradient ``dout`` by out * (1 - out).

        ``out`` is the activation cached by the last forward() call.
        """
        activation = self.out
        # Same multiplication order as dout * out * (1 - out).
        scaled = dout*activation
        return scaled*(1.0-activation)

In [13]:
class ELU:
    """Exponential linear unit layer.

    forward:  x                      where x > 0
              alpha * (exp(x) - 1)   where x <= 0
    backward: dout                   where x > 0
              dout * alpha * exp(x)  where x <= 0
    """

    def __init__(self, alpha=0.01):
        """Create the layer.

        Args:
            alpha: Scale of the non-positive branch. Defaults to 0.01, the
                value this layer previously hard-coded.
        """
        self.alpha = alpha
        # Forward input, kept (by reference) for the backward pass.
        self.x = None
        # Boolean array marking where the forward input was <= 0.
        self.mask = None

    def forward(self, x):
        """Apply ELU element-wise and return a new array; ``x`` is not modified."""
        self.x = x
        self.mask = (x <= 0)
        out = x.copy()
        # Evaluate exp only on the non-positive entries so large positive
        # inputs cannot trigger overflow warnings.
        out[self.mask] = self.alpha * (np.exp(x[self.mask]) - 1)
        return out

    def backward(self, dout):
        """Return the gradient with respect to the forward input.

        Args:
            dout: Upstream gradient. It is not modified.

        Returns:
            A new array: ``dout`` multiplied element-wise by the local
            derivative (1 where x > 0, alpha * exp(x) where x <= 0).
        """
        local_grad = np.ones_like(self.x)
        local_grad[self.mask] = self.alpha * np.exp(self.x[self.mask])
        # Chain rule: the upstream gradient must be scaled by the local
        # derivative, not replaced by it.
        return dout * local_grad

In [24]:
class Affine:
    """Fully connected layer computing ``x . W + b``.

    The last axis of the input is the feature axis; any leading axes
    (including none, for a single sample) are treated as batch axes.
    """

    def __init__(self, W, b):
        """Store the parameters.

        Args:
            W: Weight matrix of shape (in_features, out_features).
            b: Bias added to every output row.
        """
        self.W = W
        self.b = b
        # Forward input, kept for computing dW in backward().
        self.x = None
        # Parameter gradients, filled in by backward().
        self.dW = None
        self.db = None

    def forward(self, x):
        """Return ``np.dot(x, W) + b`` and remember ``x`` for backward()."""
        self.x = x
        return np.dot(x, self.W) + self.b

    def backward(self, dout):
        """Back-propagate ``dout`` and store the parameter gradients.

        Args:
            dout: Upstream gradient with the same shape as the forward output.

        Returns:
            Gradient with respect to the forward input (same shape as it).
            ``self.dW`` and ``self.db`` are set as a side effect, with the
            shapes (in_features, out_features) and (out_features,).
        """
        dx = np.dot(dout, self.W.T)
        in_features, out_features = self.W.shape
        # Collapse every leading axis into one batch axis so a single 1-D
        # sample and inputs with extra batch axes accumulate correctly;
        # for 2-D input these reshapes are no-ops.
        x_2d = self.x.reshape(-1, in_features)
        dout_2d = dout.reshape(-1, out_features)
        self.dW = np.dot(x_2d.T, dout_2d)
        self.db = np.sum(dout_2d, axis=0)
        return dx

In [30]:
# Random fixtures for exercising Affine (values differ on every run; no seed is set).
emb = np.random.rand(3,4)         # input: 3 rows, 4 features
weight = np.random.rand(4,5)      # maps 4 input features to 5 outputs
bias = np.random.rand(5)          # one bias per output
derivative = np.random.rand(3,5)  # stand-in upstream gradient, same shape as the layer output

In [31]:
# Build the layer and check the forward output shape:
# a (3, 4) input times a (4, 5) weight gives (3, 5).
layer = Affine(weight,bias)
layer.forward(emb).shape

(3, 5)

In [32]:
# The gradient with respect to the input has the input's shape.
layer.backward(derivative).shape

(3, 4)

In [33]:
# The weight gradient stored by backward() has the same shape as the weight.
layer.dW.shape

(4, 5)

In [34]:
# The bias gradient is dout summed over axis 0: one value per output.
layer.db.shape

(5,)

In [1]:
import sys, os
# Put the parent directory on the import path — presumably so the `common`
# package imported below can be found; confirm against the project layout.
sys.path.append(os.pardir)

In [4]:
from common.functions import *

class SoftmaxWithLoss:
    """Softmax activation fused with a cross-entropy loss.

    ``softmax`` and ``cross_entropy_error`` come from ``common.functions``;
    their exact behaviour is not visible here.
    """

    def __init__(self):
        # Loss value returned by the last forward().
        self.loss = None
        # Output of softmax() from the last forward().
        self.y = None
        # Targets passed to the last forward().
        self.t = None

    def forward(self, x, t):
        """Compute and return the loss for scores ``x`` against targets ``t``.

        Args:
            x: Scores passed to ``softmax``.
            t: Targets, either one-hot rows (same size as the softmax output)
                or one integer class index per row.
                NOTE(review): index targets also require that
                ``cross_entropy_error`` accepts them — confirm.
        """
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss with respect to the scores.

        Args:
            dout: Upstream gradient of the loss; scales the result.

        Returns:
            ``dout * (y - onehot(t)) / batch_size`` with the shape of ``y``.
            The division assumes the loss is averaged over the first axis
            of ``t`` — TODO confirm against ``cross_entropy_error``.
        """
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size:
            # One-hot targets: subtract them directly.
            dx = (self.y - self.t) / batch_size
        else:
            # Class-index targets: subtract 1 at each row's target column
            # instead of letting `y - t` broadcast into a wrong result.
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size
        return dx * dout

In [9]:
# Random class scores: 3 rows, 4 classes (values differ on every run).
score = np.random.rand(3,4)
# One-hot targets: every row is labelled as class index 1.
target = np.array([[0,1,0,0],[0,1,0,0],[0,1,0,0]])
soft = SoftmaxWithLoss()
# Feed the scores defined in this cell rather than an array left over from
# an earlier demo, so the cell does not depend on unrelated notebook state.
soft.forward(score,target)

1.2300031265889657

In [10]:
# Gradient of the loss with respect to the scores given to forward(); dout keeps its default of 1.
soft.backward()

array([[ 0.08850587, -0.22413481,  0.07513075,  0.06049819],
       [ 0.06004503, -0.25402506,  0.07025151,  0.12372853],
       [ 0.06830107, -0.2265386 ,  0.05745919,  0.10077834]])