# CH. 01

## 신경망 복습


In [1]:
import numpy as np

In [2]:
# Build a 1-D array from a Python list and inspect the type of the result.
x = np.array([1, 2, 3])
x.__class__

numpy.ndarray

In [3]:
x.shape

(3,)

In [4]:
x.ndim

1

In [5]:
# A nested list with two rows of three values gives a 2-D array;
# its shape is reported as (rows, columns).
W = np.array([[1, 2, 3], [4, 5, 6]])
W.shape

(2, 3)

In [6]:
W.ndim

2

In [7]:
# Element-wise operations: arrays of the same shape are combined entry by entry.
W = np.array([[1, 2, 3], [4, 5, 6]])
X = np.array([[0, 1, 2], [3, 4, 5]])
W + X

array([[ 1,  3,  5],
       [ 7,  9, 11]])

In [8]:
W * X

array([[ 0,  2,  6],
       [12, 20, 30]])

In [9]:
# Broadcasting: the scalar 10 is applied to every element of A.
A = np.array([[1,2], [3,4]])
A * 10

array([[10, 20],
       [30, 40]])

In [10]:
# Broadcasting a 1-D array: b (shape (2,)) is reused for every row of A,
# so column j of A is multiplied by b[j].
A = np.array([[1, 2], [3, 4]])
b = np.array([10, 20])
A*b

array([[10, 40],
       [30, 80]])

In [11]:
# For two 1-D arrays np.dot is the inner product: 1*4 + 2*5 + 3*6 = 32.
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
np.dot(a, b)

32

In [12]:
# For two 2-D arrays np.matmul is the matrix product of A and B.
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
np.matmul(A, B)

array([[19, 22],
       [43, 50]])

In [13]:
np.dot(A, B)

array([[19, 22],
       [43, 50]])

## 신경망의 추론

In [14]:
def sigmoid(x):
    """Apply the logistic function ``1 / (1 + exp(-x))`` element-wise.

    Args:
        x: A number or NumPy array.

    Returns:
        The logistic function of ``x``; an array input keeps its shape.
    """
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)

In [15]:
# Forward pass of a 2 -> 4 -> 3 network written with samples as columns:
# each weight matrix is (out_features, in_features) and multiplies x from the left.
W1 = np.random.randn(4,2)
b1 = np.random.randn(4,1)  # shape (4, 1) broadcasts across the 10 sample columns
W2 = np.random.randn(3,4)
b2 = np.random.randn(3,1)  # shape (3, 1) broadcasts across the 10 sample columns
x = np.random.randn(2,10)  # 10 samples (columns) with 2 features each

h = np.matmul(W1, x) + b1  # hidden-layer pre-activation, shape (4, 10)
a = sigmoid(h)  # element-wise non-linearity, shape unchanged
s = np.matmul(W2, a) + b2  # output scores, shape (3, 10)

In [16]:
s.shape

(3, 10)

In [17]:
s

array([[-0.86399562,  0.19097012, -0.43698411,  0.04683068, -0.76847792,
        -0.91083207,  0.05576384, -0.43116762, -0.61418208, -0.5636651 ],
       [ 1.25399826, -0.31722766, -0.96949429, -0.97391886,  1.49002522,
         0.00600647,  0.51839542, -0.85441505,  1.85812396,  0.38058409],
       [ 0.98239305,  0.90810805,  1.51397253,  1.15359988,  0.84983196,
         1.36375747,  0.70377252,  1.46863265,  0.51368407,  1.0528487 ]])

In [18]:
# Layers implemented as classes
class Sigmoid:
    """Sigmoid activation layer (forward pass only); it holds no parameters."""

    def __init__(self):
        # An empty list gives this layer the same `params` attribute
        # as layers that do carry weights.
        self.params = list()

    def forward(self, x):
        """Return the element-wise logistic function of ``x``."""
        exp_neg = np.exp(-x)
        return 1 / (1 + exp_neg)
    
class Affine:
    """Fully connected layer computing ``x W + b`` (forward pass only)."""

    def __init__(self, W, b):
        # Stored as [weight, bias]; forward() unpacks them in this order.
        self.params = [W, b]

    def forward(self, x):
        """Return ``np.matmul(x, W) + b`` for the stored weight and bias."""
        weight, bias = self.params
        return np.matmul(x, weight) + bias

In [19]:
class TwoLayerNet:
    """Two-layer network built as Affine -> Sigmoid -> Affine."""

    def __init__(self, input_size, hidden_size, output_size):
        """Create randomly initialised layers.

        Args:
            input_size: Number of columns expected in the input.
            hidden_size: Width of the hidden layer.
            output_size: Number of columns in the prediction.
        """
        # Initialise weights and biases. The draw order (W1, b1, W2, b2) is
        # kept so that a seeded RNG produces the same parameters.
        first_weight = np.random.randn(input_size, hidden_size)
        first_bias = np.random.randn(hidden_size)
        second_weight = np.random.randn(hidden_size, output_size)
        second_bias = np.random.randn(output_size)

        # Create the layers
        self.layers = [
            Affine(first_weight, first_bias),
            Sigmoid(),
            Affine(second_weight, second_bias),
        ]

        # Collect every layer's parameters into a single flat list
        self.params = [param for layer in self.layers for param in layer.params]

    def predict(self, x):
        """Feed ``x`` through each layer in order and return the final output."""
        out = x
        for layer in self.layers:
            out = layer.forward(out)
        return out
            

In [20]:
# Run a batch of 10 two-feature samples (one per row) through a 2-4-3 network.
x = np.random.randn(10, 2)
model = TwoLayerNet(2, 4, 3)
s = model.predict(x)

In [21]:
s.shape

(10, 3)

In [22]:
# D is the length of the row vector; N is the repeat count used in the next cell.
D, N = 8, 7
x = np.random.randn(1,D)  # a single row of D random values, shape (1, D)
x

array([[ 0.58023856,  1.19694725, -1.7069864 ,  0.04313172,  0.7533321 ,
         0.77423369,  0.81637602, -0.49477838]])

In [23]:
# Copy the single row of x N times along axis 0, giving an (N, D) array.
y = np.repeat(x, N, axis=0)
y

array([[ 0.58023856,  1.19694725, -1.7069864 ,  0.04313172,  0.7533321 ,
         0.77423369,  0.81637602, -0.49477838],
       [ 0.58023856,  1.19694725, -1.7069864 ,  0.04313172,  0.7533321 ,
         0.77423369,  0.81637602, -0.49477838],
       [ 0.58023856,  1.19694725, -1.7069864 ,  0.04313172,  0.7533321 ,
         0.77423369,  0.81637602, -0.49477838],
       [ 0.58023856,  1.19694725, -1.7069864 ,  0.04313172,  0.7533321 ,
         0.77423369,  0.81637602, -0.49477838],
       [ 0.58023856,  1.19694725, -1.7069864 ,  0.04313172,  0.7533321 ,
         0.77423369,  0.81637602, -0.49477838],
       [ 0.58023856,  1.19694725, -1.7069864 ,  0.04313172,  0.7533321 ,
         0.77423369,  0.81637602, -0.49477838],
       [ 0.58023856,  1.19694725, -1.7069864 ,  0.04313172,  0.7533321 ,
         0.77423369,  0.81637602, -0.49477838]])

In [24]:
class MatMul:
    """Matrix-product layer.

    ``forward`` computes ``x W``; ``backward`` returns the gradient with
    respect to ``x`` and stores the gradient with respect to ``W``.
    """

    def __init__(self, W):
        self.params = [W]
        # One gradient buffer per parameter, same shape and dtype as W.
        self.grads = [np.zeros_like(W)]
        # Input of the most recent forward pass; backward() needs it.
        self.x = None

    def forward(self, x):
        """Return ``np.matmul(x, W)`` and remember ``x`` for backward()."""
        (weight,) = self.params
        product = np.matmul(x, weight)
        self.x = x
        return product

    def backward(self, dout):
        """Return dL/dx for upstream gradient ``dout`` and store dL/dW."""
        (weight,) = self.params
        grad_input = np.matmul(dout, weight.T)
        grad_weight = np.matmul(self.x.T, dout)
        # Write into the existing buffer instead of rebinding the list entry,
        # so any other reference to self.grads[0] sees the new values.
        self.grads[0][...] = grad_weight
        return grad_input

In [25]:
class Sigmoid:
    """Sigmoid activation layer with forward and backward passes.

    The layer has no trainable parameters, so ``params`` and ``grads`` are
    empty lists.
    """

    def __init__(self):
        self.params, self.grads = [], []
        # Output of the most recent forward pass; backward() needs it.
        self.out = None

    def forward(self, x):
        """Return the element-wise logistic function of ``x`` and cache it."""
        out = 1 / (1 + np.exp(-x))
        self.out = out
        return out

    def backward(self, dout):
        """Return the gradient with respect to the forward input.

        Uses the cached output ``y``: the derivative of the logistic function
        is ``y * (1 - y)``, which is multiplied by the upstream gradient.
        """
        dx = dout * (1.0 - self.out) * self.out
        return dx
    

class Affine:
    """Fully connected layer ``x W + b`` with forward and backward passes.

    The matrix product is delegated to an internal ``MatMul`` layer; this
    class adds the bias and keeps its own ``[dW, db]`` gradient buffers.
    """

    def __init__(self, W, b):
        self.params = [W, b]
        self.grads = [np.zeros_like(W), np.zeros_like(b)]
        # Input of the most recent forward pass.
        self.x = None
        self.matmul = MatMul(W)

    def forward(self, x):
        """Return ``x W + b`` and remember ``x``."""
        b = self.params[1]
        out = self.matmul.forward(x) + b
        self.x = x
        return out

    def backward(self, dout):
        """Return the gradient w.r.t. the input and store dW and db.

        Args:
            dout: Upstream gradient, one row per sample.
        """
        dx = self.matmul.backward(dout)
        dW = self.matmul.grads[0]
        # The bias is added to every row in forward(), so its gradient is the
        # upstream gradient summed over the row (batch) axis.
        db = np.sum(dout, axis=0)

        # In-place writes keep the identity of the gradient buffers stable.
        self.grads[0][...] = dW
        self.grads[1][...] = db
        return dx

In [26]:
W.shape

(2, 3)

In [27]:
# Three random samples with two features each; the two columns line up
# with the two rows of W (shape (2, 3)) for the matrix product below.
x = np.random.randn(3, 2)
x.shape

(3, 2)

In [28]:
MatMul(W).forward(x)

array([[  5.69221648,   7.70194268,   9.71166889],
       [  4.10253048,   5.04469133,   5.98685218],
       [-10.86033978, -13.66496122, -16.46958266]])