# perceptron

In [None]:
import numpy as np

def AND(x1, x2):
    x = np.array([x1, x2])
    w = np.array([0.5, 0.5])
    b = -0.7
    tmp = np.sum(w*x) + b
    if tmp <= 0:
        return 0
    else:
        return 1


def NAND(x1, x2):
    x = np.array([x1, x2])
    w = np.array([-0.5, -0.5])
    b = 0.7
    tmp = np.sum(w*x) + b
    if tmp <= 0:
        return 0
    else:
        return 1


def OR(x1, x2):
    x = np.array([x1, x2])
    w = np.array([0.5, 0.5])
    b = -0.2
    tmp = np.sum(w*x) + b
    if tmp <= 0:
        return 0
    else:
        return 1


def XOR(x1, x2):
    s1 = NAND(x1, x2)
    s2 = OR(x1, x2)
    return AND(s1, s2)

# activate function

In [1]:
import numpy as np

def step_function(x):
    y = x > 0
    return y.astype(np.int)


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def relu(x):
    return np.maximum(0, x)

def identity_function(x):
    return x

# basic forward network

In [None]:
import activate_func as af
import numpy as np

def init_network():
    network = {}
    network['W1'] = np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]])
    network['b1'] = np.array([0.1, 0.2, 0.3])
    network['W2'] = np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]])
    network['b2'] = np.array([0.1, 0.2])
    network['W3'] = np.array([[0.1, 0.3], [0.2, 0.4]])
    network['b3'] = np.array([0.1, 0.2])
    
    return network


def forward(network, x):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    
    a1 = np.dot(x, W1) + b1
    z1 = af.sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = af.sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = af.identity_function(a3)
    
    return y


network = init_network()
x = np.array([1.0, 0.5])
y = forward(network, x)
print(y)

# softmax function

In [6]:
import numpy as np

def softmax(a):
    c = np.max(a)          # overflow를 막기위한 c
    exp_a = np.exp(x-c)    # overflow 대책
    sum_exp_a = np.sum(exp_a)
    y = exp_a / sum_exp_a
    
    return y

a = np.array([0.3, 2.9, 4.0])
y = softmax(a)
print(y)            # softmax의 출력은 0에서 1사이
print(np.sum(y))    # sorfmax의 출력 총합은 1

[0.62245933 0.37754067]
1.0


# mean square error, MSE
### 평균 제곱 오차

In [11]:
import numpy as np

def mean_square_error(y, t):
    return 0.5 * np.sum((y - t)**2)


t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]    # 정답은 '2'

# 예1 : '2'일 확률이 가장 높다고 추정함 (0.6)
y = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
print(mean_square_error(np.array(y), np.array(t)))

# 예1 : '7'일 확률이 가장 높다고 추정함 (0.6)
y = [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0]
print(mean_square_error(np.array(y), np.array(t)))


0.09750000000000003
0.5975


# cross entropy error
### 교차 엔트로피 오차

In [13]:
import numpy as np

def cross_entropy_error(y, t):
    delta = 1e-7    # 마이너스 무한대가 발생하지 않게(log0은 마이너스 무한대)
    return -np.sum(t * np.log(y+delta))

t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]    # 정답은 '2'

# 예1 : '2'일 확률이 가장 높다고 추정함 (0.6)
y = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
print(cross_entropy_error(np.array(y), np.array(t)))

# 예1 : '7'일 확률이 가장 높다고 추정함 (0.6)
y = [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0]
print(cross_entropy_error(np.array(y), np.array(t)))

0.510825457099338
2.302584092994546


In [14]:
# batch 용

def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
        
    batch_size = y.shape[0]
    delta = 1e-7    # 마이너스 무한대가 발생하지 않게(log0은 마이너스 무한대)
    return -np.sum(t * np.log(y+delta)) / batch_size



# numerical differentiation

In [None]:
def numerical_diff(f, x):
    h = 1e-4
    return (f(x + h) - f(x - h)) / (2 * h)

# numerical gradient

In [None]:
def numerical_gradient(f, x):
    h = 1e-4
    grad = np.zeros_like(x)
    
    for idx in range(x.size):
        tmp_val = x[idx]

        x[idx] = tmp_val + h
        fxh1 = f(x)
        
        x[idx] = tmp_val - h
        fxh2 = f(x)
        
        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val
        
    return grad
        
        
        

# gradient descent method

In [None]:
def gradient_descent(f, init_x, lr = 0.01, step_num = 100):
    x = init_x
    
    for i in range(step_num):
        grad = numerical_gradient(f, x)
        x -= lr * grad
    return x

# 5장. Relu

In [1]:
class Relu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        dout[self.mask] = 0
        dx = dout

        return dx

In [8]:
import numpy as np

x = np.array([[1.0, -0.5], [-2.0, 3.0]])
print(x)

mask = (x <= 0)
print(mask)
out = x.copy()
print(out)
out[mask] = 0
print(out)

[[ 1.  -0.5]
 [-2.   3. ]]
[[False  True]
 [ True False]]
[[ 1.  -0.5]
 [-2.   3. ]]
[[1. 0.]
 [0. 3.]]


# 5장. Sigmoid

In [9]:
class Sigmoid:
    def __init__(self):
        self.out = None

    def forward(self, x):
        out = sigmoid(x)
        self.out = out
        return out

    def backward(self, dout):
        dx = dout * (1.0 - self.out) * self.out

        return dx

In [3]:
import numpy as np

x = np.array([[1.0, -0.5], [-2.0, 3.0]])
print(x)
print(x.T)

[[ 1.  -0.5]
 [-2.   3. ]]
[[ 1.  -2. ]
 [-0.5  3. ]]


# 5장. softmax with loss

In [None]:
class SoftmaxWithLoss:
    def __init__(self):
        self.loss = None # 손실함수
        self.y = None    # softmax의 출력
        self.t = None    # 정답 레이블(원-핫 인코딩 형태)
        
    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        
        return self.loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size: # 정답 레이블이 원-핫 인코딩 형태일 때
            dx = (self.y - self.t) / batch_size
        else:
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size
        
        return dx

# 6장. SGD

In [1]:
class SGD:
    def __init__(self, lr = 0.01):
        self.lr = lr
        
    def update(self, params, grads):
        for key in params.keys():
            params[key] -= self.lr * grads[key]


# 7장. Convolution / Pooling 계층 구현하기

In [4]:
import numpy as np
x = np.random.rand(10, 1, 28, 28)
x.shape

(10, 1, 28, 28)

In [7]:
# 첫번째 데이터에 접근

print(x[0].shape)
print(x[1].shape)

(1, 28, 28)
(1, 28, 28)


In [8]:
# 첫채널의 공간데이터에 접근

print(x[0,0])

[[0.54353695 0.13030893 0.02417657 0.48686067 0.16826257 0.83958999
  0.25509598 0.7520964  0.23645525 0.17347679 0.41322507 0.43653024
  0.06750827 0.79011532 0.7925863  0.98229363 0.50698569 0.60114948
  0.93648243 0.69014654 0.88683663 0.05243858 0.60997879 0.4411566
  0.24295623 0.27322101 0.90460819 0.59909398]
 [0.86191265 0.96661665 0.86547308 0.60897101 0.50133344 0.96831068
  0.5352628  0.54863207 0.40867483 0.62021991 0.81840523 0.82465252
  0.01407059 0.66464455 0.46914214 0.67637312 0.24061253 0.85746297
  0.15970225 0.55129987 0.46822393 0.56961995 0.99615019 0.20516008
  0.06646618 0.54024268 0.66298263 0.76381249]
 [0.97614002 0.72495471 0.87696946 0.3700411  0.93896818 0.741998
  0.28790611 0.63325967 0.02889955 0.65768748 0.62990137 0.25814957
  0.97765707 0.50108083 0.69254495 0.36098893 0.68268782 0.50306135
  0.88731011 0.18851732 0.72566238 0.10749937 0.3219408  0.7330195
  0.41784611 0.00979457 0.73085735 0.40013737]
 [0.8223654  0.97120548 0.99303596 0.28233759 0

In [None]:
# im2col(image to column) 함수 확인

def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """다수의 이미지를 입력받아 2차원 배열로 변환한다(평탄화).
    
    Parameters
    ----------
    input_data : 4차원 배열 형태의 입력 데이터(이미지 수, 채널 수, 높이, 너비)
    filter_h : 필터의 높이
    filter_w : 필터의 너비
    stride : 스트라이드
    pad : 패딩
    
    Returns
    -------
    col : 2차원 배열
    """
    N, C, H, W = input_data.shape
    out_h = (H + 2*pad - filter_h)//stride + 1
    out_w = (W + 2*pad - filter_w)//stride + 1

    img = np.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant')
    col = np.zeros((N, C, filter_h, filter_w, out_h, out_w))

    for y in range(filter_h):
        y_max = y + stride*out_h
        for x in range(filter_w):
            x_max = x + stride*out_w
            col[:, :, y, x, :, :] = img[:, :, y:y_max:stride, x:x_max:stride]

    col = col.transpose(0, 4, 5, 1, 2, 3).reshape(N*out_h*out_w, -1)
    return col

In [25]:
# test 

a = np.array([[[[1, 2], [3, 4]], [[5,6], [7,8]]], [[[1, 2], [3, 4]], [[5,6], [7,8]]]])
print(a)

pad_img = np.pad(a, [(0,0), (0,0), (1, 1), (1, 1)], "constant")
print(pad_img)

[[[[1 2]
   [3 4]]

  [[5 6]
   [7 8]]]


 [[[1 2]
   [3 4]]

  [[5 6]
   [7 8]]]]
[[[[0 0 0 0]
   [0 1 2 0]
   [0 3 4 0]
   [0 0 0 0]]

  [[0 0 0 0]
   [0 5 6 0]
   [0 7 8 0]
   [0 0 0 0]]]


 [[[0 0 0 0]
   [0 1 2 0]
   [0 3 4 0]
   [0 0 0 0]]

  [[0 0 0 0]
   [0 5 6 0]
   [0 7 8 0]
   [0 0 0 0]]]]


In [26]:
class Pooling:
    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad
        
        self.x = None
        self.arg_max = None

    def forward(self, x):
        N, C, H, W = x.shape
        out_h = int(1 + (H - self.pool_h) / self.stride)
        out_w = int(1 + (W - self.pool_w) / self.stride)

        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h*self.pool_w)

        arg_max = np.argmax(col, axis=1)
        out = np.max(col, axis=1)
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

        self.x = x
        self.arg_max = arg_max

        return out

    def backward(self, dout):
        dout = dout.transpose(0, 2, 3, 1)
        
        pool_size = self.pool_h * self.pool_w
        dmax = np.zeros((dout.size, pool_size))
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
        dmax = dmax.reshape(dout.shape + (pool_size,)) 
        
        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
        dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)
        
        return dx

In [30]:
all_size_list = [784] + [100, 50] + [10]
print(all_size_list)

[784, 100, 50, 10]


In [4]:
import sys, os
kk = sys.path
for i in kk:
    print(i)
sys.path.append(os.pardir)


C:\Users\heaven\Anaconda3\envs\tensor\python35.zip
C:\Users\heaven\Anaconda3\envs\tensor\DLLs
C:\Users\heaven\Anaconda3\envs\tensor\lib
C:\Users\heaven\Anaconda3\envs\tensor
C:\Users\heaven\Anaconda3\envs\tensor\lib\site-packages
C:\Users\heaven\Anaconda3\envs\tensor\lib\site-packages\win32
C:\Users\heaven\Anaconda3\envs\tensor\lib\site-packages\win32\lib
C:\Users\heaven\Anaconda3\envs\tensor\lib\site-packages\Pythonwin
C:\Users\heaven\Anaconda3\envs\tensor\lib\site-packages\IPython\extensions
C:\Users\heaven\.ipython
..


In [6]:
input_dim = (1, 28, 28)
input_size = input_dim[1]
print(input_size)

28
