## Perceptron

### Perceptron 수식 : $y = \sigma(\displaystyle\sum_{i=1}^{3} w_{i}x_{i} + b)$

In [3]:
import numpy as np

X = np.array([1, 2, 3])
W = np.array([4, 5, 6])
B = 10
X.shape



(3,)

In [6]:
mul = X * W
matmul = np.matmul(X, W)

mul, matmul

(array([ 4, 10, 18]), 32)

>### Perceptron 연산 : np.matmul() 이용
> * s = np.matmul(X, W) + b
>> $S=X\cdot W + b= \begin{bmatrix}x_{1}&x_{2}&x_{3}\end{bmatrix}\begin{bmatrix}w_{1}\\w_{2}\\w_{3}\end{bmatrix} + b = x_{1}w_{1} + x_{2}w_{2} + x_{3}w_{3} + b $

# Linear Regression

>### [예제 1] Hypothesis, Cost Function of Linear Regression

In [14]:
# Input and Labels
x_input = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype= np.float32)
labels = np.array([3, 5, 7, 9, 11, 13, 15, 17, 19, 21], dtype= np.float32)
x_input

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.], dtype=float32)

>### Hypothesis : Linear Equation
>### $h(x) = wx + b$

In [15]:
# Hypothesis : Linear Function
def Hypothesis(x):    
    return W*x + B

>### Cost Function : Mean Squared Error (MSE)
>### $\sum_{i=1}^{n}(h(x_{i})-y_{i})^{2}$

In [16]:
# Cost : Mean Squared Error
def Cost():
    return np.mean((Hypothesis(x_input) - labels)**2)

>### Test : Hypothesis, Cost Function of Linear Regression

In [17]:
# w,b 값에 따른 Hypothesis, Cost 테스트
W, B = 2, 2
res_cost = Cost()
print("cost(W= 2. B= 2): {}".format(res_cost))
W, B = -2, 23
res_cost = Cost()
print("cost(W=-2. B=23): {}".format(res_cost))
W, B =1, 0
res_cost = Cost()
print("cost(W=-2. B=23): {}".format(res_cost))

cost(W= 2. B= 2): 1.0
cost(W=-2. B=23): 132.0
cost(W=-2. B=23): 50.5


In [None]:
test_list = np.arange(10)
test_list

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [18]:
for W in range(10):
    for B in range(10):
        res_cost = Cost()
        print(f"cost(W= {W}. B= {B}): {res_cost}")

cost(W= 0. B= 0): 177.0
cost(W= 0. B= 1): 154.0
cost(W= 0. B= 2): 133.0
cost(W= 0. B= 3): 114.0
cost(W= 0. B= 4): 97.0
cost(W= 0. B= 5): 82.0
cost(W= 0. B= 6): 69.0
cost(W= 0. B= 7): 58.0
cost(W= 0. B= 8): 49.0
cost(W= 0. B= 9): 42.0
cost(W= 1. B= 0): 50.5
cost(W= 1. B= 1): 38.5
cost(W= 1. B= 2): 28.5
cost(W= 1. B= 3): 20.5
cost(W= 1. B= 4): 14.5
cost(W= 1. B= 5): 10.5
cost(W= 1. B= 6): 8.5
cost(W= 1. B= 7): 8.5
cost(W= 1. B= 8): 10.5
cost(W= 1. B= 9): 14.5
cost(W= 2. B= 0): 1.0
cost(W= 2. B= 1): 0.0
cost(W= 2. B= 2): 1.0
cost(W= 2. B= 3): 4.0
cost(W= 2. B= 4): 9.0
cost(W= 2. B= 5): 16.0
cost(W= 2. B= 6): 25.0
cost(W= 2. B= 7): 36.0
cost(W= 2. B= 8): 49.0
cost(W= 2. B= 9): 64.0
cost(W= 3. B= 0): 28.5
cost(W= 3. B= 1): 38.5
cost(W= 3. B= 2): 50.5
cost(W= 3. B= 3): 64.5
cost(W= 3. B= 4): 80.5
cost(W= 3. B= 5): 98.5
cost(W= 3. B= 6): 118.5
cost(W= 3. B= 7): 140.5
cost(W= 3. B= 8): 164.5
cost(W= 3. B= 9): 190.5
cost(W= 4. B= 0): 133.0
cost(W= 4. B= 1): 154.0
cost(W= 4. B= 2): 177.0
cost(W=

>### [예제 2] Gradient Descent of Linear Regression

In [21]:
# Input and Labels
x_input = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype= np.float32)
labels = np.array([3, 5, 7, 9, 11, 13, 15, 17, 19, 21], dtype= np.float32)

# Weights, Biases
W = np.random.normal()
B = np.random.normal()
W, B

(0.6845557716179558, -0.09976794339087067)

>### Hypothesis : Linear Equation
>### $h(x) = wx + b$

In [22]:
# Hypothesis : Linear Function
def Hypothesis(x):    
    return W*x + B 

>### Cost Function : Mean Squared Error (MSE)
>### $\sum_{i=1}^{n}(h(x_{i})-y_{i})^{2}$

In [23]:
# Cost : Mean Squared Error 
def Cost():
    return np.mean((Hypothesis(x_input) - labels)**2)

>### Gradient
>### $\frac{\partial}{\partial w}cost(w, b) = \frac{1}{m}  \sum_{i=1}^{m}(x_{i}(x_{i}w+(b-y_{i})))$
>### $\frac{\partial}{\partial b}cost(w, b) = \frac{1}{m}  \sum_{i=1}^{m}(x_{i}w - y_{i} + b)$


In [28]:
#장점 : 속도가 빠르고,정확하다 단점: 진짜로 미분해줘야함
# def Gradient(x, y):
#     return np.mean(x*(x*W+(B-y))), np.mean((W*x-y+B))

def Gradient(x, y):
    global W, B
    pres_w, pres_b = W, B # W,B backup
    delta = 5e-7  #아주 작은 값, lim x->0의 역할

    W = pres_w + delta
    cost_p = Cost()
    W = pres_w - delta
    cost_m = Cost()
    grad_w = (cost_p-cost_m)/(2*delta)
    W = pres_w # w restore

    B = pres_b + delta
    cost_p = Cost()
    B = pres_b - delta
    cost_m = Cost()
    grad_b = (cost_p-cost_m)/(2*delta)

    B = pres_b # b restore
    return grad_w, grad_b

In [29]:
#직접 적어봄
# def Gradient(x,y):
#     return np.mean(x(W*x +(B-y))), np.mean(W*x +(B-y))


# def Gradient(x,y):
#     global W, B
#     prev_W = W; prev_B = B
#     delta = 5e-7

#     W = prev_W + delta
#     cost_plus = Cost()
#     W = prev_W - delta
#     cost_minus = Cost()
#     grad_w = (cost_plus - cost_minus) / (2*delta)
#     W = prev_W

#     B = prev_B + delta
#     cost_plus = Cost()
#     B = prev_B - delta
#     cost_minus = Cost()
#     grad_b = (cost_plus - cost_minus) / (2*delta)
#     B = prev_B

#     return grad_w, grad_b

>### Training
>### $\mu$ : Learning rate
>### $w = w - \mu\frac{\partial}{\partial w}cost(w, b)$
>### $b = b - \mu\frac{\partial}{\partial b}cost(w, b)$

In [38]:
def Gradient(x,y):
    return np.mean(x*(W*x +(B-y))), np.mean(W*x +(B-y))

In [39]:
%%time

# Parameter Set
epochs = 500
learning_rate = 0.005

# 학습 (Training)
for cnt in range(0, epochs+1):
    if cnt % (epochs//20) == 0:
        print("[{0[0]:>5}] cost = {0[1]:>10.4}, W = {1[0]:0>7.4}, B = {1[1]:>7.4}".format([cnt, Cost()], [W, B]))

    #갱신하는 부분
    grad_w, grad_b = Gradient(x_input, labels)
    W -= learning_rate * grad_w
    B -= learning_rate * grad_b

[    0] cost =  2.254e-08, W = 00002.0, B =  0.9997
[   25] cost =  2.133e-08, W = 00002.0, B =  0.9997
[   50] cost =  2.029e-08, W = 00002.0, B =  0.9997
[   75] cost =  1.921e-08, W = 00002.0, B =  0.9997
[  100] cost =  1.825e-08, W = 00002.0, B =  0.9997
[  125] cost =  1.732e-08, W = 00002.0, B =  0.9997
[  150] cost =  1.646e-08, W = 00002.0, B =  0.9997
[  175] cost =  1.565e-08, W = 00002.0, B =  0.9997
[  200] cost =  1.481e-08, W = 00002.0, B =  0.9997
[  225] cost =  1.403e-08, W = 00002.0, B =  0.9997
[  250] cost =  1.331e-08, W = 00002.0, B =  0.9998
[  275] cost =  1.264e-08, W = 00002.0, B =  0.9998
[  300] cost =  1.201e-08, W = 00002.0, B =  0.9998
[  325] cost =  1.138e-08, W = 00002.0, B =  0.9998
[  350] cost =  1.078e-08, W = 00002.0, B =  0.9998
[  375] cost =  1.018e-08, W = 00002.0, B =  0.9998
[  400] cost =  9.692e-09, W = 00002.0, B =  0.9998
[  425] cost =  9.198e-09, W = 00002.0, B =  0.9998
[  450] cost =  8.744e-09, W = 00002.0, B =  0.9998
[  475] cost

In [40]:
%%time

epochs = 500
learning_rate = 0.01

for cnt in range(0,epochs+1):
    if cnt%20 == 0:
        print(f"[{cnt}]  cost = {Cost():.5f}, W = {W:.5f}, B = {B:.5f}")
    
    grad_w, grad_b = Gradient(x_input,labels)
    W -= (grad_w)*learning_rate
    B -= (grad_b)*learning_rate

W, B

[0]  cost = 0.00000, W = 2.00003, B = 0.99981
[20]  cost = 0.00000, W = 2.00003, B = 0.99982
[40]  cost = 0.00000, W = 2.00003, B = 0.99982
[60]  cost = 0.00000, W = 2.00002, B = 0.99983
[80]  cost = 0.00000, W = 2.00002, B = 0.99984
[100]  cost = 0.00000, W = 2.00002, B = 0.99984
[120]  cost = 0.00000, W = 2.00002, B = 0.99985
[140]  cost = 0.00000, W = 2.00002, B = 0.99986
[160]  cost = 0.00000, W = 2.00002, B = 0.99986
[180]  cost = 0.00000, W = 2.00002, B = 0.99987
[200]  cost = 0.00000, W = 2.00002, B = 0.99987
[220]  cost = 0.00000, W = 2.00002, B = 0.99988
[240]  cost = 0.00000, W = 2.00002, B = 0.99988
[260]  cost = 0.00000, W = 2.00002, B = 0.99989
[280]  cost = 0.00000, W = 2.00002, B = 0.99989
[300]  cost = 0.00000, W = 2.00001, B = 0.99990
[320]  cost = 0.00000, W = 2.00001, B = 0.99990
[340]  cost = 0.00000, W = 2.00001, B = 0.99991
[360]  cost = 0.00000, W = 2.00001, B = 0.99991
[380]  cost = 0.00000, W = 2.00001, B = 0.99991
[400]  cost = 0.00000, W = 2.00001, B = 0.9999

(2.0000096173300874, 0.9999331893981257)