### 빅데이터 활용 AI 설계
# 경사하강법 (Gradient Descent)

In [1]:
import numpy as np
import matplotlib.pyplot as plt

### 입력값이 1개일 때의 선형회귀

In [12]:
# Two training points for the single-input example: (1, 2) and (2, 3).
# Both lie on y = x + 1, i.e. w = 1 and b = 1 fit them exactly.
X, y = [1, 2], [2, 3]

$$ \hat{y} = [w+b, 2w+b] \\
Loss = {1 \over 2} ((w+b-2)^2 + (2w+b-3)^2) \\
 = {1 \over 2} (5w^2 + 2b^2 + 13 + 6wb -16w -10b) $$
 
$$ {\partial Loss \over \partial w} = {1 \over 2} (10w + 6b - 16) = 5w + 3b -8 \\
{\partial Loss \over \partial b} = {1 \over 2} (4b + 6w - 10) = 3w + 2b -5 $$

$$ w = w - lr \cdot {\partial Loss \over \partial w} \\
b = b - lr \cdot {\partial Loss \over \partial b} $$

In [13]:
# Both parameters start at zero; lr is the step size used for every update.
w = b = 0
lr = 0.1

In [7]:
### First parameter update
# Both increments are computed from the current (w, b) before either
# parameter changes: each is -lr times the matching partial derivative
# (5w + 3b - 8 for w, 3w + 2b - 5 for b).
dw, db = -lr * (5*w + 3*b - 8), -lr * (3*w + 2*b - 5)
w, b = w + dw, b + db

w, b

(0.8, 0.5)

In [14]:
# Start from a known state so the cell gives the same trace no matter which
# cells ran before it. The recorded output begins at "0 0.8 0.5", which is
# one step away from w = b = 0 with lr = 0.1.
w, b = 0, 0
lr = 0.1

for i in range(500):
    # Both increments use the current (w, b); neither parameter is updated
    # until both partial derivatives have been evaluated.
    dw = -lr * (5*w + 3*b - 8)
    db = -lr * (3*w + 2*b - 5)
    w += dw
    b += db
    print(i, w, b)

0 0.8 0.5
1 1.05 0.6599999999999999
2 1.127 0.7129999999999999
3 1.1496000000000002 0.7322999999999998
4 1.15511 0.7409599999999998
5 1.155267 0.7462349999999998
6 1.153763 0.7504078999999998
7 1.1517591300000003 0.7541974199999998
8 1.1496203390000002 0.7578301969999998
9 1.1474611104 0.7613780558999997
10 1.14531713843 0.7648641115999998
11 1.143199335735 0.7682961477509999
12 1.1411108235422 0.7716771174802999
13 1.1390522765270101 0.7750084469215799
14 1.1370236041870312 0.7782910745791608
15 1.1350244797197673 0.7815257784072193
16 1.1330545063377178 0.7847132788098452
17 1.1311132695259052 0.7878542711465609
18 1.1292003534189843 0.7909494360594772
19 1.127315345891649 0.7939994428218865
20 1.1254578400992585 0.7970049504900145
21 1.123627434902625 0.7999666083622341
22 1.1218237349426423 0.8028850562189999
23 1.1200463506056213 0.8057609244924072
24 1.1182948979550884 0.8085948344122393
25 1.1165689986538725 0.8113873981432649
26 1.1148682798839569 0.8141392189184502
27 1.113192

325 1.0014180731215951 0.9977055094907263
326 1.0013973837135797 0.9977389856561025
327 1.001376996159959 0.9977719734108081
328 1.001356906056737 0.9978044798806588
329 1.001337109064171 0.9978365120875059
330 1.0013176009058338 0.9978680769507534
331 1.0012983773676907 0.9978991812888526
332 1.0012794342971896 0.9979298318207749
333 1.0012607676023624 0.997960035167463
334 1.0012423732509421 0.9979897978532617
335 1.0012242472694926 0.9980191263073267
336 1.0012063857425484 0.9980480268650136
337 1.0011887848117702 0.9980765057692463
338 1.0011714406751113 0.9981045691718661
339 1.0011543495859958 0.9981322231349595
340 1.0011375078525102 0.9981594736321688
341 1.0011209118366045 0.998186326549982
342 1.0011045579533076 0.9982127876890043
343 1.0010884426699524 0.9982388627652112
344 1.0010725625054129 0.9982645574111833
345 1.0010569140293515 0.9982898771773228
346 1.001041493861479 0.9983148275330528
347 1.0010262986708236 0.9983394138679985
348 1.0010113251750121 0.998363641493151

### 선형회귀 일반해

$$ \hat{y}_i = \sum_j (w_j \cdot x_{ij}) + b \\
Loss_i = (y_i - \hat{y}_i)^2 \\
 = (y_i - \sum_j (w_j \cdot x_{ij}) - b)^2 \\
Loss = {1 \over N} \sum_i (y_i - \sum_j (w_j \cdot x_{ij}) - b)^2 $$

$$ {\partial Loss \over \partial w_j} = -{2 \over N} \sum_i (y_i - \hat{y}_i) \cdot x_{ij} \\
{\partial Loss \over \partial b} = -{2 \over N} \sum_i (y_i - \hat{y}_i) $$

In [15]:
import numpy.linalg as lin

from sklearn.datasets import load_iris

In [None]:
# Load the iris dataset; the following cell slices its .data array into
# features and target.
iris = load_iris()

In [29]:
# Regression task: predict the 4th column of iris.data from the first three.
# Indexing with the list [3] keeps Y two-dimensional (one column).
X, Y = iris.data[:, :3], iris.data[:, [3]]

In [38]:
# Weights as a float (3, 1) column vector, one row per feature column of X,
# plus a scalar bias. Using an ndarray instead of a nested list means the
# later matrix products and in-place updates work on a real array.
W = np.zeros((3, 1))
b = 0.0

In [20]:
# Forward pass of the linear model: one prediction per row of X.
weighted_sum = X @ W
pred_y = weighted_sum + b
pred_y.shape

(150, 1)

In [27]:
lr = 0.1

# dW and db are the parameter increments for one step: lr * 2/N times the
# residual projected onto each feature (dW) or summed (db). That is -lr
# times the gradient of the mean squared error.
residual = Y - pred_y
step = lr * 2/len(X)
dW, db = step * (X.T @ residual), step * np.sum(residual)
dW, db

(array([[1.50353333],
        [0.70870667],
        [1.15862667]]), 0.23973333333333338)

In [48]:
# Batch gradient descent for the linear model pred_y = X @ W + b under a
# mean-squared-error loss.
#
# W is created as a float ndarray with one row per column of X. The earlier
# version started from a nested Python list and only worked because
# `W += dW` rebound W to a new ndarray on the first iteration.
W = np.zeros((X.shape[1], 1))
b = 0.0
lr = 0.01

# Loop-invariant step factor: learning rate times the 2/N of the MSE gradient.
step = lr * 2/len(X)

for i in range(50000):
    pred_y = X @ W + b
    residual = Y - pred_y

    # Increments are -lr * gradient; the minus sign is already folded in
    # because residual is (Y - pred_y).
    dW = step * (X.T @ residual)
    db = step * np.sum(residual)

    W += dW
    b += db
    # Loss of the predictions made *before* this update, while W and b
    # printed next to it are the values *after* the update.
    loss = (residual**2).sum()/len(Y)

    if i%1000==0: print('%05d> Loss=%.7f' % (i,loss), W.ravel(), b)

# Final state after the last iteration.
print('%05d> Loss=%.7f' % (i,loss), W.ravel(), b)

00000> Loss=2.0153333 [0.15035333 0.07087067 0.11586267] 0.023973333333333336
01000> Loss=0.0372840 [-0.15531449  0.11219354  0.48849655] -0.07072938119012089
02000> Loss=0.0361124 [-0.20986612  0.18742747  0.51988612] -0.10025360348510202
03000> Loss=0.0359727 [-0.22376833  0.21059521  0.52847014] -0.12229623455047198
04000> Loss=0.0359326 [-0.22597933  0.21826314  0.53041822] -0.14027897633693442
05000> Loss=0.0359084 [-0.22500287  0.22124136  0.53048921] -0.1554689376159767
06000> Loss=0.0358910 [-0.22331204  0.22273619  0.53007624] -0.16846161638499474
07000> Loss=0.0358781 [-0.22160526  0.22370766  0.52958074] -0.1796235240788863
08000> Loss=0.0358685 [-0.22006158  0.224451    0.52911279] -0.18922708932192006
09000> Loss=0.0358614 [-0.21871052  0.22506357  0.52869767] -0.19749415308223706
10000> Loss=0.0358562 [-0.21754072  0.22558292  0.52833663] -0.20461197571894238
11000> Loss=0.0358523 [-0.21653155  0.22602772  0.52802468] -0.21074069121229652
12000> Loss=0.0358494 [-0.2156620

- 정답 : [-0.21027133,  0.22877721,  0.52608818], -0.2487235860244572

### 로지스틱 회귀

In [49]:
def sigmoid(t):
    """Logistic function 1 / (1 + exp(-t)), evaluated element-wise.

    Args:
        t: A real scalar or array-like (list, ndarray, ...).

    Returns:
        The sigmoid of ``t`` with the same shape as the input; a NumPy
        scalar when ``t`` is a scalar.
    """
    t = np.asarray(t)
    # For very negative t, exp(-t) overflows to inf and the quotient becomes
    # 0.0, which is the correct limit. Only the overflow warning is
    # suppressed; the computed values are unchanged.
    with np.errstate(over='ignore'):
        return 1/(1+np.exp(-t))

$$ \sigma(t) = { 1 \over 1+e^{-t} } \\
\sigma'(t) = { e^{-t} \over (1+e^{-t})^2 } = \sigma(t) \, (1 - \sigma(t)) $$
<br>

$$ u_i = \sum_j (w_j \cdot x_{ij}) + b \\
\hat{y}_i = \sigma(u_i) \\
Loss_i = (y_i - \hat{y}_i)^2 \\
 = (y_i - \sigma(u_i))^2 \\
Loss = {1 \over N} \sum_i (y_i - \sigma(u_i))^2 $$
<br>

$$ {\partial Loss \over \partial w_j} = -{2 \over N} \sum_i (y_i - \sigma(u_i)) \cdot \sigma'(u_i) \cdot {\partial u_i \over \partial w_j} \\
 = -{2 \over N} \sum_i (y_i - \sigma(u_i)) \cdot \sigma(u_i) \cdot (1 - \sigma(u_i)) \cdot x_{ij} \\
 = -{2 \over N} \sum_i (y_i - \hat{y}_i) \cdot \hat{y}_i \cdot (1 - \hat{y}_i) \cdot x_{ij} \\
{\partial Loss \over \partial b} = -{2 \over N} \sum_i (y_i - \hat{y}_i) \cdot \hat{y}_i \cdot (1 - \hat{y}_i) $$

In [50]:
# Reload the iris dataset; the next cell takes its first 100 rows of .data
# and .target for the classification example.
iris = load_iris()

In [57]:
# Keep only the first 100 samples (presumably the first two iris classes,
# giving 0/1 labels -- verify against iris.target). The labels are reshaped
# into a column so they line up with the (N, 1) predictions.
X, Y = iris.data[:100], iris.target[:100].reshape(-1, 1)

X.shape, Y.shape

((100, 4), (100, 1))

In [68]:
# Weights as a float (4, 1) column vector, one row per feature column of X,
# plus a scalar bias. Using an ndarray instead of a nested list means the
# later matrix products and in-place updates work on a real array.
W = np.zeros((4, 1))
b = 0.0

In [59]:
# Forward pass: sigmoid of the linear score for every row of X.
# sigmoid is built from NumPy operations and already works element-wise on
# arrays, so wrapping it in np.vectorize (a Python-level loop) is unnecessary.
pred_y = sigmoid(X @ W + b)
pred_y.shape

(100, 1)

In [60]:
lr = 0.01

# Per-sample factor shared by both increments: residual times the sigmoid
# derivative pred_y * (1 - pred_y). dW and db are lr * 2/N times that factor
# projected onto each feature (dW) or summed (db).
error_term = (Y-pred_y)*pred_y*(1-pred_y)
step = lr * 2/len(X)
dW, db = step * (X.T @ error_term), step * np.sum(error_term)
dW, db

(array([[ 0.0011625],
        [-0.00081  ],
        [ 0.003495 ],
        [ 0.0013525]]), 0.0)

In [75]:
# Batch gradient descent for pred_y = sigmoid(X @ W + b) under a
# mean-squared-error loss.
#
# W is created as a float ndarray with one row per column of X. The earlier
# version started from a nested Python list and only worked because
# `W += dW` rebound W to a new ndarray on the first iteration.
W = np.zeros((X.shape[1], 1))
b = 0.0
lr = 0.01

# Loop-invariant step factor: learning rate times the 2/N of the MSE gradient.
step = lr * 2/len(X)

for i in range(30000):
    # sigmoid is already element-wise on arrays; calling it directly avoids
    # the per-element Python loop that np.vectorize would run each iteration.
    pred_y = sigmoid(X @ W + b)
    residual = Y - pred_y

    # Chain rule: residual times the sigmoid derivative pred_y * (1 - pred_y).
    error_term = residual*pred_y*(1-pred_y)
    dW = step * (X.T @ error_term)
    db = step * np.sum(error_term)

    W += dW
    b += db
    # Loss of the predictions made *before* this update, while W and b
    # printed next to it are the values *after* the update.
    loss = (residual**2).sum()/len(Y)

    if i%1000==0: print('%05d> Loss=%.7f' % (i,loss), W.ravel(), b)

# Final state after the last iteration.
print('%05d> Loss=%.7f' % (i,loss), W.ravel(), b)

00000> Loss=0.2500000 [ 0.0011625 -0.00081    0.003495   0.0013525] 0.0
01000> Loss=0.0255751 [-0.17673589 -0.65090705  0.99700758  0.42833903] -0.11374921900840183
02000> Loss=0.0124896 [-0.2278973  -0.83047181  1.27035285  0.54982062] -0.1460603206590239
03000> Loss=0.0082614 [-0.2568841  -0.93343607  1.42787308  0.62089009] -0.164504072288996
04000> Loss=0.0061888 [-0.27704494 -1.00555742  1.53854178  0.6713531 ] -0.1773552755157711
05000> Loss=0.0049596 [-0.29248337 -1.06104342  1.62387872  0.71059024] -0.18719650704544286
06000> Loss=0.0041456 [-0.3049884  -1.10613118  1.69335483  0.7427556 ] -0.19516203425897136
07000> Loss=0.0035665 [-0.31549741 -1.14410714  1.75196885  0.77005271] -0.20184885081906243
08000> Loss=0.0031331 [-0.32456178 -1.17691354  1.80267876  0.79379108] -0.20760915051004278
09000> Loss=0.0027963 [-0.33253282 -1.20579304  1.847379    0.81481255] -0.21266780614713432
10000> Loss=0.0025269 [-0.33964794 -1.23158847  1.88735555  0.83369055] -0.2171770049253904
110

- LogisticRegression 적용

In [65]:
from sklearn.linear_model import LogisticRegression

In [73]:
# Reference fit with scikit-learn on the same X. Y is a column, so it is
# flattened to 1-D before being passed as the label vector.
model = LogisticRegression()
labels = Y.ravel()
model.fit(X, labels)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [74]:
# Show the fitted weights and bias for comparison with the hand-written
# gradient-descent W and b.
model.coef_, model.intercept_

(array([[-0.40731745, -1.46092371,  2.24004724,  1.00841492]]),
 array([-0.26048137]))