In [1]:
import tensorflow as tf
import numpy as np

## Multi-variable linear regression
- 다변량 선형 회귀함수

* Matrix multiplication 이용 (dot product: 내적)


![image.png](attachment:image.png)

- 이론과 표현방법에서 차이가 있음 (내적 계산법 때문)

### Example : 2 variables with Matrix

In [13]:
# Inputs are laid out feature-major: one row per variable and one column per
# sample, so x_data is a 2 x 5 matrix and y_data holds the 5 targets.
x_data = [
    [1., 0., 3., 0., 5.],
    [0., 2., 0., 4., 0.]
]
y_data  = [1, 2, 3, 4, 5]

# W is (1, 2) so that matmul(W, x_data) gives one prediction per sample: (1, 5).
W = tf.Variable(tf.random.uniform((1, 2), -1.0, 1.0))
b = tf.Variable(tf.random.uniform((1,), -1.0, 1.0))

learning_rate = tf.Variable(0.001)

print("     i, cost.numpy(), W.numpy()[0][0], W.numpy()[0][1], b.numpy()[0]\n")
for i in range(1000+1):
    # Only the forward pass (hypothesis and cost) is recorded on the tape.
    with tf.GradientTape() as tape:
        hypothesis = tf.matmul(W, x_data) + b       # (1, 2) * (2, 5) = (1, 5)
        cost = tf.reduce_mean(tf.square(hypothesis - y_data))

    # Differentiate and apply the gradient-descent step outside the tape
    # context, so the tape does not also record the backward pass and the
    # parameter updates (same pattern as the other training loops here).
    W_grad, b_grad = tape.gradient(cost, [W, b])
    W.assign_sub(learning_rate * W_grad)
    b.assign_sub(learning_rate * b_grad)

    # Report progress every 50 steps: step, cost, both weights and the bias.
    if i % 50 == 0:
        print("{:5} | {:13.6f} | {:13.4f} | {:13.4f} | {:13.6f}".format(
            i, cost.numpy(), W.numpy()[0][0], W.numpy()[0][1], b.numpy()[0]))

     i, cost.numpy(), W.numpy()[0][0], W.numpy()[0][1], b.numpy()[0]

    0 |      1.871500 |        0.5008 |        0.7113 |      0.109362
   50 |      0.444546 |        0.7327 |        0.7912 |      0.190253
  100 |      0.117729 |        0.8401 |        0.8392 |      0.228519
  150 |      0.040049 |        0.8899 |        0.8687 |      0.245941
  200 |      0.020316 |        0.9130 |        0.8873 |      0.253042
  250 |      0.014689 |        0.9239 |        0.8993 |      0.254958
  300 |      0.012747 |        0.9293 |        0.9073 |      0.254228
  350 |      0.011867 |        0.9321 |        0.9129 |      0.252129
  400 |      0.011331 |        0.9339 |        0.9168 |      0.249316
  450 |      0.010921 |        0.9351 |        0.9198 |      0.246131
  500 |      0.010565 |        0.9362 |        0.9220 |      0.242758
  550 |      0.010236 |        0.9371 |        0.9239 |      0.239298
  600 |      0.009924 |        0.9381 |        0.9255 |      0.235808
  650 |      0.00962

### Hypothesis without b
- b를 삭제하는 대신, 변수는 2개이지만 x_data에 값이 모두 1인 행(row)을 추가하고 W를 (1, 3)으로 확장

In [19]:
# The bias is folded into the weights: a row of ones is prepended to the
# inputs, so W[0][0] plays the role of b and no separate bias variable exists.
x_data = [
    [1., 1., 1., 1., 1.], # bias(b)
    [1., 0., 3., 0., 5.], 
    [0., 2., 0., 4., 0.]
]
y_data  = [1, 2, 3, 4, 5]

# W is widened to (1, 3) to cover the constant row plus the two variables.
W = tf.Variable(tf.random.uniform((1, 3), -1.0, 1.0))

learning_rate = 0.001
optimizer = tf.keras.optimizers.SGD(learning_rate)

# Header names exactly the values printed below; there is no `b` in this
# model, the last column is the third weight.
print("     i, cost.numpy(), W.numpy()[0][0], W.numpy()[0][1], W.numpy()[0][2]\n")
for i in range(1000+1):
    with tf.GradientTape() as tape:
        hypothesis = tf.matmul(W, x_data) # no bias term: (1, 3) * (3, 5) = (1, 5)
        cost = tf.reduce_mean(tf.square(hypothesis - y_data))

    # Gradient of the cost w.r.t. W, applied by the SGD optimizer.
    grads = tape.gradient(cost, [W])
    optimizer.apply_gradients(grads_and_vars=zip(grads,[W]))
    if i % 50 == 0:
        print("{:5} | {:13.6f} | {:13.4f} | {:13.4f} | {:13.4f}".format(
            i, cost.numpy(), W.numpy()[0][0], W.numpy()[0][1], W.numpy()[0][2]))

     i, cost.numpy(), W.numpy()[0][0], W.numpy()[0][1], b.numpy()[0]

    0 |      8.116693 |       -0.7343 |        0.2426 |        0.6033
   50 |      1.963493 |       -0.5459 |        0.7065 |        0.7966
  100 |      0.516823 |       -0.4479 |        0.9183 |        0.9124
  150 |      0.158433 |       -0.3948 |        1.0138 |        0.9828
  200 |      0.062294 |       -0.3644 |        1.0558 |        1.0260
  250 |      0.033582 |       -0.3456 |        1.0736 |        1.0525
  300 |      0.023830 |       -0.3330 |        1.0804 |        1.0687
  350 |      0.019996 |       -0.3237 |        1.0823 |        1.0785
  400 |      0.018208 |       -0.3163 |        1.0822 |        1.0843
  450 |      0.017188 |       -0.3101 |        1.0813 |        1.0875
  500 |      0.016479 |       -0.3045 |        1.0801 |        1.0890
  550 |      0.015905 |       -0.2993 |        1.0788 |        1.0895
  600 |      0.015393 |       -0.2944 |        1.0775 |        1.0893
  650 |      0.01491

## Custom Gradient
* tf.keras.optimizers.SGD(): optimizer
* optimizer.apply_gradients(): update

In [23]:
# Multi-variable linear regression (1)

# Two samples with two features each (rows are samples) and their targets.
X = tf.constant([[1., 2.], [3., 4.]])
y = tf.constant([[1.5], [3.5]])

# Parameters start from random normal draws: W is (2, 1), b is (1,).
W = tf.Variable(tf.random.normal((2, 1)))
b = tf.Variable(tf.random.normal((1,)))

# Plain stochastic gradient descent with a fixed step size.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

n_epoch = 1000+1
row_format = "{:5} | {:10.6f}"
trainable = [W, b]

print("epoch | cost")
for i in range(n_epoch):
    # Forward pass under the tape: linear prediction and mean squared error.
    with tf.GradientTape() as tape:
        y_pred = tf.matmul(X, W) + b
        squared_error = tf.square(y_pred - y)
        cost = tf.reduce_mean(squared_error)

    # Backward pass, then let the optimizer update W and b.
    grads = tape.gradient(cost, trainable)
    optimizer.apply_gradients(grads_and_vars=zip(grads, trainable))

    # Log the cost every 50 epochs.
    if i % 50 == 0:
        print(row_format.format(i, cost.numpy()))

epoch | cost
    0 |   3.360607
   50 |   0.018925
  100 |   0.012939
  150 |   0.008847
  200 |   0.006049
  250 |   0.004135
  300 |   0.002827
  350 |   0.001933
  400 |   0.001322
  450 |   0.000904
  500 |   0.000618
  550 |   0.000422
  600 |   0.000289
  650 |   0.000197
  700 |   0.000135
  750 |   0.000092
  800 |   0.000063
  850 |   0.000043
  900 |   0.000030
  950 |   0.000020
 1000 |   0.000014


## 예제) Predicting exam score
- regression using three inputs (x1, x2, x3)

![image.png](attachment:image.png)

In [32]:
tf.random.set_seed(0)

# Input variables: three score lists (x1, x2, x3) and the target list Y,
# each holding five samples.
x1 = [ 73.,  93.,  89.,  96.,  73.]
x2 = [ 80.,  88.,  91.,  98.,  66.]
x3 = [ 75.,  93.,  90., 100.,  70.]
Y  = [152., 185., 180., 196., 142.]

# Weights: one scalar per input plus a scalar bias, all initialised to 10.
w1 = tf.Variable(10.)
w2 = tf.Variable(10.)
w3 = tf.Variable(10.)
b  = tf.Variable(10.)

# Hypothesis: weighted sum of the three inputs plus the bias.
hypothesis = (
    w1 * x1
    + w2 * x2
    + w3 * x3
    + b
)

In [33]:
# No matrices here: every input gets its own scalar weight, so each weight
# also has to be updated individually.

# Data and labels (five samples).
x1 = [ 73.,  93.,  89.,  96.,  73.]
x2 = [ 80.,  88.,  91.,  98.,  66.]
x3 = [ 75.,  93.,  90., 100.,  70.]
Y  = [152., 185., 180., 196., 142.]

# Scalar weights and bias, all starting from the constant 10.
w1 = tf.Variable(10.)
w2 = tf.Variable(10.)
w3 = tf.Variable(10.)
b  = tf.Variable(10.)

learning_rate = 0.000001

print("    i,      cost\n")
for i in range(1000+1):
    # Forward pass under the tape: weighted sum and mean squared error.
    with tf.GradientTape() as tape:
        weighted_sum = w1 * x1 +  w2 * x2 + w3 * x3
        hypothesis = weighted_sum + b
        cost = tf.reduce_mean(tf.square(hypothesis - Y))

    # Gradient of the cost for each parameter, in the order w1, w2, w3, b.
    w1_grad, w2_grad, w3_grad, b_grad = tape.gradient(cost, [w1, w2, w3, b])

    # One gradient-descent step per parameter.
    for param, grad in zip((w1, w2, w3, b), (w1_grad, w2_grad, w3_grad, b_grad)):
        param.assign_sub(learning_rate * grad)

    # Log the cost every 50 steps.
    if i % 50 == 0:
        print("{:5} | {:12.4f}".format(i, cost.numpy()))

    i,      cost

    0 | 5793889.5000
   50 |   64291.1484
  100 |     715.2902
  150 |       9.8462
  200 |       2.0152
  250 |       1.9252
  300 |       1.9210
  350 |       1.9177
  400 |       1.9145
  450 |       1.9114
  500 |       1.9081
  550 |       1.9050
  600 |       1.9018
  650 |       1.8986
  700 |       1.8955
  750 |       1.8923
  800 |       1.8892
  850 |       1.8861
  900 |       1.8829
  950 |       1.8798
 1000 |       1.8767


## Multi-variable linear regression : Matrix 사용
- numpy를 사용하여 matrix로 변수 x와 label y를 묶어줌
- W = tf.Variable 에서도 행렬 (3, 1)로 선언
- 가설 함수의 return 값에 tf.matmul 함수 사용

In [35]:
# Each row is one sample: three input scores followed by the target.
data = np.array(
    [
        # X1,   X2,    X3,   y
        [ 73.,  80.,  75., 152. ],
        [ 93.,  88.,  93., 185. ],
        [ 89.,  91.,  90., 180. ],
        [ 96.,  98., 100., 196. ],
        [ 73.,  66.,  70., 142. ],
    ],
    dtype=np.float32,
)

# Split into the feature matrix (all but the last column) and the label
# column (kept two-dimensional by indexing with a list).
X = data[:, :-1]
y = data[:, [-1]]

# Parameters: one weight per feature as a (3, 1) matrix, plus a bias.
W = tf.Variable(tf.random.normal((3, 1)))
b = tf.Variable(tf.random.normal((1,)))

learning_rate = 0.000001


def predict(X):
    """Return the linear hypothesis matmul(X, W) + b for the inputs X.

    Uses the module-level W and b as they are at call time.
    """
    linear_part = tf.matmul(X, W)
    return linear_part + b


print("epoch | cost")

n_epochs = 2000
for i in range(n_epochs+1):
    # Forward pass under the tape: mean squared error of the predictions.
    with tf.GradientTape() as tape:
        error = predict(X) - y
        cost = tf.reduce_mean(tf.square(error))

    # Gradients of the cost with respect to W and b.
    W_grad, b_grad = tape.gradient(cost, [W, b])

    # Manual gradient-descent step on both parameters.
    W.assign_sub(learning_rate * W_grad)
    b.assign_sub(learning_rate * b_grad)

    # Log the cost every 100 epochs.
    if i % 100 == 0:
        print("{:5} | {:10.4f}".format(i, cost.numpy()))

epoch | cost
    0 |  1798.2894
  100 |     2.2888
  200 |     2.0632
  300 |     2.0587
  400 |     2.0542
  500 |     2.0498
  600 |     2.0453
  700 |     2.0409
  800 |     2.0366
  900 |     2.0322
 1000 |     2.0279
 1100 |     2.0236
 1200 |     2.0194
 1300 |     2.0151
 1400 |     2.0108
 1500 |     2.0066
 1600 |     2.0024
 1700 |     1.9982
 1800 |     1.9940
 1900 |     1.9899
 2000 |     1.9857


In [41]:
# Inspect the current weight values
W.numpy()

array([[ 1.6652976 ],
       [ 0.59528816],
       [-0.25148827]], dtype=float32)

In [42]:
# Inspect the current bias value
b.numpy()

array([1.0683131], dtype=float32)

In [45]:
# Hypothesis output for X, i.e. the model's predictions of the actual values
tf.matmul(X, W) + b

<tf.Tensor: shape=(5, 1), dtype=float32, numpy=
array([[151.39648],
       [184.93794],
       [180.81708],
       [194.1263 ],
       [144.31987]], dtype=float32)>

## Predict

In [50]:
# Labels (actual values)
Y 

[152.0, 185.0, 180.0, 196.0, 142.0]

In [51]:
predict(X).numpy() # predictions for the inputs in X

array([[151.39648],
       [184.93794],
       [180.81708],
       [194.1263 ],
       [144.31987]], dtype=float32)

In [52]:
# Predictions for new, unseen data (two samples with three inputs each)

predict([[ 89.,  95.,  92.],[ 84.,  92.,  85.]]).numpy()

array([[182.69525],
       [174.3433 ]], dtype=float32)