# 02. Multi-variable Linear Regression

In [1]:
import tensorflow as tf
import numpy as np

tf.__version__

'2.5.0'

## 2-1. Simple Example (2 variables)

In [2]:
tf.random.set_seed(1234)        # fix TensorFlow's global random seed so the randomly initialised weights below are reproducible

In [3]:
# Training data: two feature lists and the target values (five samples).
x1_data = [1, 0, 3, 0, 5]
x2_data = [0, 2, 0, 4, 0]
y_data = [1, 2, 3, 4, 5]

# Trainable parameters: each is a length-1 vector drawn uniformly between
# -10 and 10. The draw order (W1, W2, b) is kept so that the seeded initial
# values stay the same.
init_range = dict(minval=-10.0, maxval=10.0)
W1 = tf.Variable(tf.random.uniform(shape=[1], **init_range))
W2 = tf.Variable(tf.random.uniform(shape=[1], **init_range))
b = tf.Variable(tf.random.uniform(shape=[1], **init_range))

In [4]:
# Plain gradient descent on the two-feature model W1 * x1 + W2 * x2 + b.
learning_rate = tf.Variable(0.001)
n_steps = 1000
log_every = 100
row_format = "{:5} | {:10.6f} | {:10.4f} | {:10.4f} | {:10.6f}"

for step in range(n_steps + 1):
    # Forward pass: record the prediction and the mean squared error.
    with tf.GradientTape() as tape:
        hypothesis = W1 * x1_data + W2 * x2_data + b
        cost = tf.reduce_mean(tf.square(hypothesis - y_data))

    # Backward pass: one gradient per parameter, then step each parameter
    # against its gradient.
    params = [W1, W2, b]
    for param, grad in zip(params, tape.gradient(cost, params)):
        param.assign_sub(learning_rate * grad)

    # Log: step | cost | W1 | W2 | b
    if step % log_every == 0:
        print(row_format.format(
            step, cost.numpy(), W1.numpy()[0], W2.numpy()[0], b.numpy()[0]))

    0 |  62.234009 |     0.7571 |    -3.4614 |   1.957940
  100 |  12.134546 |     0.4957 |    -1.3739 |   2.437738
  200 |   3.236942 |     0.3824 |    -0.4835 |   2.586154
  300 |   1.602640 |     0.3436 |    -0.0952 |   2.600542
  400 |   1.253230 |     0.3385 |     0.0820 |   2.560854
  500 |   1.134123 |     0.3478 |     0.1702 |   2.499999
  600 |   1.059052 |     0.3629 |     0.2205 |   2.431620
  700 |   0.994950 |     0.3801 |     0.2547 |   2.361372
  800 |   0.935795 |     0.3980 |     0.2815 |   2.291592
  900 |   0.880347 |     0.4157 |     0.3050 |   2.223218
 1000 |   0.828216 |     0.4332 |     0.3267 |   2.156609


## 2-2. Simple Example (2 variables with Matrix)

In [5]:
# Both features stacked into one matrix: one row per feature, one column per sample.
x_data = [[1., 0., 3., 0., 5.],
          [0., 2., 0., 4., 0.]]
y_data = [1, 2, 3, 4, 5]

# Report the shape of each dataset.
for label, values in (('x_data dim.:', x_data), ('y_data dim.:', y_data)):
    print(label, tf.shape(values).numpy())

x_data dim.: [2 5]
y_data dim.: [5]


In [6]:
# W has shape (1, 2) so that W x x_data is (1, 2) x (2, 5) = (1, 5).
W = tf.Variable(tf.random.uniform(shape=[1, 2], minval=-1.0, maxval=1.0))
b = tf.Variable(tf.random.uniform(shape=[1], minval=-1.0, maxval=1.0))

In [7]:
# Show the randomly initialised weight matrix W and bias b before training.
print(W.numpy())
print(b.numpy())

[[0.81983495 0.33535123]]
[-0.9917586]


In [8]:
# Gradient descent on the matrix form of the model: hypothesis = W x x_data + b.
learning_rate = tf.Variable(0.001)

for i in range(1000+1):
    # Only the forward pass belongs inside the tape context.
    with tf.GradientTape() as tape:
        hypothesis = tf.matmul(W, x_data) + b               # (1, 2) x (2, 5) = (1, 5)
        cost = tf.reduce_mean(tf.square(hypothesis - y_data))

    # Differentiate and update the parameters after leaving the tape context,
    # consistent with the other training loops in this notebook.
    W_grad, b_grad = tape.gradient(cost, [W, b])
    W.assign_sub(learning_rate * W_grad)
    b.assign_sub(learning_rate * b_grad)

    # Log every 100 steps: step | cost | W[0][0] | W[0][1] | b
    if i % 100 == 0:
        print("{:5} | {:10.6f} | {:10.4f} | {:10.4f} | {:10.6f}".format(
            i, cost.numpy(), W.numpy()[0][0], W.numpy()[0][1], b.numpy()[0]))

    0 |   5.203093 |     0.8259 |     0.3430 |  -0.987531
  100 |   0.717020 |     1.1153 |     0.8421 |  -0.733672
  200 |   0.165334 |     1.1583 |     1.0409 |  -0.638880
  300 |   0.076656 |     1.1571 |     1.1196 |  -0.593822
  400 |   0.058726 |     1.1503 |     1.1492 |  -0.566144
  500 |   0.052911 |     1.1442 |     1.1586 |  -0.545225
  600 |   0.049367 |     1.1390 |     1.1596 |  -0.527248
  700 |   0.046372 |     1.1344 |     1.1573 |  -0.510745
  800 |   0.043614 |     1.1302 |     1.1536 |  -0.495121
  900 |   0.041029 |     1.1262 |     1.1494 |  -0.480125
 1000 |   0.038600 |     1.1224 |     1.1451 |  -0.465647


## 2-3. Simple Example (Hypothesis without b)

- 별도의 bias 변수를 설정하지 않고, W 가중치 변수에 반영.
- tf1.x 버전에서 optimizer로 사용된 tf.train.GradientDescentOptimizer는 tf.keras.optimizers.SGD로 변경됨

In [9]:
# Fold the bias of the previous example into the input matrix: the first row
# is all ones, so the weight multiplying it is added to every prediction,
# which has the same effect as adding a separate bias variable b.
x_data = [[1., 1., 1., 1., 1.],     # constant-1 "bias" feature
          [1., 0., 3., 0., 5.],
          [0., 2., 0., 4., 0.]]
y_data = [1, 2, 3, 4, 5]

# W is now (1, 3); multiplying it with x_data accounts for the bias as well.
W = tf.Variable(tf.random.uniform(shape=[1, 3], minval=-1.0, maxval=1.0))

learning_rate = 0.001
# tf.keras.optimizers.SGD replaces tf.train.GradientDescentOptimizer from TF 1.x.
optimizer = tf.keras.optimizers.SGD(learning_rate)

n_steps = 1000
for step in range(n_steps + 1):
    with tf.GradientTape() as tape:
        hypothesis = tf.matmul(W, x_data)                # no separate bias term
        cost = tf.reduce_mean(tf.square(hypothesis - y_data))

    # Let the optimizer apply the update to W.
    trainable = [W]
    grads = tape.gradient(cost, trainable)
    optimizer.apply_gradients(zip(grads, trainable))

    # Log every 100 steps: step | cost | W[0][0] | W[0][1] | W[0][2]
    if step % 100 == 0:
        print("{:5} | {:10.6f} | {:10.4f} | {:10.4f} | {:10.4f}".format(
            step, cost.numpy(), *W.numpy()[0]))

    0 |   0.793786 |    -0.2423 |     0.9282 |     0.6723
  100 |   0.113464 |    -0.1494 |     1.0171 |     0.8834
  200 |   0.020052 |    -0.1161 |     1.0288 |     0.9690
  300 |   0.004773 |    -0.1021 |     1.0278 |     1.0039
  400 |   0.002062 |    -0.0949 |     1.0257 |     1.0179
  500 |   0.001515 |    -0.0904 |     1.0242 |     1.0233
  600 |   0.001350 |    -0.0870 |     1.0231 |     1.0251
  700 |   0.001257 |    -0.0841 |     1.0222 |     1.0254
  800 |   0.001180 |    -0.0815 |     1.0215 |     1.0251
  900 |   0.001110 |    -0.0790 |     1.0208 |     1.0245
 1000 |   0.001044 |    -0.0766 |     1.0201 |     1.0238


## 2-4. Simple Example (Custom Gradient)

- tf1.x 버전에서 optimizer로 사용된 tf.train.GradientDescentOptimizer는 tf.keras.optimizers.SGD로 변경됨
- optimizer.apply_gradients(): update

In [10]:
# Multi-variable linear regression (1)

# Two samples with two features each, and one target per sample.
X = tf.constant([[1., 2.],
                 [3., 4.]])
y = tf.constant([[1.5], [3.5]])

# Randomly initialised parameters: W is (2, 1) and b is (1,).
W = tf.Variable(tf.random.normal(shape=[2, 1]))
b = tf.Variable(tf.random.normal(shape=[1]))

# SGD optimizer; replaces tf.train.GradientDescentOptimizer from TF 1.x.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

n_epoch = 1000+1
print("epoch | cost")
for epoch in range(n_epoch):
    # Record the forward pass so the cost can be differentiated.
    with tf.GradientTape() as tape:
        y_pred = tf.matmul(X, W) + b
        cost = tf.reduce_mean(tf.square(y_pred - y))

    # Gradients of the cost with respect to each parameter.
    trainable = [W, b]
    grads = tape.gradient(cost, trainable)

    # Let the optimizer update W and b.
    optimizer.apply_gradients(zip(grads, trainable))

    if epoch % 100 == 0:
        print("{:5} | {:10.6f}".format(epoch, cost.numpy()))

epoch | cost
    0 |  11.468436
  100 |   0.171258
  200 |   0.080056
  300 |   0.037423
  400 |   0.017494
  500 |   0.008178
  600 |   0.003823
  700 |   0.001787
  800 |   0.000835
  900 |   0.000390
 1000 |   0.000183


## 2-5. More Examples (Predicting exam score)

hypothesis = w1 * x1 +  w2 * x2 + w3 * x3 + b

In [11]:
# Features (three lists of five values) and the labels to predict.
x1 = [ 73.,  93.,  89.,  96.,  73.]
x2 = [ 80.,  88.,  91.,  98.,  66.]
x3 = [ 75.,  93.,  90., 100.,  70.]
Y  = [152., 185., 180., 196., 142.]

# Scalar weights and bias, all starting at 10.
w1 = tf.Variable(10.)
w2 = tf.Variable(10.)
w3 = tf.Variable(10.)
b  = tf.Variable(10.)

learning_rate = 1e-6

for step in range(1000 + 1):
    # Record the forward pass so the cost can be differentiated.
    with tf.GradientTape() as tape:
        hypothesis = w1 * x1 + w2 * x2 + w3 * x3 + b
        cost = tf.reduce_mean(tf.square(hypothesis - Y))

    # One gradient per parameter, then a gradient-descent step on each.
    params = [w1, w2, w3, b]
    for param, grad in zip(params, tape.gradient(cost, params)):
        param.assign_sub(learning_rate * grad)

    if step % 100 == 0:
        print("{:5} | {:12.4f}".format(step, cost.numpy()))

    0 | 5793889.5000
  100 |     715.2903
  200 |       2.0152
  300 |       1.9210
  400 |       1.9145
  500 |       1.9081
  600 |       1.9018
  700 |       1.8955
  800 |       1.8892
  900 |       1.8829
 1000 |       1.8767


- Matrix 및 hypothesis 함수 사용

In [12]:
# One row per sample; the last column is the label.
data = np.array(
    [
        # X1,   X2,    X3,   y
        [ 73.,  80.,  75., 152.],
        [ 93.,  88.,  93., 185.],
        [ 89.,  91.,  90., 180.],
        [ 96.,  98., 100., 196.],
        [ 73.,  66.,  70., 142.],
    ],
    dtype=np.float32,
)

# Split into the feature matrix (5, 3) and the label column (5, 1).
X = data[:, :-1]
y = data[:, -1:]

# Randomly initialised parameters: W is (3, 1) and b is (1,).
W = tf.Variable(tf.random.normal(shape=[3, 1]))
b = tf.Variable(tf.random.normal(shape=[1]))

learning_rate = 1e-6

# hypothesis, prediction function
def predict(X):
    """Return the linear-model prediction ``X x W + b`` for the inputs ``X``.

    Uses the notebook-level parameters ``W`` and ``b`` as they are at call time.

    Args:
        X: 2-D array-like (nested list, NumPy array or tensor) with one row
            per sample and one column per feature. It is cast to ``W``'s
            dtype first, so integer or float64 inputs are accepted as well.

    Returns:
        A tensor holding ``tf.matmul(X, W) + b``.
    """
    # tf.matmul needs both operands to share a dtype; casting is a no-op when
    # X already matches W.
    X = tf.cast(X, W.dtype)
    return tf.matmul(X, W) + b

print("epoch | cost")

n_epochs = 2000
for epoch in range(n_epochs + 1):
    # Record the forward pass so the cost can be differentiated.
    with tf.GradientTape() as tape:
        residual = predict(X) - y
        cost = tf.reduce_mean(tf.square(residual))

    # Gradients of the cost with respect to W and b.
    W_grad, b_grad = tape.gradient(cost, [W, b])

    # Gradient-descent step on both parameters.
    for param, grad in ((W, W_grad), (b, b_grad)):
        param.assign_sub(learning_rate * grad)

    if epoch % 100 == 0:
        print("{:5} | {:10.4f}".format(epoch, cost.numpy()))

epoch | cost
    0 |  3658.0347
  100 |    19.8423
  200 |    19.2920
  300 |    19.1901
  400 |    19.0889
  500 |    18.9882
  600 |    18.8881
  700 |    18.7885
  800 |    18.6894
  900 |    18.5908
 1000 |    18.4929
 1100 |    18.3954
 1200 |    18.2984
 1300 |    18.2020
 1400 |    18.1061
 1500 |    18.0108
 1600 |    17.9158
 1700 |    17.8215
 1800 |    17.7276
 1900 |    17.6343
 2000 |    17.5414


- predict

In [13]:
Y # labels (ground-truth values)

[152.0, 185.0, 180.0, 196.0, 142.0]

In [14]:
predict(X).numpy() # predictions (model outputs for the training inputs X)

array([[156.95924],
       [180.79514],
       [182.22328],
       [198.26012],
       [136.05217]], dtype=float32)

In [15]:
# Prediction for new data (two samples, three features each)

predict([[ 89.,  95.,  92.],[ 84.,  92.,  85.]]).numpy()

array([[188.56612],
       [179.72858]], dtype=float32)

- Reference : https://github.com/deeplearningzerotoall/TensorFlow/blob/master/tf_2.x/lab-04-1-Multi-variable-Linear-Regression--Regression-eager.ipynb