# Linear Regression

>### [예제 7] Multi Variable Linear Regression (TensorFlow)

>### Load modules

In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd

print("NumPy Version :{}".format(np.__version__))
print("TensorFlow Version :{}".format(tf.__version__))
print("Matplotlib Version :{}".format(plt.matplotlib.__version__))
print("Pandas Version :{}".format(pd.__version__))

NumPy Version :1.19.5
TensorFlow Version :2.4.0
Matplotlib Version :3.2.2
Pandas Version :1.1.5


In [2]:
colab=True
try:
  from google.colab import drive
except:
  colab =False
if colab : 
    drive.mount('/content/drive')
    print('g-drive mounted.')
else : print('local drive.')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
g-drive mounted.


In [3]:
if colab : 
  file_path = '/content/drive/My Drive/files/blood.csv'
else : 
  files_path = 'c:/files/blood.csv'

In [4]:
df = pd.read_csv(file_path)

df['DIS'].replace([1,2,3,4],[1,0,1,0], inplace=True )

df.sort_values(by=['DIS','BTH_G'], inplace=True)

In [5]:
input = np.array(df[['BTH_G', 'BMI', 'FBS']], dtype= np.float32)
target = np.array(df['DIS'], dtype= np.float32).reshape(-1,1)

In [17]:
# DIS열의 0,1 개수 확인
dis = df['DIS']
print(dis.value_counts())
dis.value_counts()[0]

0    903488
1     96512
Name: DIS, dtype: int64


903488

In [7]:
# BAL: 0, 1 label 개수가 균형 => True
BAL = True
if BAL == False:
  x_input = input[::5000]
  labels = target[::5000]
else:
  x_input = np.zeros((200,3),dtype=np.float32)
  labels = np.zeros((200,1),dtype=np.float32)

  x_input[:100] = input[:300000:3000]
  x_input[100:200] = input[900000:1000000:1000] 

  labels[:100] = target[:300000:3000] 
  labels[100:200] = target[900000:1000000:1000] 

> ### Input and Label

In [9]:
#Input(AGE) and Labels(BP)
# x_input = tf.constant([[25,22],[25,26],[25,30],[35,22],[35,26],[35,30],[45,22],[45,26],[45,30],[55,22],[55,26],[55,30],[65,22],[65,26],[65,30],[73,22],[73,26],[73,30]], dtype= tf.float32)
# labels = tf.constant([[118],[125],[130],[118],[126],[123],[120],[124],[130],[122],[125],[130],[127],[130],[130],[125.5],[130],[138]], dtype= tf.float32)

W = tf.Variable(tf.random.normal((3, 1)), dtype=tf.float32)
B = tf.Variable(tf.random.normal(()), dtype=tf.float32)

In [10]:
# Min Max Scaler
x_input_org = x_input # 나중에 원본데이터 확인을 위해 백업
x_min, x_max = np.min(x_input, axis=0), np.max(x_input, axis=0)
print(x_min.shape)
x_input = (x_input-x_min)/(x_max-x_min)
# x_input.shape, x_input

(3,)


>### Hypothesis : Linear Equation (Multi-Variable)
>### $X = \begin{bmatrix} x_{11} & x_{12} \\ x_{21} & x_{22} \\ x_{31} & x_{32} \\ \end{bmatrix}$
>### $W = \begin{bmatrix} w_{1} \\ w_{2} \\\end{bmatrix}$
>### $H(x) = XW + b$

In [11]:
# Hypothesis
def Hypothesis(x):
  return tf.sigmoid(tf.matmul(x ,W) + B)

>### Cost Function : Mean Squared Error (MSE)
>### $\sum_{i=1}^{n}(h(x_{i})-y_{i})^{2}$

In [12]:
def Cost():
  return -tf.reduce_mean(labels * tf.math.log(Hypothesis(x_input)) + (1 - labels) * tf.math.log(1 - Hypothesis(x_input)))

### 학습 (Training)

In [13]:
%%time
# Parameter Set
epochs = 10000
learning_rate = 0.1
optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)

training_idx = np.arange(0, epochs+1, 1)
cost_graph = np.zeros(epochs+1)
check = np.array([0, epochs*0.01, epochs*0.05, epochs*0.10, epochs*0.4, epochs])

W_trained = []
b_trained = []
check_idx = 0

# 학습 (Training)
for cnt in range(0, epochs+1):
  cost_graph[cnt] = Cost()
  if cnt % (epochs/20) == 0:
    print("[{:>5}] cost = {:>10.4}, W = [[{:>7.4}] [{:>7.4}] [{:>7.4}]], B = [[{:>7.4}]]".format(cnt, cost_graph[cnt], W[0][0], W[1][0],W[2][0], B.numpy()))  
  if check[check_idx] == cnt:
    W_trained.append(W.numpy())
    b_trained.append(B.numpy())
    check_idx += 1

  optimizer.minimize(Cost,[W, B])

[    0] cost =     0.8356, W = [[ 0.1288] [-0.9864] [  1.462]], B = [[ -1.282]]
[  500] cost =     0.3439, W = [[  3.424] [-0.1211] [  2.435]], B = [[ -1.941]]
[ 1000] cost =     0.2777, W = [[  4.801] [ 0.2808] [  3.013]], B = [[ -2.866]]
[ 1500] cost =     0.2475, W = [[  5.654] [  0.638] [  3.491]], B = [[ -3.507]]
[ 2000] cost =     0.2301, W = [[  6.247] [ 0.9589] [  3.902]], B = [[   -4.0]]
[ 2500] cost =     0.2186, W = [[  6.688] [   1.25] [  4.264]], B = [[ -4.402]]
[ 3000] cost =     0.2105, W = [[   7.03] [  1.516] [  4.588]], B = [[ -4.743]]
[ 3500] cost =     0.2043, W = [[  7.305] [  1.761] [  4.882]], B = [[ -5.038]]
[ 4000] cost =     0.1994, W = [[  7.532] [  1.987] [   5.15]], B = [[ -5.299]]
[ 4500] cost =     0.1955, W = [[  7.724] [  2.196] [  5.397]], B = [[ -5.532]]
[ 5000] cost =     0.1923, W = [[  7.888] [   2.39] [  5.625]], B = [[ -5.743]]
[ 5500] cost =     0.1896, W = [[  8.031] [  2.571] [  5.838]], B = [[ -5.935]]
[ 6000] cost =     0.1873, W = [[  8.158

>### Hypothesis Test 

In [14]:
# Training 결과 Test 및 Prediction

print("[Training Test]")
y = Hypothesis(x_input)
y = y.numpy().reshape((-1,))
Y = [np.float32(y_elem>0.5) for y_elem in y]
for idx in range(x_input.shape[0]):
    print("Input {} , Target : {} => Y :{:>2}(y:{:>5.2})".format(x_input[idx], labels[idx], Y[idx], y[idx]))

[Training Test]
Input [0.         0.021978   0.12547529] , Target : [0.] => Y :0.0(y:0.0021)
Input [0.         0.30219778 0.15969582] , Target : [0.] => Y :0.0(y:0.0077)
Input [0.         0.3076922  0.12547529] , Target : [0.] => Y :0.0(y:0.0062)
Input [0.         0.88461524 0.10266159] , Target : [0.] => Y :0.0(y:0.044)
Input [0.         0.24725273 0.1444867 ] , Target : [0.] => Y :0.0(y:0.0057)
Input [0.         0.27472526 0.15589353] , Target : [0.] => Y :0.0(y:0.0068)
Input [0.         0.23076916 0.07984791] , Target : [0.] => Y :0.0(y:0.0033)
Input [0.         0.13736263 0.07984791] , Target : [0.] => Y :0.0(y:0.0023)
Input [0.         0.13186811 0.05323194] , Target : [0.] => Y :0.0(y:0.0019)
Input [0.03846154 0.48351642 0.7946768 ] , Target : [0.] => Y :1.0(y: 0.69)
Input [0.03846154 0.29120874 0.06463879] , Target : [0.] => Y :0.0(y:0.0052)
Input [0.03846154 0.40659338 0.11026616] , Target : [0.] => Y :0.0(y:0.011)
Input [0.03846154 0.4120879  0.09505703] , Target : [0.] => Y :

In [15]:
# predict
def predict(x):
  return Hypothesis((x-x_min)/(x_max-x_min))