## 02_logistic_regression_diabetes

In [1]:
import tensorflow as tf
import numpy as np
# Fix TensorFlow's global random seed so the random initial weights are reproducible.
SEED = 5
tf.random.set_seed(SEED)

# Display the TensorFlow version this notebook was executed with.
tf.__version__

'2.17.0'

In [6]:
# Load the dataset: every column except the last is a feature, the last column is the label.
# The original run reported shape (759, 9).
xy = np.loadtxt('data-03-diabetes.csv',delimiter=',',dtype=np.float32)

# Fraction of rows used for training; the remainder is held out for validation.
# The split is sequential (no shuffling), exactly like the original fixed index.
TRAIN_RATIO = 0.7
# Derive the split index from the data instead of hard-coding 531, so the
# split stays correct if the file size changes (759 rows -> 531).
n_train = int(len(xy) * TRAIN_RATIO)

# Training split: first 70% of the rows.
x_train = xy[:n_train, :-1]   # X: all feature columns
y_train = xy[:n_train, [-1]]  # Y: indexing with [-1] keeps the label 2-D, shape (n_train, 1)
print(x_train.shape,y_train.shape)  # (531, 8) (531, 1)

# Validation split: remaining 30% of the rows.
x_test = xy[n_train:, :-1]   # X
y_test = xy[n_train:, [-1]]  # Y
print(x_test.shape,y_test.shape)  # (228, 8) (228, 1)

(531, 8) (531, 1)
(228, 8) (228, 1)


In [7]:
# Trainable parameters (weight, bias) of the linear part of the model: H(X) = X*W + b
# Matrix-product shape rule:  (m, n) * (n, l) -> (m, l)
#                             (531, 8) * (8, 1) -> (531, 1)
# The weight values are drawn before the bias values so the seeded results stay the same.
weight_init = tf.random.normal(shape=[8, 1])
bias_init = tf.random.normal(shape=[1])

W = tf.Variable(initial_value=weight_init, name='weight')
b = tf.Variable(initial_value=bias_init, name='bias')

# Show the initial weights.
W

<tf.Variable 'weight:0' shape=(8, 1) dtype=float32, numpy=
array([[-0.18030666],
       [-0.95028627],
       [-0.03964049],
       [-0.7425406 ],
       [ 1.3231523 ],
       [-0.61854804],
       [ 0.8540664 ],
       [-0.08899953]], dtype=float32)>

In [8]:
# Prediction function (hypothesis): H(X) = sigmoid(X*W + b)
def hypothesis(X):
    """Return the sigmoid of the linear score ``X @ W + b``.

    Uses the notebook-level variables ``W`` and ``b``.
    """
    logits = tf.matmul(X, W) + b
    return tf.sigmoid(logits)

In [9]:
# Cost function: log loss (binary cross-entropy) for the binary classification model
def cost_func():
    """Return the mean binary cross-entropy of the model on the training data.

    Reads the notebook-level ``x_train`` / ``y_train`` and takes no arguments,
    so it can be passed directly as the loss callable to ``optimizer.minimize``.

    The predictions are clipped to ``[eps, 1 - eps]`` before taking the
    logarithm: a sigmoid output that saturates to exactly 0 or 1 in float32
    would otherwise produce ``log(0) = -inf`` and, multiplied by a zero label
    term, a NaN cost that poisons every subsequent gradient step.
    """
    eps = 1e-7
    # Evaluate the model once and reuse the result for both log terms.
    h = tf.clip_by_value(hypothesis(x_train), eps, 1.0 - eps)
    cost = -tf.reduce_mean(y_train * tf.math.log(h) +
                           (1 - y_train) * tf.math.log(1 - h))
    return cost

In [11]:
# Gradient-based optimizer.
# The learning rate is set to 0.01 and passed to the optimizer object.
# Plain gradient descent is kept below as an alternative:
# optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.01)
LEARNING_RATE = 0.01
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=LEARNING_RATE)

In [12]:
# Training: 10001 optimizer steps, reporting progress every 100th step.
print('****** Start Learning!!')
for step in range(10001):
    # One update of W and b that reduces the cost.
    optimizer.minimize(cost_func, var_list=[W, b])

    # Only every 100th step is reported.
    if step % 100 != 0:
        continue

    current_cost = cost_func().numpy()
    print('%04d' % step, 'cost:[', current_cost, ']',
          ' W:', W.numpy(), ' b:', b.numpy())

print('****** Learning Finished!!')

****** Start Learning!!
0000 cost:[ 0.8006095 ]  W: [[-0.19030663]
 [-0.9402866 ]
 [-0.02964057]
 [-0.7525406 ]
 [ 1.3131523 ]
 [-0.62854797]
 [ 0.8440664 ]
 [-0.09899952]]  b: [0.23652862]
0100 cost:[ 0.5272774 ]  W: [[-0.6510306 ]
 [-1.9078404 ]
 [ 0.23281248]
 [-1.2235714 ]
 [ 0.7060885 ]
 [-1.4086715 ]
 [ 0.3165253 ]
 [-0.42911738]]  b: [0.6459376]
0200 cost:[ 0.5019685 ]  W: [[-0.70037246]
 [-2.628704  ]
 [ 0.16854057]
 [-1.249312  ]
 [ 0.4307452 ]
 [-1.8856751 ]
 [ 0.07900542]
 [-0.26116362]]  b: [0.66892844]
0300 cost:[ 0.4933478 ]  W: [[-0.7115047 ]
 [-3.0237443 ]
 [ 0.19624925]
 [-1.1704328 ]
 [ 0.25556725]
 [-2.1660578 ]
 [-0.1320214 ]
 [-0.10619933]]  b: [0.6788839]
0400 cost:[ 0.48995644 ]  W: [[-7.1795541e-01]
 [-3.2340996e+00]
 [ 2.1906957e-01]
 [-1.0466332e+00]
 [ 1.3928919e-01]
 [-2.3197625e+00]
 [-3.0422512e-01]
 [ 1.3174267e-03]]  b: [0.671318]
0500 cost:[ 0.4884151 ]  W: [[-0.726108  ]
 [-3.3358593 ]
 [ 0.22019492]
 [-0.91839325]
 [ 0.05383291]
 [-2.401019  ]
 [-0.43

In [13]:
# Print the learned regression coefficients: the weight matrix and the bias.
for label, variable in (('Weight:', W), ('Bias:', b)):
    print(label, variable.numpy())

Weight: [[-0.7646879 ]
 [-3.3311455 ]
 [ 0.24389939]
 [-0.5480663 ]
 [-0.21599822]
 [-2.5523164 ]
 [-0.8651044 ]
 [ 0.11382016]]
Bias: [0.38240406]


In [21]:
# Accuracy measurement: turn predicted probabilities into class labels
def predict(X, threshold=0.5):
    """Return hard 0/1 class predictions for ``X`` as a float32 tensor.

    Args:
        X: Input passed unchanged to ``hypothesis``.
        threshold: Decision boundary on the predicted probability. Outputs
            strictly greater than this value become 1.0, all others 0.0.
            Defaults to 0.5, the value that was previously hard-coded.

    Returns:
        A float32 tensor of 0.0 / 1.0 values with the same shape as
        ``hypothesis(X)``.
    """
    return tf.cast(hypothesis(X) > threshold, dtype=tf.float32)

# Predict on the held-out validation data (70% : 30% split).
preds = predict(x_test)

# Accuracy = fraction of validation rows whose predicted label equals the true label.
is_correct = tf.equal(preds, y_test)
accuracy = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32))

print('Accuracy:', accuracy.numpy())  # Accuracy: 0.78070176
# Optional debugging output:
# print('Hypothesis:\n',hypothesis(x_test).numpy())
# print('Predict:\n',preds.numpy())

Accuracy: 0.78070176
