In [1]:
# 03_logistic_regression_diabetes
# 당뇨병 진단 2진 분류 모델

import tensorflow as tf
import numpy as np
# Fix TensorFlow's global random seed so the tf.random.normal draws used to
# initialize the model parameters are repeatable from run to run.
tf.random.set_seed(5)
# Last expression of the cell: displays the installed TensorFlow version.
tf.__version__

'2.17.0'

In [2]:
# Load the data set: every column except the last is a feature, the last
# column is the label.
xy = np.loadtxt('data-03-diabetes.csv', delimiter=',', dtype=np.float32)

# Derive the split point from the number of rows instead of hard-coding it.
# For the (759, 9) file noted in the original comments this yields 531
# training rows (70%) and 228 test rows (30%), exactly as before.
TRAIN_RATIO = 0.7
n_train = int(xy.shape[0] * TRAIN_RATIO)

# Fail early with a clear message if the file is too small to split.
assert 0 < n_train < xy.shape[0], 'data set is too small for a train/test split'

# Training data: the first n_train rows.
x_train = xy[:n_train, :-1]    # X, features
y_train = xy[:n_train, [-1]]   # Y, labels kept 2-D as a single column

# Test data: the remaining rows.
x_test = xy[n_train:, :-1]
y_test = xy[n_train:, [-1]]

# Last expression of the cell: display the shape of the test labels.
y_test.shape

(228, 1)

In [3]:
# Trainable parameters: weight and bias.
# Matrix product shapes: (m, n) x (n, l) -> (m, l),
# so features (rows, 8) x weight (8, 1) -> (rows, 1).
# The weight is drawn before the bias so the seeded random sequence is consumed
# in the same order as before.
initial_weight = tf.random.normal(shape=(8, 1))
W = tf.Variable(initial_value=initial_weight, name='weight')

initial_bias = tf.random.normal(shape=(1,))
b = tf.Variable(initial_value=initial_bias, name='bias')

In [4]:
# Hypothesis: H(X) = sigmoid(X·W + b)
# sigmoid(z) = 1 / (1 + exp(-z)); written out in full that is
#   1. / (1. + tf.exp(-(tf.matmul(X, W) + b)))
# Note that the negation applies to the whole of (X·W + b), bias included.
def hypothesis(X):
    """Return the sigmoid of the linear model for every row of ``X``.

    Args:
        X: feature matrix whose column count matches the row count of the
            module-level weight ``W``.

    Returns:
        Tensor of values between 0 and 1, one row per input row.
    """
    logits = tf.add(tf.matmul(X, W), b)
    return tf.sigmoid(logits)

In [5]:
# Cost function: log loss (binary cross-entropy) for the binary classifier
def cost_func():
    """Return the mean binary cross-entropy of the model on the training set.

    Takes no arguments so it can be handed to ``optimizer.minimize`` as the
    loss callable; it reads the module-level ``x_train``, ``y_train``, ``W``
    and ``b``.

    The loss is computed from the logits (``x_train·W + b``) with
    ``tf.nn.sigmoid_cross_entropy_with_logits``. This is the same quantity as
    ``-mean(y*log(h) + (1-y)*log(1-h))`` with ``h = sigmoid(logits)``, but it
    stays finite when the sigmoid saturates: the explicit-log form evaluates
    ``log(0)`` and yields inf/NaN once ``h`` rounds to exactly 0 or 1 in
    float32. It also evaluates the forward pass once instead of twice.

    Returns:
        Scalar tensor holding the mean loss over all training rows.
    """
    logits = tf.matmul(x_train, W) + b
    per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=y_train, logits=logits)
    return tf.reduce_mean(per_example_loss)

In [6]:
# Gradient descent
# Build the optimizer object with a learning rate of 0.01.
# The alternative noted by the author is tf.keras.optimizers.Adam(learning_rate=0.01).
# NOTE(review): presumably the compat.v1 optimizer is used because the training
# loop relies on optimizer.minimize(loss_fn, var_list=...) — confirm.
adam_learning_rate = 0.01
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=adam_learning_rate)




In [7]:
# Training
print('***** Start Learning!!')
trainable = [W, b]
for step in range(10001):
    # One optimizer update that minimizes the cost with respect to W and b.
    optimizer.minimize(cost_func, var_list=trainable)

    # Only report on every 1000th step (including step 0).
    if step % 1000:
        continue

    # The cost is re-evaluated after the update, so the logged value reflects
    # the parameters printed next to it.
    cost_now = cost_func().numpy()
    print('%04d'%step,'cost:[',cost_now,']',
          ' W:',W.numpy(),' b:',b.numpy())
print('***** Learning Finished!!')

***** Start Learning!!
0000 cost:[ 0.8006095 ]  W: [[-0.19030663]
 [-0.9402866 ]
 [-0.02964057]
 [-0.7525406 ]
 [ 1.3131523 ]
 [-0.62854797]
 [ 0.8440664 ]
 [-0.09899952]]  b: [0.23652862]
1000 cost:[ 0.48674083 ]  W: [[-0.75487643]
 [-3.3620782 ]
 [ 0.2107457 ]
 [-0.5814681 ]
 [-0.1694194 ]
 [-2.5226417 ]
 [-0.76776016]
 [ 0.12745772]]  b: [0.4801392]
2000 cost:[ 0.4866587 ]  W: [[-0.7645522 ]
 [-3.3315148 ]
 [ 0.24341257]
 [-0.54766756]
 [-0.21579522]
 [-2.5522876 ]
 [-0.8637517 ]
 [ 0.11404324]]  b: [0.38385916]
3000 cost:[ 0.4866587 ]  W: [[-0.76468706]
 [-3.3311477 ]
 [ 0.24389826]
 [-0.5480666 ]
 [-0.21599711]
 [-2.5523138 ]
 [-0.8651016 ]
 [ 0.11382001]]  b: [0.38240698]
4000 cost:[ 0.48665863 ]  W: [[-0.7646871 ]
 [-3.331147  ]
 [ 0.24389747]
 [-0.5480666 ]
 [-0.21599752]
 [-2.5523148 ]
 [-0.8651022 ]
 [ 0.11381993]]  b: [0.38240632]
5000 cost:[ 0.48665863 ]  W: [[-0.764688  ]
 [-3.3311455 ]
 [ 0.24390048]
 [-0.54806757]
 [-0.2159971 ]
 [-2.552316  ]
 [-0.86510265]
 [ 0.1138196

In [8]:
# Print the learned regression coefficients: weight and bias.
weight_values, bias_values = W.numpy(), b.numpy()
print('Weight:', weight_values)
print('Bias:', bias_values)

Weight: [[-0.7646879 ]
 [-3.3311455 ]
 [ 0.24389939]
 [-0.5480663 ]
 [-0.21599822]
 [-2.5523164 ]
 [-0.8651044 ]
 [ 0.11382016]]
Bias: [0.38240406]


In [9]:
# Accuracy computation
def predict(X):
    """Turn the model's sigmoid outputs for ``X`` into hard 0/1 predictions.

    Args:
        X: feature matrix accepted by ``hypothesis``.

    Returns:
        float32 tensor holding 1.0 where ``hypothesis(X)`` is strictly greater
        than 0.5 and 0.0 elsewhere.
    """
    above_threshold = tf.greater(hypothesis(X), 0.5)
    return tf.cast(above_threshold, tf.float32)

# Predict on the held-out test data (70% / 30% split).
preds = predict(x_test)

# Accuracy is the fraction of test rows whose prediction equals the label.
is_correct = tf.equal(preds, y_test)
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

print('Accuracy:', accuracy.numpy())  # recorded run printed: Accuracy: 0.78070176
# print('Hypothesis:\n',hypothesis(x_test).numpy())
# print('Predict:\n',preds.numpy())

Accuracy: 0.78070176
