In [1]:
import tensorflow as tf
import numpy as np
tf.random.set_seed(5)

In [5]:
#
xy = np.loadtxt('data-03-diabetes.csv',delimiter=',',dtype=np.float32)
xy.shape  # (759, 9)

# 학습 데이터 분리 : 70% , 531개
x_train = xy[:531,:-1]
y_train = xy[:531,[-1]]
x_train.shape   # (531, 8)
y_train.shape   # (531, 1)

# 검증 데이터 분리 : 30%, 228개
x_test = xy[531:,:-1]
y_test = xy[531:,[-1]]
x_test.shape   # (228, 8)
y_test.shape   # (228, 1)

(228, 1)

In [10]:
# 변수 초기화 : weight, bias
# (m,n) * (n,L) = (m,L)
# (531,8) * (8,1) = (531,1)
W = tf.Variable(tf.random.normal([8,1]), name='weight')  # 중요
b = tf.Variable(tf.random.normal([1]), name='bias')

In [11]:
# 예측 함수(hypothesis) : H(X) = sigmoid(W*X + b)
def hypothesis(X):
    return tf.sigmoid(tf.matmul(X,W)) + b  # 내적 곱셈

In [12]:
# 비용함수 : logloss, 2진 분류 모델
def cost_func():
    
    cost = -tf.reduce_mean(y_train*tf.math.log(hypothesis(x_train)) +
                         (1-y_train)*tf.math.log(1-hypothesis(x_train)))
    return cost

In [13]:
# 경사 하강법
# Learning_rate(학습율)을 0.01 로 설정하여 optimizer객체를 생성
optimizer = tf.keras.optimizers.Adam(lr=0.01)

In [19]:
print('***** Start Learning!!')
for step in range(10001):
    # cost를 minimize 한다
    optimizer.minimize(cost_func,var_list=[W,b])
    
    if step % 1000 == 0:
        print('%04d'%step,'cost:[',cost_func().numpy(),']',
             ' W:',W.numpy(),' b:',b.numpy())
print('***** Learning Finished!!')

***** Start Learning!!
0000 cost:[ nan ]  W: [[-0.19961962]
 [-1.0098403 ]
 [-0.5985454 ]
 [ 0.45328954]
 [ 0.07013801]
 [-0.5845643 ]
 [ 1.6249347 ]
 [-0.93383265]]  b: [0.36522233]
1000 cost:[ nan ]  W: [[-0.19967464]
 [-1.0097419 ]
 [-0.5985111 ]
 [ 0.4532166 ]
 [ 0.07010444]
 [-0.5844947 ]
 [ 1.6250762 ]
 [-0.9338891 ]]  b: [0.36532176]
2000 cost:[ nan ]  W: [[-0.19971652]
 [-1.0100646 ]
 [-0.59919614]
 [ 0.4537294 ]
 [ 0.07038593]
 [-0.5848748 ]
 [ 1.624479  ]
 [-0.9335099 ]]  b: [0.36505654]
3000 cost:[ nan ]  W: [[-0.19951461]
 [-1.0099535 ]
 [-0.59860826]
 [ 0.45336455]
 [ 0.07020424]
 [-0.5847556 ]
 [ 1.6249645 ]
 [-0.93370104]]  b: [0.36516815]
4000 cost:[ nan ]  W: [[-0.19962054]
 [-1.0096866 ]
 [-0.5984019 ]
 [ 0.45325828]
 [ 0.06988556]
 [-0.58441603]
 [ 1.6253405 ]
 [-0.93389213]]  b: [0.36540312]
5000 cost:[ nan ]  W: [[-0.19920419]
 [-1.0101877 ]
 [-0.5989489 ]
 [ 0.45363352]
 [ 0.07056118]
 [-0.58492345]
 [ 1.6245898 ]
 [-0.93346083]]  b: [0.36485177]
6000 cost:[ nan ]

In [20]:
# 회귀 계수, weight과 bias 출력
print('Weight:',W.numpy())
print('Bias:',b.numpy())

Weight: [[-0.19971995]
 [-1.0097159 ]
 [-0.5983984 ]
 [ 0.45325747]
 [ 0.07005819]
 [-0.58439195]
 [ 1.6251067 ]
 [-0.933908  ]]
Bias: [0.3653539]


In [21]:
# 정확도 측정 : accuracy computation
def predict(X):
    return tf.cast(hypothesis(X) > 0.5,dtype=tf.float32)

# 검증 데이터를 사용하여 예측
preds = predict(x_test)
accuracy = tf.reduce_mean(tf.cast(tf.equal(preds,y_test),dtype=tf.float32))

print('Accuracy:',accuracy.numpy()) # 
# print('Hypothesis:\n',hypothesis(x_test).numpy())
# print('Predict:\n',preds.numpy())

Accuracy: 0.69736844
