<a href="https://colab.research.google.com/github/devyulbae/AIClass/blob/main/Q2_diabetes_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

tf.__version__

'2.15.0'

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# 당뇨병을 진단해봅시다.

In [3]:
xy = np.loadtxt('/content/drive/MyDrive/datas/data-03-diabetes.csv',
                delimiter=',',
                dtype=np.float32)
x_train = xy[0:-100, 0:-1]
y_train = xy[0:-100, [-1]]

x_test = xy[-100:, 0:-1]
y_test = xy[-100:, [-1]]

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

(659, 8) (659, 1)
(100, 8) (100, 1)


In [4]:
print(x_test[0])
print(y_test[0])

[-0.294118  -0.0150754 -0.0491803 -0.333333  -0.550827   0.0134128
 -0.699402  -0.266667 ]
[1.]


In [5]:
dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(len(x_train))


In [6]:
W = tf.Variable(tf.random.normal([8,1]), name='weight')
b = tf.Variable(tf.random.normal([1]), name='bias')


## 가설 설정
* 병이 있다 / 없다로 분류
* sigmoid classification으로 진행


## $$ y_k = \frac{1}{1+e^{-x}}   $$

In [7]:
def logistic_regression(features):
    hypothesis  = tf.divide(1., 1. + tf.exp(-(tf.matmul(features, W) + b)))
    return hypothesis

print(logistic_regression(x_train))

tf.Tensor(
[[0.71950567]
 [0.4000654 ]
 [0.6326548 ]
 [0.7177467 ]
 [0.40393847]
 [0.48046744]
 [0.4255801 ]
 [0.5292876 ]
 [0.8302023 ]
 [0.72381926]
 [0.34201163]
 [0.57013553]
 [0.7466679 ]
 [0.84629804]
 [0.50149214]
 [0.37502465]
 [0.47524962]
 [0.58665484]
 [0.4174915 ]
 [0.27944404]
 [0.65800774]
 [0.62260115]
 [0.44748044]
 [0.6871113 ]
 [0.65426826]
 [0.30811787]
 [0.86620337]
 [0.47001028]
 [0.7596662 ]
 [0.3891057 ]
 [0.36226392]
 [0.37936056]
 [0.66591686]
 [0.4688693 ]
 [0.57422984]
 [0.61547273]
 [0.3034653 ]
 [0.66075796]
 [0.5073268 ]
 [0.46088803]
 [0.64466083]
 [0.64101756]
 [0.7171003 ]
 [0.29621822]
 [0.48609266]
 [0.23348811]
 [0.43192285]
 [0.4013269 ]
 [0.28334293]
 [0.42740875]
 [0.46533826]
 [0.81449294]
 [0.6499419 ]
 [0.7264917 ]
 [0.718679  ]
 [0.30656165]
 [0.47005862]
 [0.37356678]
 [0.6986442 ]
 [0.6295483 ]
 [0.45801806]
 [0.42183605]
 [0.6574112 ]
 [0.44597185]
 [0.43177342]
 [0.5595243 ]
 [0.38214076]
 [0.4248366 ]
 [0.325655  ]
 [0.471742  ]
 [0.55335

## Loss Function

* 기존 MSE 대신 Cross Entropy 사용

## $$
\begin{align}
loss(h(x),y) & = −y log(h(x))−(1−y)log(1−h(x))
\end{align}
$$

In [8]:
def loss_fn(hypothesis, labels):
    cost = -tf.reduce_mean( labels*tf.math.log(hypothesis)+ (1-labels)*tf.math.log(1-hypothesis))
    return cost

optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=1e-2)

## 학습

In [12]:
epochs = 10001

for step in range(epochs):
  for features, labels in dataset:
    with tf.GradientTape() as tape:
      pred = logistic_regression(features)
      loss_value = loss_fn( pred, labels)
      grads = tape.gradient(loss_value, [W,b])
      optimizer.apply_gradients(grads_and_vars=zip(grads,[W,b]))
      if step % 500 == 0:
            print("Iter: {}, Loss: {:.4f}".format(step, loss_fn(logistic_regression(features),labels)))

Iter: 0, Loss: 0.4839
Iter: 500, Loss: 0.4831
Iter: 1000, Loss: 0.4823
Iter: 1500, Loss: 0.4816
Iter: 2000, Loss: 0.4810
Iter: 2500, Loss: 0.4805
Iter: 3000, Loss: 0.4800
Iter: 3500, Loss: 0.4795
Iter: 4000, Loss: 0.4791
Iter: 4500, Loss: 0.4787
Iter: 5000, Loss: 0.4783
Iter: 5500, Loss: 0.4780
Iter: 6000, Loss: 0.4777
Iter: 6500, Loss: 0.4774
Iter: 7000, Loss: 0.4772
Iter: 7500, Loss: 0.4770
Iter: 8000, Loss: 0.4767
Iter: 8500, Loss: 0.4765
Iter: 9000, Loss: 0.4764
Iter: 9500, Loss: 0.4762
Iter: 10000, Loss: 0.4760


## 테스트

In [13]:
def accuracy_fn(hypothesis, labels):
    predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, labels), dtype=tf.float32))
    return accuracy

In [14]:
test_acc = accuracy_fn(logistic_regression(x_test),y_test)
print("Testset Accuracy: {:.4f}".format(test_acc))

Testset Accuracy: 0.7400
