# 피마인디언 당뇨병

In [1]:
from google.colab import files
import numpy as np
import tensorflow as tf

In [2]:
# 실행할 때마다 같은 결과를 출력하기 위한 seed 값 설정
seed = 2021
np.random.seed(seed)
tf.random.set_seed(seed)

### 데이터 전처리

In [3]:
file_uploaded = files.upload()

Saving pima-indians-diabetes.csv to pima-indians-diabetes.csv


In [4]:
filename = list(file_uploaded.keys())[0]

In [5]:
dataset = np.loadtxt(filename,delimiter=',')
dataset

array([[  6.   , 148.   ,  72.   , ...,   0.627,  50.   ,   1.   ],
       [  1.   ,  85.   ,  66.   , ...,   0.351,  31.   ,   0.   ],
       [  8.   , 183.   ,  64.   , ...,   0.672,  32.   ,   1.   ],
       ...,
       [  5.   , 121.   ,  72.   , ...,   0.245,  30.   ,   0.   ],
       [  1.   , 126.   ,  60.   , ...,   0.349,  47.   ,   1.   ],
       [  1.   ,  93.   ,  70.   , ...,   0.315,  23.   ,   0.   ]])

####정규화

In [7]:
from sklearn.preprocessing import StandardScaler
scale = StandardScaler()
X_norm = scale.fit_transform(dataset[:, :-1])
Y = dataset[:, -1]
X_norm.shape, Y.shape

((768, 8), (768,))

In [8]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(
    X_norm, Y,stratify=Y,random_state = seed
)
X_train.shape,X_test.shape

((576, 8), (192, 8))

### 모델 설계

In [9]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Input

In [10]:
model = Sequential([
    Dense(12, input_shape=(8,), activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])

### 모델 환경설정

In [11]:
model.compile(
                loss='binary_crossentropy', 
                optimizer='adam', 
                metrics=['accuracy']
            )

### 모델 저장 관련 환경 설정

In [12]:
import os
MODEL_DIR = './model/'
if not os.path.exists(MODEL_DIR):
    os.mkdir(MODEL_DIR)

In [13]:
!ls

model  pima-indians-diabetes.csv  sample_data


In [14]:
# 모델 저장 조건
modelpath = MODEL_DIR + "best{epoch:03d}-{val_loss:.4f}.hdf5"

In [15]:
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping
checkpointer = ModelCheckpoint(filepath=modelpath, monitor='val_loss', verbose=1, save_best_only=True)
early_stopping = EarlyStopping(patience=30)

### 모델 학습

In [16]:
history = model.fit(X_train,y_train, validation_split=0.2, epochs=200, batch_size=50, callbacks=[checkpointer,early_stopping])

Epoch 1/200

Epoch 00001: val_loss improved from inf to 0.61408, saving model to ./model/best001-0.6141.hdf5
Epoch 2/200

Epoch 00002: val_loss improved from 0.61408 to 0.59672, saving model to ./model/best002-0.5967.hdf5
Epoch 3/200

Epoch 00003: val_loss improved from 0.59672 to 0.58231, saving model to ./model/best003-0.5823.hdf5
Epoch 4/200

Epoch 00004: val_loss improved from 0.58231 to 0.57177, saving model to ./model/best004-0.5718.hdf5
Epoch 5/200

Epoch 00005: val_loss improved from 0.57177 to 0.56351, saving model to ./model/best005-0.5635.hdf5
Epoch 6/200

Epoch 00006: val_loss improved from 0.56351 to 0.55622, saving model to ./model/best006-0.5562.hdf5
Epoch 7/200

Epoch 00007: val_loss improved from 0.55622 to 0.55095, saving model to ./model/best007-0.5510.hdf5
Epoch 8/200

Epoch 00008: val_loss improved from 0.55095 to 0.54692, saving model to ./model/best008-0.5469.hdf5
Epoch 9/200

Epoch 00009: val_loss improved from 0.54692 to 0.54331, saving model to ./model/best009

In [22]:
from tensorflow.keras.models import load_model
best_model = load_model('./model/best018-0.5261.hdf5')
acc = best_model.evaluate(X_test, y_test, verbose=2)
print(f"Accuracy: {acc[1]:.4f}")

6/6 - 0s - loss: 0.4848 - accuracy: 0.7865
Accuracy: 0.7865


### 테스트

In [18]:
index = 10
test_data = X_test[index,:].reshape(1,-1)
test_data

array([[-0.54791859,  2.38188392,  0.04624525,  4.92186584, -0.69289057,
         0.34362394,  0.31144581,  2.44704844]])

In [19]:
label = y_test[index]
int(label)

1

In [20]:
pred = model.predict(test_data)
pred

array([[0.82371205]], dtype=float32)

In [21]:
int(pred[0][0] > 0.5)

1