In [1103]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
seed = 2023
np.random.seed(seed)
tf.random.set_seed(seed)
import warnings
warnings.filterwarnings('ignore')

In [1104]:
df = pd.read_csv('data/pima-indians-diabetes.csv', skiprows=9, header=None)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [1105]:
df[8].value_counts()

0    500
1    268
Name: 8, dtype: int64

In [1106]:
from sklearn.preprocessing import StandardScaler
X_scaled = StandardScaler().fit_transform(df.iloc[:,:-1].values)

In [1107]:
# y data: label encoding
from sklearn.preprocessing import LabelEncoder
y_labeled = LabelEncoder().fit_transform(df[8].values)

In [1108]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_labeled, stratify=y_labeled, test_size=0.2, random_state=seed
)

In [1109]:
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [1110]:
model = Sequential([
    Dense(50, input_dim=8, activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])
model.summary()

Model: "sequential_97"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_330 (Dense)           (None, 50)                450       
                                                                 
 dense_331 (Dense)           (None, 8)                 408       
                                                                 
 dense_332 (Dense)           (None, 1)                 9         
                                                                 
Total params: 867 (3.39 KB)
Trainable params: 867 (3.39 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [1111]:
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

In [1112]:
mc = ModelCheckpoint('models/pima_best_fin.h5', monitor='val_loss',
                     verbose=0, save_best_only=True)
es = EarlyStopping(monitor='val_loss', patience=20) 

In [1113]:
hist = model.fit(X_train, y_train, validation_split=0.2,
                   epochs=500, batch_size=100, verbose=0,
                   callbacks=[mc, es])

In [1114]:
best_model = load_model('models/pima_best_fin.h5')
best_model.evaluate(X_test, y_test)



[0.4657512605190277, 0.7922077775001526]