In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler


import tensorflow as tf
from tensorflow import keras
from keras.optimizers import Adam, SGD, Adagrad
from tensorflow.keras import layers
from tensorflow.keras import layers


from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler

In [2]:
# Read the diabetes CSV from the notebook's working directory, then preview
# the first five rows.
df = pd.read_csv(filepath_or_buffer="./diabetes.csv")
df.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(768, 9)

In [4]:
# Column labels as a plain Python list, printed for reference.
cols = df.columns.tolist()
print(cols)

['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome']


In [5]:
# Pairwise correlation between all columns, including the Outcome label;
# used to judge which features relate most strongly to the target.
df.corr()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
Pregnancies,1.0,0.129459,0.141282,-0.081672,-0.073535,0.017683,-0.033523,0.544341,0.221898
Glucose,0.129459,1.0,0.15259,0.057328,0.331357,0.221071,0.137337,0.263514,0.466581
BloodPressure,0.141282,0.15259,1.0,0.207371,0.088933,0.281805,0.041265,0.239528,0.065068
SkinThickness,-0.081672,0.057328,0.207371,1.0,0.436783,0.392573,0.183928,-0.11397,0.074752
Insulin,-0.073535,0.331357,0.088933,0.436783,1.0,0.197859,0.185071,-0.042163,0.130548
BMI,0.017683,0.221071,0.281805,0.392573,0.197859,1.0,0.140647,0.036242,0.292695
DiabetesPedigreeFunction,-0.033523,0.137337,0.041265,0.183928,0.185071,0.140647,1.0,0.033561,0.173844
Age,0.544341,0.263514,0.239528,-0.11397,-0.042163,0.036242,0.033561,1.0,0.238356
Outcome,0.221898,0.466581,0.065068,0.074752,0.130548,0.292695,0.173844,0.238356,1.0


In [6]:
# Feature matrix: the four columns with the highest correlation to Outcome
# in the df.corr() table.
X = df[[
    'Pregnancies',
    'Glucose',
    'BMI',
    'Age',
]]

In [7]:
# Preview the first rows of the selected feature columns.
X.head()

Unnamed: 0,Pregnancies,Glucose,BMI,Age
0,6,148,33.6,50
1,1,85,26.6,31
2,8,183,23.3,32
3,1,89,28.1,21
4,0,137,43.1,33


In [8]:
# Target vector: the 0/1 Outcome label, followed by a look at its first five entries.
y = df['Outcome']
y.head(n=5)

0    1
1    0
2    1
3    0
4    1
Name: Outcome, dtype: int64

In [9]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)

In [10]:
# Balance the classes by randomly duplicating minority-class rows.
# Only the training split is resampled; the test split is left untouched.
sm = RandomOverSampler(
    sampling_strategy='minority',
    random_state=42,
)
X_train, y_train = sm.fit_resample(X_train, y_train)

In [11]:
# Size of the training feature matrix after oversampling, as (rows, features).
X_train.shape

(698, 4)

In [12]:
# Standardiser that centres each feature and scales it to unit variance
# (the library defaults, spelled out explicitly).
scaler = StandardScaler(copy=True, with_mean=True, with_std=True)

In [13]:
# Learn the scaling statistics from the training features only, then apply
# that same transformation to both splits so the test data never influences
# the fitted mean/variance.
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [14]:
# First row of the test features after scaling (sanity check of the transform).
X_test[0]

array([ 0.66916602, -0.83186589,  0.1290672 ,  0.81154032])

In [15]:
# Fix TensorFlow's global seed so weight initialisation (and other TF-driven
# randomness) starts from the same state on every fresh-kernel run, matching
# the random_state=42 already used for the split and the oversampler.
tf.random.set_seed(42)

# Fully connected binary classifier: 4 input features -> 1000 -> 300 -> 100 -> 1.
# The final single sigmoid unit emits one value in (0, 1) per sample.
model = keras.Sequential([
    layers.Dense(1000, input_shape=(4,), kernel_initializer='uniform', activation='tanh'),
    layers.Dense(300, kernel_initializer='uniform', activation='relu'),
    layers.Dense(100, kernel_initializer='uniform', activation='tanh'),
    layers.Dense(1, kernel_initializer='uniform', activation='sigmoid'),
])


In [16]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1000)              5000      
                                                                 
 dense_1 (Dense)             (None, 300)               300300    
                                                                 
 dense_2 (Dense)             (None, 100)               30100     
                                                                 
 dense_3 (Dense)             (None, 1)                 101       
                                                                 
Total params: 335,501
Trainable params: 335,501
Non-trainable params: 0
_________________________________________________________________


In [17]:
# Configure training: Adam optimiser, binary cross-entropy loss, and accuracy
# reported as the tracked metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [18]:
# Train for 75 epochs in mini-batches of 16, logging one line per epoch.
# NOTE(review): the test split doubles as validation data here, so the val_*
# figures are computed on the same rows as the final evaluation — confirm
# this is intended.
model.fit(
    X_train,
    y_train,
    epochs=75,
    batch_size=16,
    verbose=2,
    validation_data=(X_test, y_test),
)

Epoch 1/75
44/44 - 1s - loss: 0.5195 - accuracy: 0.7450 - val_loss: 0.6466 - val_accuracy: 0.6883 - 665ms/epoch - 15ms/step
Epoch 2/75
44/44 - 0s - loss: 0.4780 - accuracy: 0.7693 - val_loss: 0.5648 - val_accuracy: 0.7100 - 153ms/epoch - 3ms/step
Epoch 3/75
44/44 - 0s - loss: 0.4835 - accuracy: 0.7722 - val_loss: 0.6197 - val_accuracy: 0.6926 - 149ms/epoch - 3ms/step
Epoch 4/75
44/44 - 0s - loss: 0.4685 - accuracy: 0.7779 - val_loss: 0.5664 - val_accuracy: 0.7186 - 149ms/epoch - 3ms/step
Epoch 5/75
44/44 - 0s - loss: 0.4649 - accuracy: 0.7708 - val_loss: 0.6153 - val_accuracy: 0.7056 - 148ms/epoch - 3ms/step
Epoch 6/75
44/44 - 0s - loss: 0.4632 - accuracy: 0.7765 - val_loss: 0.5761 - val_accuracy: 0.6970 - 148ms/epoch - 3ms/step
Epoch 7/75
44/44 - 0s - loss: 0.4628 - accuracy: 0.7765 - val_loss: 0.5797 - val_accuracy: 0.7013 - 148ms/epoch - 3ms/step
Epoch 8/75
44/44 - 0s - loss: 0.4562 - accuracy: 0.7779 - val_loss: 0.5553 - val_accuracy: 0.7316 - 148ms/epoch - 3ms/step
Epoch 9/75
44/4

<keras.callbacks.History at 0x22c8b1f8100>

In [19]:
# Score the trained model on the held-out split. With 'accuracy' as the only
# compiled metric, evaluate() returns the pair [loss, accuracy].
# The loss is bound to a real name rather than `_`: assigning `_` at notebook
# top level shadows IPython's last-output variable.
test_loss, accuracy = model.evaluate(X_test, y_test)
print('Accuracy: %.2f' % (accuracy*100))

Accuracy: 74.89


In [20]:
# Predicted probability of the positive class, one row per test sample.
predict_x = model.predict(X_test)
# The network ends in a single sigmoid unit, so predict_x has exactly one
# column; np.argmax(predict_x, axis=1) would therefore return 0 for every
# sample. Threshold the probability at 0.5 instead to obtain 0/1 labels,
# flattened to a 1-D integer array (same shape argmax would have produced).
classes_x = (predict_x > 0.5).astype(np.int64).ravel()



In [21]:
# Disabled cell: optional heatmap of the correlation matrix (df.corr()).
# NOTE(review): if this is re-enabled, the two imports belong with the other
# imports in the first cell.
# from matplotlib import pyplot as plt
# import seaborn as sns

# plt.figure(figsize=(15,8))
# sns.heatmap(df.corr())