In [1]:
!pip install ydata-profiling
from ydata_profiling import ProfileReport



In [2]:
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense

In [3]:
data = pd.read_csv("dataset.csv")
print(data.shape)

(400, 11)


In [4]:
data.head()

Unnamed: 0,gender,age,hypertension,heart_disease,Marriage,work_type,Living_type,avg_glucose,bmi,smoking_status,illness
0,Male,67.0,0,1,Yes,Private,Urban,228.69,36.6,formerly smoked,1
1,Male,80.0,0,1,Yes,Private,Rural,105.92,32.5,never smoked,1
2,Female,49.0,0,0,Yes,Private,Urban,171.23,34.4,smokes,1
3,Female,79.0,1,0,Yes,Self-employed,Rural,174.12,24.0,never smoked,1
4,Male,81.0,0,0,Yes,Private,Urban,186.21,29.0,formerly smoked,1


In [5]:
data.describe()

Unnamed: 0,age,hypertension,heart_disease,avg_glucose,bmi,illness
count,400.0,400.0,400.0,400.0,400.0,400.0
mean,55.2678,0.18,0.1325,119.39195,29.48175,0.5
std,22.51279,0.384669,0.339458,54.377459,6.488354,0.500626
min,0.8,0.0,0.0,56.07,15.6,0.0
25%,44.0,0.0,0.0,80.46,25.575,0.0
50%,59.0,0.0,0.0,97.665,28.6,0.5
75%,74.25,0.0,0.0,144.345,33.025,1.0
max,82.0,1.0,1.0,271.74,48.9,1.0


In [6]:
scaler = StandardScaler()
data[['age', 'hypertension', 'heart_disease', 'avg_glucose', 'bmi', 'illness']] = scaler.fit_transform(data[['age','hypertension','heart_disease','avg_glucose','bmi','illness']])

In [7]:
label_encoder = LabelEncoder()
data['gender'] = label_encoder.fit_transform(data['gender'])
data['Marriage'] = label_encoder.fit_transform(data['Marriage'])
data['work_type'] = label_encoder.fit_transform(data['work_type'])
data['Living_type'] = label_encoder.fit_transform(data['Living_type'])
data['smoking_status'] = label_encoder.fit_transform(data['smoking_status'])

In [8]:
data.head()

Unnamed: 0,gender,age,hypertension,heart_disease,Marriage,work_type,Living_type,avg_glucose,bmi,smoking_status,illness
0,1,0.521788,-0.468521,2.558744,1,1,1,2.012505,1.098455,1,1.0
1,1,1.09996,-0.468521,2.558744,1,1,0,-0.248059,0.465762,2,1.0
2,0,-0.278759,-0.468521,-0.390817,1,1,1,0.954494,0.758961,3,1.0
3,0,1.055485,2.134375,-0.390817,1,2,0,1.007708,-0.845918,2,1.0
4,1,1.144435,-0.468521,-0.390817,1,1,1,1.230321,-0.074341,1,1.0


In [9]:
y=data['illness']
x=data.drop(columns=['illness'])
y = y.values.reshape(-1, 1)

In [10]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.05, random_state=42)

In [11]:
y_train = np.where(y_train == -1, 0, y_train)
y_test = np.where(y_test == -1, 0, y_test)

In [12]:
from sklearn.utils.class_weight import compute_class_weight
y_train_flat = y_train.ravel()
classes = np.unique(y_train_flat)
class_weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train_flat)
class_weight_dict = {classes[i]: class_weights[i] for i in range(len(classes))}

In [13]:
print(class_weight_dict)

{0.0: 1.0, 1.0: 1.0}


In [72]:
model = keras.Sequential()
model.add(keras.Input(shape=(10,)))
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dense(32, activation='relu'))
model.add(keras.layers.Dense(16, activation='relu'))
model.add(keras.layers.Dense(1, activation='sigmoid'))

In [73]:
from tensorflow.keras.optimizers import Adam
optimizer = Adam(learning_rate=0.0003)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

In [74]:
from tensorflow.keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

In [75]:
history = model.fit(X_train, y_train, epochs=50, validation_data=(X_test, y_test),
                    batch_size=76, callbacks=[early_stopping], class_weight=class_weight_dict)

Epoch 1/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 140ms/step - accuracy: 0.5500 - loss: 0.6892 - val_accuracy: 0.6500 - val_loss: 0.6703
Epoch 2/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - accuracy: 0.6681 - loss: 0.6695 - val_accuracy: 0.7500 - val_loss: 0.6489
Epoch 3/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.7980 - loss: 0.6493 - val_accuracy: 0.8000 - val_loss: 0.6277
Epoch 4/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.7702 - loss: 0.6385 - val_accuracy: 0.8500 - val_loss: 0.6038
Epoch 5/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.7655 - loss: 0.6200 - val_accuracy: 0.8500 - val_loss: 0.5798
Epoch 6/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.7505 - loss: 0.6082 - val_accuracy: 0.9000 - val_loss: 0.5561
Epoch 7/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━

In [76]:
loss, accuracy = model.evaluate(X_test, y_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.9000 - loss: 0.2563


In [77]:
print(f"Loss: {loss}")
print(f"Accuracy: {accuracy * 100:.2f}%")

Loss: 0.2563461363315582
Accuracy: 90.00%


In [78]:
model.summary()

In [79]:
model.save('my_model.keras')