In [33]:
import numpy as np
import pandas as pd

In [34]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight

In [35]:
import tensorflow as tf
from tensorflow.keras import layers,models,callbacks,optimizers

In [36]:
col_names = ['pregnant','glucose','bp','skin','insulin','bmi','pedigree','age','label']
df = pd.read_csv('diab_pima.csv',header=None,names=col_names)

In [37]:
df.head(7)

Unnamed: 0,pregnant,glucose,bp,skin,insulin,bmi,pedigree,age,label
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
5,5,116,74,0,0,25.6,0.201,30,0
6,3,78,50,32,88,31.0,0.248,26,1


In [38]:
for c in ['glucose','bp','skin','insulin','bmi']:
    non_zero = df.loc[df[c] != 0,c].median()
    df[c] = df[c].replace(0,df[c].median())

In [39]:
df.head(7)

Unnamed: 0,pregnant,glucose,bp,skin,insulin,bmi,pedigree,age,label
0,6,148,72,35,30.5,33.6,0.627,50,1
1,1,85,66,29,30.5,26.6,0.351,31,0
2,8,183,64,23,30.5,23.3,0.672,32,1
3,1,89,66,23,94.0,28.1,0.167,21,0
4,0,137,40,35,168.0,43.1,2.288,33,1
5,5,116,74,23,30.5,25.6,0.201,30,0
6,3,78,50,32,88.0,31.0,0.248,26,1


In [40]:
X = df[['pregnant','insulin','bmi','age','glucose','bp','pedigree']].astype('float32').values
y = df[['label']].astype('int32').values

In [41]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25,random_state=42,stratify=y)

In [42]:
#Normalizacja danych
norm = layers.Normalization(axis=-1)
norm.adapt(X_train)

In [43]:
model = models.Sequential([
    layers.Input(shape=(X_train.shape[1],)),
    norm,
    layers.Dense(32,activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(16,activation='relu'),
    layers.Dense(1,activation='sigmoid')
])

In [44]:
model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-3),
    loss='binary_crossentropy',
    metrics=['accuracy',tf.keras.metrics.AUC(name='auc')]
)

In [46]:
#wagi klas
y_train_1d = np.asarray(y_train).ravel()
classes = np.unique(y_train_1d)
class_weights = compute_class_weight(class_weight='balanced',classes=classes,y=y_train_1d)
class_weight_dict = {int(c):w for c,w in zip(classes,class_weights)}

In [47]:
es = callbacks.EarlyStopping(
    monitor='val_auc',mode='max',patience=20,restore_best_weights=True
)

In [48]:
history = model.fit(
    X_train,y_train,
    validation_split=0.2,
    epochs=300,
    batch_size=32,
    class_weight=class_weight_dict,
    callbacks=[es],
    verbose=1
)

Epoch 1/300
[1m 1/15[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m29s[0m 2s/step - accuracy: 0.5938 - auc: 0.3261 - loss: 0.7136[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5528 - auc: 0.3168 - loss: 0.7744[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 42ms/step - accuracy: 0.5565 - auc: 0.3184 - loss: 0.7705 - val_accuracy: 0.5776 - val_auc: 0.4947 - val_loss: 0.6812
Epoch 2/300
[1m 1/15[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 34ms/step - accuracy: 0.5625 - auc: 0.3631 - loss: 0.7882[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.5587 - auc: 0.4270 - loss: 0.7246 - val_accuracy: 0.6552 - val_auc: 0.6989 - val_

In [49]:
#ewaluacja
test_loss,test_acc, test_auc = model.evaluate(X_test,y_test,verbose=0)
print(f"\nTest Accuracy: {test_acc:.4f}")
print(f"\nTest Loss: {test_loss:.4f}")
print(f"\nTest AUC: {test_auc:.4f}")


Test Accuracy: 0.7448

Test Loss: 0.5246

Test AUC: 0.8201


In [50]:
y_proba = model.predict(X_test,verbose=0).ravel()
y_pred = (y_proba>=0.5).astype(int)

print(f"\nMacierz pomyłek: {confusion_matrix(y_test,y_pred)}")
print(f"\nraport klasyfikacyjny: {classification_report(y_test,y_pred,digits=4)}")


Macierz pomyłek: [[94 31]
 [18 49]]

raport klasyfikacyjny:               precision    recall  f1-score   support

           0     0.8393    0.7520    0.7932       125
           1     0.6125    0.7313    0.6667        67

    accuracy                         0.7448       192
   macro avg     0.7259    0.7417    0.7300       192
weighted avg     0.7601    0.7448    0.7491       192

