
# DNN (TensorFlow) — Pima Indians Diabetes: Trening, Predykcja i Wizualizacje


In [None]:

import os
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import matplotlib.pyplot as plt

# One seed for the whole notebook: applied to TensorFlow's and NumPy's global
# generators here and reused as `random_state` for the train/test split.
RANDOM_STATE = 42
tf.random.set_seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)


In [None]:

# Path to the dataset CSV (read if it exists, otherwise created further down in this cell)
CSV_PATH = "pima.csv"

def synthesize_pima(n=768, random_state=42):
    """Generate a synthetic table with the Pima Indians Diabetes column layout.

    Each feature is sampled independently from a fixed distribution. The binary
    ``Outcome`` is then drawn from a logistic model of glucose, BMI, age,
    pedigree function and pregnancies, evaluated on the *unrounded* features.

    Args:
        n: Number of rows to generate.
        random_state: Seed handed to ``np.random.default_rng``.

    Returns:
        pandas.DataFrame with the eight feature columns followed by ``Outcome``
        (0 or 1). Continuous features are rounded to 1 decimal place, the
        pedigree function to 3.
    """
    gen = np.random.default_rng(random_state)

    def nonneg_normal(mean, std):
        # Gaussian draw of length n with negative values clamped to zero.
        return np.clip(gen.normal(mean, std, size=n), 0, None)

    # The order of these draws determines the random stream consumed by each
    # column, so it must stay exactly as is to reproduce the same data.
    pregnancies = gen.integers(0, 17, size=n)
    glucose = nonneg_normal(120, 30)
    blood_pressure = nonneg_normal(70, 12)
    skin_thickness = nonneg_normal(20, 10)
    insulin = nonneg_normal(80, 40)
    bmi = nonneg_normal(32, 7)
    pedigree = np.clip(gen.gamma(2.0, 0.15, size=n), 0, None)
    age = gen.integers(21, 81, size=n)

    # Linear log-odds of a positive outcome, squashed through a sigmoid.
    logit = (
        0.015 * (glucose - 100)
        + 0.03 * (bmi - 25)
        + 0.02 * (age - 30)
        + 0.8 * pedigree
        + 0.01 * pregnancies
    )
    p_positive = 1 / (1 + np.exp(-logit))

    # Bernoulli sample: positive when a uniform draw falls below the probability.
    outcome = (gen.uniform(0, 1, size=n) < p_positive).astype(int)

    columns = {
        "Pregnancies": pregnancies,
        "Glucose": np.round(glucose, 1),
        "BloodPressure": np.round(blood_pressure, 1),
        "SkinThickness": np.round(skin_thickness, 1),
        "Insulin": np.round(insulin, 1),
        "BMI": np.round(bmi, 1),
        "DiabetesPedigreeFunction": np.round(pedigree, 3),
        "Age": age,
        "Outcome": outcome,
    }
    return pd.DataFrame(columns)

# Use the CSV on disk when it exists; otherwise synthesize a dataset and write
# it to the same path so later runs load that file instead of regenerating.
if not os.path.exists(CSV_PATH):
    df = synthesize_pima()
    df.to_csv(CSV_PATH, index=False)
    print("Brak pliku, wygenerowano dane syntetyczne i zapisano do:", CSV_PATH)
else:
    df = pd.read_csv(CSV_PATH)
    print("Wczytano dane z pliku:", CSV_PATH)

# Preview the first rows as the cell output.
df.head()


In [None]:

# Columns in which a stored 0 is treated as a missing value.
zero_as_nan_cols = ["Glucose", "BloodPressure", "SkinThickness", "Insulin", "BMI"]
for c in zero_as_nan_cols:
    if c in df.columns:
        # Cast to float explicitly before inserting NaN: assigning NaN into an
        # integer column through .loc relies on implicit upcasting, which newer
        # pandas versions warn about or reject.
        df[c] = df[c].astype("float64").mask(df[c] == 0)

FEATURES = ["Pregnancies","Glucose","BloodPressure","SkinThickness","Insulin",
            "BMI","DiabetesPedigreeFunction","Age"]
TARGET = "Outcome"

X = df[FEATURES].copy()
y = df[TARGET].astype(int).copy()

# Split BEFORE fitting any preprocessing. Fitting the imputer/scaler on the
# full table would let test-set medians, means and variances leak into the
# training features and bias the reported test metrics.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE, stratify=y
)

imputer = SimpleImputer(strategy="median")
scaler = StandardScaler()

# Learn medians and scaling statistics from the training rows only...
X_train = scaler.fit_transform(imputer.fit_transform(X_train_raw))
# ...and apply those same fitted statistics to the held-out rows.
X_test = scaler.transform(imputer.transform(X_test_raw))

# Kept for backward compatibility with cells that expect these names: the full
# feature table transformed with the train-fitted imputer and scaler.
X_imputed = imputer.transform(X)
X_scaled = scaler.transform(X_imputed)


In [None]:

def build_dnn(input_dim, hidden_units=(32, 16), dropout_rate=0.2, learning_rate=1e-3):
    """Build and compile a feed-forward binary classifier.

    Each hidden block is Dense(relu) -> BatchNormalization -> Dropout, followed
    by a single sigmoid output unit. With the default arguments the network is
    identical to the original fixed 32/16 architecture.

    Args:
        input_dim: Number of input features.
        hidden_units: Iterable with the width of each hidden Dense layer, in
            order from input to output.
        dropout_rate: Dropout rate applied after every hidden block.
        learning_rate: Learning rate of the Adam optimizer.

    Returns:
        A compiled ``keras.Sequential`` model using binary cross-entropy loss
        and tracking accuracy.
    """
    stack = [layers.Input(shape=(input_dim,))]
    for units in hidden_units:
        stack.extend([
            layers.Dense(units, activation="relu"),
            layers.BatchNormalization(),
            layers.Dropout(dropout_rate),
        ])
    # Single sigmoid unit: the output is the predicted probability of class 1.
    stack.append(layers.Dense(1, activation="sigmoid"))

    model = keras.Sequential(stack)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss="binary_crossentropy",
        metrics=["accuracy"]
    )
    return model

# Size the input layer from the number of columns in the training matrix,
# then print the layer-by-layer architecture.
model = build_dnn(input_dim=X_train.shape[1])
model.summary()


In [None]:

# Stop once validation loss has not improved for 15 epochs and roll the model
# back to the weights from its best epoch.
early = keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=15,
    monitor="val_loss",
)

# Training options; 20% of the training data is held out for validation and
# 200 is only an upper bound on epochs because early stopping may end sooner.
fit_options = dict(
    validation_split=0.2,
    epochs=200,
    batch_size=32,
    callbacks=[early],
    verbose=0,
)
history = model.fit(X_train, y_train, **fit_options)


In [None]:

def _plot_learning_curve(history, metric, short_name, ylabel):
    """Plot the training and validation curves of one metric in its own figure.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict must
            contain ``metric`` and ``"val_" + metric``.
        metric: Key of the training metric in ``history.history``.
        short_name: Suffix used in the legend labels (``train_*`` / ``val_*``).
        ylabel: Label for the y axis.
    """
    train_values = history.history[metric]
    val_values = history.history[f"val_{metric}"]
    epochs = range(1, len(train_values) + 1)

    fig, ax = plt.subplots()
    ax.plot(epochs, train_values, label=f"train_{short_name}")
    ax.plot(epochs, val_values, label=f"val_{short_name}")
    # Title and axis labels so the figure is readable on its own.
    ax.set(title=f"Training vs. validation {metric}", xlabel="Epoch", ylabel=ylabel)
    ax.legend()
    plt.show()


_plot_learning_curve(history, "loss", "loss", "Binary cross-entropy loss")
_plot_learning_curve(history, "accuracy", "acc", "Accuracy")


In [None]:

# Loss and accuracy on the held-out test split.
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"Test — loss: {test_loss:.4f}, accuracy: {test_acc:.4f}")

# Predicted probabilities flattened to 1-D, then thresholded at 0.5 into labels.
y_proba = model.predict(X_test, verbose=0).ravel()
y_pred = (y_proba >= 0.5).astype(int)
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

# ROC curve from the raw probabilities. The thresholds are not used, but they
# get a real name instead of `_` so IPython's last-output variable is not shadowed.
fpr, tpr, roc_thresholds = roc_curve(y_test, y_proba)
roc_auc = auc(fpr, tpr)

fig, ax = plt.subplots()
ax.plot(fpr, tpr, label=f"AUC = {roc_auc:.3f}")
# Diagonal reference line: the ROC of a classifier that guesses at random.
ax.plot([0, 1], [0, 1], linestyle="--", label="Chance")
ax.set(
    title="ROC curve (test set)",
    xlabel="False positive rate",
    ylabel="True positive rate",
)
ax.legend()
plt.show()


In [None]:

# Write the trained network to disk.
# NOTE(review): only the Keras model is saved here; nothing in the visible
# notebook persists the fitted `imputer` and `scaler`, which would be needed to
# preprocess new raw inputs the same way — confirm whether that is intended.
MODEL_PATH = "pima_dnn.h5"
model.save(MODEL_PATH)
