In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from keras import Model
from matplotlib import pyplot as plt

In [None]:
# Load the diabetes dataset from CSV; the path is relative to the notebook's working directory.
df = pd.read_csv("../data/diabetes.csv")

In [None]:
# Preview the first five rows.
df.head(5)

In [None]:
# Print a summary of the columns (dtypes and non-null counts).
df.info()

In [None]:
# Descriptive statistics for the numeric columns.
df.describe()

In [None]:
from sklearn import preprocessing


def preprocess(df):
    """Impute zero-coded missing values and standardize the columns.

    Zeros in ``Glucose``, ``BloodPressure``, ``SkinThickness``, ``Insulin`` and
    ``BMI`` are treated as missing and replaced with the mean of the remaining
    (non-zero) values of the same column. Every column is then standardized
    with ``preprocessing.scale``, after which ``Outcome`` is restored to its
    original, unscaled values.

    The number of zero-valued rows per column is printed before and after the
    imputation step.

    Args:
        df: DataFrame holding the five columns listed above plus ``Outcome``.
            It is not modified.

    Returns:
        A new DataFrame with the same columns and index as ``df``.
    """
    zero_as_missing = ["Glucose", "BloodPressure", "SkinThickness", "Insulin", "BMI"]

    def report_zero_counts(frame):
        # One line per column: how many rows hold the value 0.
        print("Number of rows with 0 values")
        for col in frame.columns:
            missing_rows = frame.loc[frame[col] == 0].shape[0]
            print(col + ": " + str(missing_rows))

    # Work on a copy so the caller's DataFrame is left untouched.
    df = df.copy()

    print("Before preprocessing")
    report_zero_counts(df)

    # Replace 0 values with the mean of the existing (non-zero) values.
    for col in zero_as_missing:
        observed = df[col].replace(0, np.nan)
        df[col] = observed.fillna(observed.mean())

    print("---------------------------------------------")
    print("After preprocessing")
    report_zero_counts(df)

    # Standardization. The original index is kept so that restoring "Outcome"
    # aligns row by row even when the index is not the default 0..n-1 range.
    df_scaled = pd.DataFrame(
        preprocessing.scale(df), columns=df.columns, index=df.index
    )
    df_scaled["Outcome"] = df["Outcome"]

    return df_scaled

In [None]:
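# NOTE: preprocessing is currently disabled, so the cells below work on the raw data.
# Uncomment the next line to impute zero values and standardize the columns.
# df = preprocess(df)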

In [None]:
# Preview the first ten rows.
df.head(10)

In [None]:
# Annotated heatmap of the pairwise correlations between all columns.
f, ax = plt.subplots(1, figsize=(8, 6))
sns.heatmap(df.corr(), annot=True, ax=ax)

In [None]:
# Bar chart of the number of rows per Outcome value.
sns.countplot(x=df.Outcome)

In [None]:
# For every column except "Outcome", print its mean within each Outcome group.
column_names = df.columns.drop("Outcome")
outcome_groups = df.groupby(["Outcome"])
for name in column_names:
    print("{}\n".format(name))
    print(outcome_groups[name].mean())
    print("*" * 50)
    print()

In [None]:
# One histogram per numeric column.
df.hist()
plt.show()

# Multilayer perceptron

In [None]:
# Features are the first eight columns by position; the target is the ninth.
# NOTE(review): positional slicing presumes "Outcome" is the ninth column — confirm against the CSV.
X = df.iloc[:, 0:8]
y = df.iloc[:, 8]

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows as a test set; random_state pins the split.
# The NumPy seed below is left disabled.
# np.random.seed(5)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

In [None]:
from keras.activations import hard_sigmoid  # noqa
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD  # noqa


# model.add(Dense(10, input_dim=8, activation='relu'))
# model.add(Dense(50, activation='relu'))
# model.add(Dense(10, activation='relu'))
# model.add(Dense(5, activation='relu'))
# model.add(Dense(1, activation='sigmoid'))


def create_model(input_dim=8, learning_rate=None):
    """Build and compile the multilayer perceptron used for binary classification.

    Architecture: four ReLU ``Dense`` layers of 10, 50, 10 and 5 units followed
    by a single sigmoid output unit, all with the "uniform" kernel initializer.
    The model is compiled with binary cross-entropy loss, an SGD optimizer
    (momentum 0.9, no Nesterov) and the "accuracy" metric.

    Args:
        input_dim: Number of input features expected by the first layer.
        learning_rate: SGD learning rate. When ``None`` (the default) it is
            ``1 / len(df)``, read from the notebook-level ``df``.

    Returns:
        The compiled ``Sequential`` model.
    """
    if learning_rate is None:
        learning_rate = 1 / len(df)

    model = Sequential()
    model.add(
        Dense(
            10, kernel_initializer="uniform", input_dim=input_dim, activation="relu"
        )
    )
    model.add(Dense(50, kernel_initializer="uniform", activation="relu"))
    model.add(Dense(10, kernel_initializer="uniform", activation="relu"))
    model.add(Dense(5, kernel_initializer="uniform", activation="relu"))
    model.add(Dense(1, kernel_initializer="uniform", activation="sigmoid"))

    # `decay` is intentionally not passed: 0.0 was its default, and newer Keras
    # optimizers deprecate or reject the argument.
    sgd = SGD(learning_rate=learning_rate, momentum=0.9, nesterov=False)
    model.compile(loss="binary_crossentropy", optimizer=sgd, metrics=["accuracy"])
    return model

In [None]:
# Build a new compiled network.
model_1 = create_model()

In [None]:
# Print the model's layer summary.
model_1.summary()

# Training

In [None]:
def evaluate_model(model: Model):
    """Evaluate ``model`` on the full dataset and print its second metric.

    Uses the notebook-level ``X`` and ``y``. The printed label and value are
    entry 1 of ``model.metrics_names`` and of the ``model.evaluate`` result
    (entry 0 being the loss).

    Args:
        model: A compiled Keras model.
    """
    scores = model.evaluate(X, y)
    # Take the metric name from the model being evaluated (not from the
    # notebook-level model_1) so the label always matches the reported value.
    print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))


def evaluate_model_train_test(model: Model):
    """Print the model's accuracy on the training split and on the test split.

    Reads the notebook-level ``X_train``/``y_train`` and ``X_test``/``y_test``.
    The reported value is entry 1 of the list returned by ``model.evaluate``.

    Args:
        model: A compiled Keras model.
    """
    train_accuracy = model.evaluate(X_train, y_train, verbose=False)[1]
    print("Training Accuracy: %.2f%%" % (train_accuracy * 100))

    test_accuracy = model.evaluate(X_test, y_test, verbose=False)[1]
    print("Testing Accuracy: %.2f%%" % (test_accuracy * 100))

In [None]:
# First training run: 200 epochs with batches of 10; the test split is passed
# as validation data so val_loss is recorded in the history.
run_hist_1a = model_1.fit(
    X_train, y_train, validation_data=(X_test, y_test), epochs=200, batch_size=10
)

In [None]:
# Report the metric on the full dataset, then on the train and test splits separately.
evaluate_model(model_1)
evaluate_model_train_test(model_1)

In [None]:
# Training loss (red) against validation loss (blue) per epoch for the first run.
fig_run_hist_1a, ax = plt.subplots(figsize=(10, 8))
ax.plot(run_hist_1a.history["loss"], "r", marker=".", label="Train Loss")
ax.plot(run_hist_1a.history["val_loss"], "b", marker=".", label="Validation Loss")
ax.legend()

In [None]:
# Second training run: fit is called again on the same model_1 object
# (it is not re-created) for a further 1000 epochs.
run_hist_1b = model_1.fit(
    X_train, y_train, validation_data=(X_test, y_test), epochs=1000, batch_size=10
)

In [None]:
# Re-evaluate after the second training run.
evaluate_model(model_1)
evaluate_model_train_test(model_1)

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go


def msa_plot():
    """Show the loss and validation-loss curves of both training runs.

    Reads the notebook-level ``run_hist_1a`` and ``run_hist_1b``. The second
    run's x-values start where the first run's end, so the two runs appear as
    one continuous epoch axis on a single plotly figure.
    """
    first = run_hist_1a.history
    second = run_hist_1b.history

    first_len = len(first["loss"])
    second_len = len(second["loss"])
    first_epochs = np.arange(first_len)
    second_epochs = np.arange(first_len, first_len + second_len)

    # (x values, y values, legend name), in the order the traces are added.
    trace_specs = [
        (first_epochs, first["loss"], "loss - Run 1"),
        (first_epochs, first["val_loss"], "val_loss - Run 1"),
        (second_epochs, second["loss"], "loss - Run 2"),
        (second_epochs, second["val_loss"], "val_loss - Run 2"),
    ]

    figure = make_subplots(rows=1, cols=1)
    figure.add_traces(
        [go.Scatter(x=xs, y=ys, name=label) for xs, ys, label in trace_specs]
    )
    figure.show()


def mse_plot(metric="mean_squared_error"):
    """Show a recorded metric and its validation counterpart for both runs.

    Reads the notebook-level ``run_hist_1a`` and ``run_hist_1b``. The training
    metric is drawn in the top subplot and the ``val_``-prefixed metric in the
    bottom one; the second run's x-values continue from the end of the first.

    Args:
        metric: History key to plot. Both ``metric`` and ``"val_" + metric``
            must be present in the history of each run.

    Raises:
        KeyError: If either key is missing from a run's history, e.g. because
            the model was not compiled with that metric.
    """
    val_metric = "val_" + metric
    histories = (run_hist_1a.history, run_hist_1b.history)

    # Fail early with an actionable message instead of a bare KeyError.
    for run_no, history in enumerate(histories, start=1):
        missing = [key for key in (metric, val_metric) if key not in history]
        if missing:
            raise KeyError(
                f"Run {run_no} history has no {missing}; "
                f"recorded keys are {sorted(history)}. Compile the model with "
                "this metric before training, or pass a recorded metric name."
            )

    first, second = histories
    n_mse = len(first[metric])
    m_mse = len(second[metric])
    fig_mse = make_subplots(rows=2, cols=1)
    run_hist_1a_traces_mse = [
        go.Scatter(
            x=np.arange(n_mse),
            y=first[metric],
            name=f"{metric} - Run 1",
        ),
        go.Scatter(
            x=np.arange(n_mse),
            y=first[val_metric],
            name=f"{val_metric} - Run 1",
        ),
    ]
    run_hist_1b_traces_mse = [
        go.Scatter(
            x=np.arange(n_mse, n_mse + m_mse),
            y=second[metric],
            name=f"{metric} - Run 2",
        ),
        go.Scatter(
            x=np.arange(n_mse, n_mse + m_mse),
            y=second[val_metric],
            name=f"{val_metric} - Run 2",
        ),
    ]
    # Row 1 holds the training metric, row 2 the validation metric.
    fig_mse.add_traces(run_hist_1a_traces_mse, rows=[1, 2], cols=[1, 1])
    fig_mse.add_traces(run_hist_1b_traces_mse, rows=[1, 2], cols=[1, 1])
    fig_mse.show()

In [None]:
# Plot the loss curves of both training runs.
msa_plot()

![](../assets/image-35.jpg)


In [None]:
from sklearn.metrics import confusion_matrix

# model_1 ends in a sigmoid unit, so predict() returns probabilities rather
# than class labels.
y_test_pred = model_1.predict(X_test)

# Threshold at 0.5 to obtain 0/1 labels; feeding the raw probabilities to a
# confusion matrix would truncate nearly all of them to class 0.
y_test_pred_labels = (y_test_pred > 0.5).astype(int).ravel()

# labels=[0, 1] guarantees a 2x2 matrix that matches the tick labels below,
# even if one class is never predicted. Rows are actual, columns predicted.
c_matrix = confusion_matrix(y_test, y_test_pred_labels, labels=[0, 1])
ax = sns.heatmap(
    c_matrix,
    annot=True,
    xticklabels=["No Diabetes", "Diabetes"],
    yticklabels=["No Diabetes", "Diabetes"],
    cbar=False,
    cmap="Blues",
    fmt=".6g",
)
plt.title("Confusion Matrix")
ax.set_xlabel("Prediction")
ax.set_ylabel("Actual")
plt.show()
plt.clf()

In [None]:
from sklearn.metrics import roc_curve

# Results - ROC Curve
# Score the test split, then trace true-positive rate against false-positive
# rate; the thresholds returned by roc_curve are not needed.
y_test_pred_probs = model_1.predict(X_test)
FPR, TPR, _ = roc_curve(y_test, y_test_pred_probs)

plt.plot(FPR, TPR)
# Dashed black diagonal for reference.
plt.plot((0, 1), (0, 1), linestyle="--", color="black")

plt.title("ROC Curve")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.show()
plt.clf()

In [None]:
def predict_use_case(model: Model, **kwargs):
    """Print the model's prediction for one or more feature vectors.

    With keyword arguments, each keyword is used as a label and its value is a
    sequence of feature values; one prediction is printed per keyword. Without
    keyword arguments, the eight metrics are read interactively via ``input``
    and a single prediction is printed under the label ``user_input``.

    Args:
        model: Model exposing ``predict``.
        **kwargs: ``label=feature_values`` pairs.
            NOTE(review): the values presumably follow the same order as the
            interactive prompts below — confirm against the training columns.
    """

    def predict(col, data):
        # The model receives a single row: shape (1, n_features).
        data = data.reshape(1, -1)
        final_prediction = model.predict(data)
        print(f"col: {col}, final_prediction: ", final_prediction)

    if not kwargs:
        print("Please Enter the Folowing Metrics one at a time")
        a = int(input("Enter Metric Pregnancies: "))
        b = int(input("Enter Metric Glucose: "))
        c = int(input("Enter Metric BloodPressure: "))
        d = int(input("Enter Metric SkinThickness: "))
        e = int(input("Enter Metric Insulin: "))
        f = float(input("Enter Metric BMI: "))
        g = float(input("Enter Metric DiabetesPedigreeFunction: "))
        h = int(input("Enter Metric Age: "))
        data = np.array([a, b, c, d, e, f, g, h])
        # Run the prediction on the values just entered; previously they were
        # collected and then discarded.
        predict("user_input", data)
    else:
        for k, v in kwargs.items():
            predict(k, np.array(v))

In [None]:
# Save the trained model to an .h5 file.
# NOTE(review): the ../models directory presumably has to exist beforehand — confirm.
model_1.save("../models/diabetes_risk_nn.h5")

In [None]:
# Use the public loader; `keras.saving.save` is an internal module path that
# is not available in every Keras release.
from keras.models import load_model

# Reload the saved model and print its prediction for two hand-written rows.
model = load_model("../models/diabetes_risk_nn.h5")

# Raw (unscaled) feature values, matching the training data as long as the
# preprocess() call earlier in the notebook stays disabled.
outcome_1 = [6, 148, 72, 35, 0, 33.6, 0.627, 50]
outcome_0 = [1, 85, 66, 29, 0, 26.6, 0.351, 31]

predict_use_case(model, outcome_1=outcome_1, outcome_0=outcome_0)