# Import the dataset

In [None]:
import requests
import pandas as pd

# Location of the Pima Indians diabetes data set (CSV).
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"

# Load it into a pandas DataFrame. pandas fetches the URL itself, so no
# separate HTTP request is needed. header=None tells pandas that the first
# line of the file is data, so the column names are supplied explicitly.
dataset = pd.read_csv(
    url,
    header=None,
    names=["pregnant", "glucose", "bp", "skin", "insulin", "bmi", "pedigree", "age", "class"],
)

# Show the dataset
print(dataset)

# Pre-process dataset for the neural net

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale every column of the dataset with a MinMaxScaler (default settings).
# The scaler is fitted on all nine columns, "class" included, so anything
# passed to scaler.transform() later must provide the same nine columns.
scaler = MinMaxScaler()

# Learn the per-column minimum/maximum and apply the scaling in one step
scaled_dataset = scaler.fit_transform(dataset)

# Show the scaled dataset (a NumPy array, not a DataFrame)
print(scaled_dataset)

# Split the data into training, validation and test sets

In [None]:
from sklearn.model_selection import train_test_split

# Split the dataset into features and target.
# The features are taken from the min-max scaled array so that the network is
# trained on the same value range that scaler.transform() produces for new
# samples at prediction time. Training on the raw columns while predicting on
# scaled input would feed the model inputs on two different scales.
features = scaled_dataset[:, 0:8]
# The target is the unscaled "class" column (column index 8).
target = dataset.iloc[:, 8]

# Hold out 40% of the rows as testing data (fixed seed for a repeatable split)
features_train, features_test, target_train, target_test = train_test_split(
    features, target, test_size=0.4, random_state=1
)

# NOTE: these sizes are reported before the validation rows are carved out of
# the training data below, so "Training Data" here still includes them.
total_rows = len(features_train) + len(features_test)
print(f"Size of Training Data: {len(features_train)}")
print(f"Size of Testing Data: {len(features_test)}")

print(f"Size in Percent for Training Data: {len(features_train) / total_rows * 100}%")
print(f"Size in Percent for Testing Data: {len(features_test) / total_rows * 100}%")

# Split the training data again: 20% of it becomes the validation data
features_train, features_val, target_train, target_val = train_test_split(
    features_train, target_train, test_size=0.2, random_state=1
)

# Create a neural net

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense

# Define the network as a stack of fully connected layers:
#   8 inputs -> Dense(12, relu) -> Dense(8, relu) -> Dense(1, sigmoid)
model = Sequential(
    [
        # First layer; input_dim=8 declares the eight feature columns
        Dense(12, input_dim=8, activation="relu"),
        # Hidden layer
        Dense(8, activation="relu"),
        # Output layer: a single sigmoid unit for the binary class
        Dense(1, activation="sigmoid"),
    ]
)

# Compile the model for binary classification and track accuracy
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Train the model for 150 epochs in mini-batches of 10 rows; the validation
# data is evaluated after every epoch and recorded in `history`.
# (An earlier variant used 15 epochs with an EarlyStopping callback on
# val_accuracy that restored the best weights; it is no longer used.)
history = model.fit(
    features_train,
    target_train,
    validation_data=(features_val, target_val),
    epochs=150,
    batch_size=10,
)

# Evaluation

In [None]:
# Score the trained model on the held-out testing data.
# Index 0 of the result is the loss; index 1 is the first compiled metric.
scores = model.evaluate(features_test, target_test)
metric_name = model.metrics_names[1]
metric_percent = scores[1] * 100
print(f"{metric_name}: {metric_percent}%")

# Training History

In [None]:
import matplotlib.pyplot as plt

# Plot the per-epoch accuracy recorded during training: first the curve for
# the training data, then the one for the validation data (the legend below
# relies on this order).
for series_key in ("accuracy", "val_accuracy"):
    plt.plot(history.history[series_key])

plt.title("Model Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(["Train", "Validation"], loc="upper left")
plt.show()

# Small Backtest

In [None]:
# Now we can use the model to predict the class of a new patient.
# The patient is built with all nine columns (including "class") because the
# scaler is applied to it below and expects the same columns it was fitted on.
# NOTE(review): glucose=11 and pedigree=627 look like typos (possibly 148 and
# 0.627) - confirm the intended sample values.
new_patient = pd.DataFrame(
    [[6, 11, 72, 35, 0, 36, 627, 50, 1]],
    columns=["pregnant", "glucose", "bp", "skin", "insulin", "bmi", "pedigree", "age", "class"],
)
print(f"Unscaled new_patient {new_patient}\n")

# Scale the new patient with the already fitted scaler (returns a NumPy array)
new_patient = scaler.transform(new_patient)
print(f"Scaled new_patient{new_patient}\n")

# Drop the target column: keep only the first eight (feature) columns of the
# scaled array, which is what the network takes as input.
new_patient = new_patient[:, 0:8]

# Predict the class of the new patient
prediction = model.predict(new_patient)
print(f"\nPure Prediction: {prediction}\n")

# Translate the prediction into the binary classification of the patient to be
# either diabetic or not. The model returns one row with one value per
# patient, so the scalar is extracted explicitly instead of relying on the
# truthiness of a whole array in the comparison.
probability = float(prediction[0][0])
prediction = "Diabetic" if probability > 0.5 else "Not Diabetic"

print(f"Human readable Prediction: {prediction}")