In [4]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [3]:
# Load the diabetes dataset from the CSV file in the working directory
diabetes_df = pd.read_csv(filepath_or_buffer="diabetes.csv")

# Leaving the frame as the cell's last expression renders it as the cell output
diabetes_df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


##### -- this dataset is sufficient for both logistic regression and a neural network
##### -- the data needs no preprocessing or scaling for logistic regression
##### -- a neural network needs the data to be standardized/normalized -- so we will scale the data

In [6]:
# Separate the target column (Outcome) from the feature columns
y = diabetes_df["Outcome"]
X = diabetes_df.drop(columns=["Outcome"])

# Hold out a test split. The split is stratified on y, and the fixed
# random_state makes the same split come out on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)


--------------------------------------------------------------
####  here we will standardize/normalize by scaling for nn
--------------------------------------------------------------

In [7]:
# Standardize the numerical features as preprocessing for the neural network

# Scaler instance that will hold the fitted scaling statistics
scaler = StandardScaler()

# Fit on the training split only; fit() returns the scaler itself, so
# X_scaler refers to the same fitted object as scaler
X_scaler = scaler.fit(X_train)

# Apply the training-set scaling to both the training and the test split
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

================================================================
#### we are ready to train both the logistic regression and neural network models
----------------------------------------------------------------
### we train the logistic regression model first
================================================================

#### Logistic regression models are built using scikit-learn's LogisticRegression class in the linear_model module
#### Parameters used in this model 
  #### - solver set to 'lbfgs' --- an algorithm for learning and optimizing (there are many optimizers, the choice is yours)
  #### - iteration parameter --- max_iter set to 200 iterations - gives the solver the opportunity to converge on effective weights  

In [8]:
# Build the logistic regression classifier: L-BFGS solver, at most 200 iterations
log_classifier = LogisticRegression(
    max_iter=200,
    solver="lbfgs",
)

In [9]:
# Fit the logistic regression model on the (unscaled) training features and labels
log_classifier.fit(X=X_train, y=y_train)

LogisticRegression(max_iter=200)

In [11]:
# Predict the held-out test split with the fitted classifier
y_pred = log_classifier.predict(X_test)

# Fraction of test rows whose predicted label matches the true label
test_accuracy = accuracy_score(y_test, y_pred)
print(f" Logistic regression model accuracy: {test_accuracy:.3f}")

 Logistic regression model accuracy: 0.729


In [None]:
===============================================================================
#### having trained the logistic regression model, we get an accuracy of 73%
===============================================================================
#### we now need to train the neural network
##### - we will compile and evaluate the nn model
##### - parameters used include:
   ###### --- One (1) hidden layer with units = 16 neurons, input_dim = 8
   ###### --- use activation function "relu"
   ###### --- Loss function = "binary_crossentropy", Metric = "accuracy"
   ###### --- optimizer = "adam", number of iterations/epochs = 50 (compare with 200 in the logistic regression model)
========================================================================================================  
    

In [14]:
# Seed TensorFlow's global random generator so that weight initialization and
# batch shuffling start from a fixed seed instead of changing on every run
tf.random.set_seed(42)

# Take the input width from the scaled training matrix rather than hard-coding it
n_features = X_train_scaled.shape[1]

# first we define a basic sequential nn model
nn_model = tf.keras.models.Sequential()
# hidden layer: 16 relu units; input_dim declares how many features each row has
nn_model.add(tf.keras.layers.Dense(units=16, activation="relu", input_dim=n_features))
# output layer: a single sigmoid unit, paired with the binary_crossentropy loss below
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# compile the sequential model and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# train the model by fitting the scaled training data, 50 epochs
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50)


# Evaluate the model using the scaled test data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
6/6 - 0s - loss: 0.4976 - accuracy: 0.7188
Loss: 0.49756503105163574, Accuracy: 0.71875
