In [None]:
import pandas as pd   
import time
from sklearn.model_selection import train_test_split #, RepeatedKFold, GridSearchCV, cross_val_score
from sklearn.metrics import roc_curve, roc_auc_score
import warnings
warnings.filterwarnings('ignore')

In [None]:
import os

# Show the starting directory so the effect of the switch below is visible.
print(os.getcwd())
# Change working directory to the location of the data file.
# The path is machine-specific: an unconditional os.chdir raises
# FileNotFoundError on any other machine and stops "Restart & Run All" at
# this cell, so only switch when the directory actually exists.
work_dir = '/mnt/d/Sajjad/08-2023/Python Code/Introduction to Machine Learning/'
if os.path.isdir(work_dir):
    os.chdir(work_dir)
else:
    print(f"Directory not found, keeping current working directory: {work_dir}")
print(os.getcwd())

Extracting data from UCI Machine Learning Repository

In [None]:
from ucimlrepo import fetch_ucirepo 

In [None]:
# Download the dataset registered under id 891 in the UCI repository.
cdc_diabetes_health_indicators = fetch_ucirepo(id=891)

# Feature table and target table (as pandas dataframes), unpacked together.
X, y = (
    cdc_diabetes_health_indicators.data.features,
    cdc_diabetes_health_indicators.data.targets,
)

# Dataset-level metadata first ...
print(cdc_diabetes_health_indicators.metadata)

# ... then the per-variable information.
print(cdc_diabetes_health_indicators.variables)

Since Education and Income are on an ordinal scale, there is no need to one-hot encode them.

In [None]:
#set Education Variable as Categorical
#X['Education'] = X['Education'].astype('category')
#X['Education'].value_counts()

In [None]:
# Preview the first rows of the feature table.
X.head()

In [None]:
# Column dtypes of the feature table.
X.dtypes

Making two train/test datasets: one without scaling and the other with scaling.

In [None]:
# Unscaled split: 30% of the rows go to the test set; the seed is fixed so
# the partition is repeatable.
trainX2, testX2, trainy2, testy2 = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=2,
)

In [None]:
from sklearn.preprocessing import StandardScaler

Dataset with scaling

In [None]:
# Scaled split.
# Split first, then fit the scaler on the training rows only. Fitting
# StandardScaler on the full X before splitting lets the test rows influence
# the means/variances used to transform the training features (data leakage).
# test_size and random_state match the unscaled split, so both splits
# contain the same rows.
trainX_raw, testX_raw, trainy, testy = train_test_split(X, y, test_size=0.3, random_state=2)
scaler = StandardScaler()
trainX = scaler.fit_transform(trainX_raw)  # statistics learned from train rows only
testX = scaler.transform(testX_raw)        # test rows reuse the train statistics
# Full feature matrix under the same train-fitted scaler; kept so the name
# X_scaled stays available.
X_scaled = scaler.transform(X)

In [None]:
# Count the distribution of y in train and test.
# The train counts need an explicit print because only a cell's last
# expression is displayed automatically.
print(trainy.value_counts())
testy.value_counts()

In [None]:
# Dimensions of the scaled training feature matrix.
trainX.shape

In [None]:
# Dimensions of the scaled test feature matrix.
testX.shape

This type casting is not always required, but torch sometimes raises an error on other dtypes, so as a precaution all arrays are converted to float32.

In [None]:
# Cast the features and labels of both splits to float32 in a single pass.
trainX, testX, trainy, testy = (
    part.astype('float32') for part in (trainX, testX, trainy, testy)
)

using "torch" as keras_backend. Could have used tensorflow or jax as well.

In [None]:
import os
import torch
# Select torch as the Keras backend. The assignment sits before the keras
# imports below. NOTE(review): Keras is documented to read KERAS_BACKEND when
# it is first imported, so re-running this cell after keras is already loaded
# presumably does not switch backends — confirm.
os.environ["KERAS_BACKEND"] = "torch"
from keras.models import Sequential
from keras.layers import Dense, Dropout

explore batch size, iteration size

In [None]:
# Number of input features, taken from the training matrix.
n_features = trainX.shape[1]

# Feed-forward binary classifier: n_features -> 10 -> 5 -> 1.
model = Sequential()

# First hidden layer: 10 ReLU units; also declares the input width.
model.add(Dense(10, input_dim=n_features, activation='relu'))

# Second hidden layer: 5 ReLU units.
model.add(Dense(5, activation='relu'))

# Output layer: a single sigmoid unit for binary classification.
model.add(Dense(1, activation='sigmoid'))

# Adam optimizer with binary cross-entropy loss; AUC is tracked as a metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['AUC']) #change optimizer

# summary() prints the table itself and returns None, so wrapping it in
# print() emitted a stray "None" line after the table.
model.summary()


In [None]:
# Train the current model on the scaled training split: 20 epochs, mini-batches of 512.
model.fit(trainX, trainy, epochs=20, batch_size=512)

In [None]:
# Score the current model on the scaled test split; the cell displays whatever
# evaluate returns (presumably the loss followed by the compiled AUC metric).
model.evaluate(testX, testy, batch_size=4096)

comparing the performance with catboost, xgboost, and lgbm

In [None]:
from catboost import CatBoostClassifier
import xgboost as xgb
import lightgbm as lgb

In [None]:
# Shared settings for the boosting models below:
# number of trees / boosting iterations ...
num_of_models = 100
# ... and maximum tree depth.
depth_level = 3

In [None]:
# LightGBM baseline on the unscaled split.
lgb_model = lgb.LGBMClassifier(
    max_depth=depth_level,
    n_estimators=num_of_models,
    learning_rate=0.1,
)
# Start the clock right before fitting.
start_time = time.time()
lgb_model.fit(trainX2, trainy2)
# Keep only probability column 1 and score it with ROC AUC.
md_probs = lgb_model.predict_proba(testX2)[:, 1]
md_auc = roc_auc_score(testy2, md_probs)
print("LG Boost", " : ", md_auc)
# Stop the clock; the measured span covers fitting, prediction and scoring.
end_time = time.time()
total_time = end_time - start_time
print("Total time LGB: ", total_time)

In [None]:
# CatBoost baseline on the unscaled split; verbose=False is passed to the constructor.
cb = CatBoostClassifier(
    iterations=num_of_models,
    depth=depth_level,
    learning_rate=0.1,
    loss_function='Logloss',
    verbose=False,
)
# Start the clock right before fitting.
start_time = time.time()
cb.fit(trainX2, trainy2)
# Keep only probability column 1 and score it with ROC AUC.
md_probs = cb.predict_proba(testX2)[:, 1]
md_auc = roc_auc_score(testy2, md_probs)
print("Cat Boost", " : ", md_auc)
# Stop the clock; the measured span covers fitting, prediction and scoring.
end_time = time.time()
total_time = end_time - start_time
print("Total time CB: ", total_time)

In [None]:
# XGBoost baseline on the unscaled split.
xgb_model = xgb.XGBClassifier(
    max_depth=depth_level,
    n_estimators=num_of_models,
    learning_rate=0.1,
)
# Start the clock right before fitting.
start_time = time.time()
xgb_model.fit(trainX2, trainy2)
# Keep only probability column 1 and score it with ROC AUC.
md_probs = xgb_model.predict_proba(testX2)[:, 1]
md_auc = roc_auc_score(testy2, md_probs)
print("XG Boost", " : ", md_auc)
# Stop the clock; the measured span covers fitting, prediction and scoring.
end_time = time.time()
total_time = end_time - start_time
print("Total time XGB: ", total_time)

using sklearn version of feedforward neural network (also called Multi-layer Perceptron)

In [None]:
from sklearn.neural_network import MLPClassifier

In [None]:
# scikit-learn multilayer perceptron with hidden layers of 10 and 5 units.
mlp = MLPClassifier(
    hidden_layer_sizes=(10, 5),
    max_iter=10,
    solver='adam',
    verbose=10,
    random_state=1,
    learning_rate_init=0.1,
    batch_size=2048,
)
# Train incrementally: one partial_fit call per loop pass, scoring after each.
for epoch in range(10):
    mlp.partial_fit(trainX, trainy, classes=[0, 1])

    # ROC AUC of probability column 1 on testX / testy after this pass.
    y_prob = mlp.predict_proba(testX)[:, 1]
    auc_roc = roc_auc_score(testy, y_prob)
    print(f"Epoch {epoch + 1}, AUC ROC: {auc_roc:.4f}")

In [None]:
from keras.optimizers import SGD

experimenting with different optimizers

In [None]:
# Input width comes from the training matrix.
n_features = trainX.shape[1]

# Two hidden ReLU layers (20 and 10 units) followed by one sigmoid output
# unit for binary classification, declared as a layer list.
model = Sequential([
    Dense(20, input_dim=n_features, activation='relu'),
    Dense(10, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# SGD with momentum is passed to compile here in place of the 'adam' string.
sgd_optimizer = SGD(learning_rate=0.01, momentum=0.9)
model.compile(
    optimizer=sgd_optimizer,
    loss='binary_crossentropy',
    metrics=['AUC'],
)

# 20 epochs with mini-batches of 2048 rows.
model.fit(trainX, trainy, epochs=20, batch_size=2048)


In [None]:
# Score the current model on the scaled test split; verbose=0 is passed to
# evaluate and the cell displays its return value.
model.evaluate(testX, testy, verbose=0, batch_size=2048)

In [None]:
from keras.callbacks import EarlyStopping

experimenting with early stopping without validation set

In [None]:
# Early stopping driven by the training loss (no validation set involved):
# patience of 3 epochs, with restore_best_weights enabled.
early_stopping = EarlyStopping(
    monitor='loss',
    patience=3,
    restore_best_weights=True,
)

# Keep fitting the current model, now with the callback attached.
model.fit(
    trainX,
    trainy,
    epochs=10,
    batch_size=2048,
    callbacks=[early_stopping],
)

In [None]:
# Score the current model on the scaled test split; verbose=0 is passed to
# evaluate and the cell displays its return value.
model.evaluate(testX, testy, verbose=0, batch_size=2048)

experimenting with Dropout

In [None]:
from keras.layers import Dropout

In [None]:
# Three hidden ReLU layers (20, 15, 10 units), each followed by Dropout(0.25),
# and a single sigmoid output unit.
model = Sequential()

# Input width is read from the training matrix instead of a hard-coded 21,
# so the cell keeps working if the feature set changes.
model.add(Dense(20, input_dim=trainX.shape[1], activation='relu'))
model.add(Dropout(0.25))  # dropout rate 0.25
model.add(Dense(15, activation='relu'))
model.add(Dropout(0.25))  # dropout rate 0.25
model.add(Dense(10, activation='relu'))
model.add(Dropout(0.25))  # dropout rate 0.25
model.add(Dense(1, activation='sigmoid'))
# SGD-with-momentum alternative; NOT used below because compile() selects
# 'adam'. Pass optimizer=sgd_optimizer to switch.
sgd_optimizer = SGD(learning_rate=0.01, momentum=0.9)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['AUC']) #change optimizer
# early_stopping is the callback object created in an earlier cell.
model.fit(trainX, trainy, epochs=30, batch_size=2048, callbacks=[early_stopping])

In [None]:
# One hidden ReLU layer (20 units) followed by Dropout(0.25) and a single
# sigmoid output unit.
model = Sequential()

# Input width is read from the training matrix instead of a hard-coded 21,
# so the cell keeps working if the feature set changes.
model.add(Dense(20, input_dim=trainX.shape[1], activation='relu'))
model.add(Dropout(0.25))  # dropout rate 0.25
model.add(Dense(1, activation='sigmoid'))
# SGD-with-momentum alternative; NOT used below because compile() selects
# 'adam'. Pass optimizer=sgd_optimizer to switch.
sgd_optimizer = SGD(learning_rate=0.01, momentum=0.9)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['AUC']) #change optimizer
# early_stopping is the callback object created in an earlier cell.
model.fit(trainX, trainy, epochs=30, batch_size=2048, callbacks=[early_stopping])

experimenting with weights regularization

In [None]:
from keras.regularizers import l1, l2

In [None]:
# Two hidden ReLU layers with weight penalties (L2 on the first, L1 on the
# second), each followed by Dropout(0.25), and a single sigmoid output unit.
model = Sequential()

# Input width is read from the training matrix instead of a hard-coded 21.
model.add(Dense(20, input_dim=trainX.shape[1], activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.25))  # dropout rate 0.25
# Only the first layer declares the input width; the input_dim=21 that used
# to sit on this second layer did not describe its actual input (the 20
# outputs of the previous layer) and has been dropped.
model.add(Dense(10, activation='relu', kernel_regularizer=l1(0.01)))
model.add(Dropout(0.25))  # dropout rate 0.25
model.add(Dense(1, activation='sigmoid'))
# SGD-with-momentum alternative; NOT used below because compile() selects
# 'adam'. Pass optimizer=sgd_optimizer to switch.
sgd_optimizer = SGD(learning_rate=0.01, momentum=0.9)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['AUC']) #change optimizer
# early_stopping is the callback object created in an earlier cell.
model.fit(trainX, trainy, epochs=150, batch_size=2048, callbacks=[early_stopping])

experimenting with early stopping with validation set

In [None]:
# Carve a validation set out of the training split.
# train_test_split returns (train, test) pairs in that order, and test_size=0.2
# sizes the SECOND element of each pair. The names were previously swapped,
# which made the "validation" set 80% of the data and the "training" set 20%.
trainX3, valX, trainy3, valy = train_test_split(trainX, trainy, test_size=0.2, random_state=42)

In [None]:
# Early stopping now watches the validation loss: patience of 3 epochs, with
# restore_best_weights enabled.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Two hidden ReLU layers (20 and 10 units), each followed by Dropout(0.25),
# and a single sigmoid output unit.
model = Sequential()

# Input width is read from the training matrix instead of a hard-coded 21.
model.add(Dense(20, input_dim=trainX3.shape[1], activation='relu'))
model.add(Dropout(0.25))  # dropout rate 0.25
# Only the first layer declares the input width; the redundant input_dim=21
# on this second layer has been dropped.
model.add(Dense(10, activation='relu'))
model.add(Dropout(0.25))  # dropout rate 0.25
model.add(Dense(1, activation='sigmoid'))
# SGD-with-momentum alternative; NOT used below because compile() selects
# 'adam'. Pass optimizer=sgd_optimizer to switch.
sgd_optimizer = SGD(learning_rate=0.01, momentum=0.9)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['AUC']) #change optimizer
# Fit on the training portion and monitor the separate validation portion.
model.fit(trainX3, trainy3, validation_data=(valX, valy), epochs=50, batch_size=2048, callbacks=[early_stopping])

experimenting with weights initialization

In [None]:
from keras.initializers import he_normal

In [None]:
# Two hidden ReLU layers (20 and 10 units), each followed by Dropout(0.25),
# and a single sigmoid output unit. The first layer uses He-normal weight
# initialization.
model = Sequential()

# Input width is read from the training matrix instead of a hard-coded 21.
model.add(Dense(20, input_dim=trainX3.shape[1], activation='relu', kernel_initializer=he_normal()))
model.add(Dropout(0.25))  # dropout rate 0.25
# Only the first layer declares the input width; the redundant input_dim=21
# on this second layer has been dropped.
model.add(Dense(10, activation='relu'))
model.add(Dropout(0.25))  # dropout rate 0.25
model.add(Dense(1, activation='sigmoid'))
# SGD-with-momentum alternative; NOT used below because compile() selects
# 'adam'. Pass optimizer=sgd_optimizer to switch.
sgd_optimizer = SGD(learning_rate=0.01, momentum=0.9)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['AUC']) #change optimizer
# Fit on trainX3/trainy3 rather than the full trainX/trainy: valX was split
# out of trainX, so fitting on trainX meant the model trained on the very rows
# used for validation, making val_loss (and early stopping) unreliable.
# early_stopping is the callback object created in an earlier cell.
model.fit(trainX3, trainy3, validation_data=(valX, valy), epochs=50, batch_size=2048, callbacks=[early_stopping])