In [459]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [460]:
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
import pickle

In [461]:
# Load the diabetes dataset into the working frame `df`.
# `df` is later replaced by a standardized version of itself in the scaling cell.
df=pd.read_csv('diabetes.csv')

In [462]:
# Second, independent read of the same CSV. This copy is never scaled, so the
# raw 'Outcome' column can be re-attached to the scaled features later on.
df_orig=pd.read_csv('diabetes.csv')

In [463]:
# Show column names, dtypes and non-null counts of the working frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Glucose  768 non-null    int64  
 1   Insulin  768 non-null    int64  
 2   BMI      768 non-null    float64
 3   Age      768 non-null    int64  
 4   Outcome  768 non-null    int64  
dtypes: float64(1), int64(4)
memory usage: 30.1 KB


In [464]:
# Summary statistics (count, mean, std, quartiles, min/max) per column.
# NOTE(review): the reported minimum of 0 for Glucose, Insulin and BMI looks like
# a missing-value encoding rather than a real measurement — confirm before modeling.
df.describe()

Unnamed: 0,Glucose,Insulin,BMI,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0
mean,120.894531,79.799479,31.992578,33.240885,0.348958
std,31.972618,115.244002,7.88416,11.760232,0.476951
min,0.0,0.0,0.0,21.0,0.0
25%,99.0,0.0,27.3,24.0,0.0
50%,117.0,30.5,32.0,29.0,0.0
75%,140.25,127.25,36.6,41.0,1.0
max,199.0,846.0,67.1,81.0,1.0


In [465]:
# Preview the first rows of the raw (unscaled) data.
df.head()

Unnamed: 0,Glucose,Insulin,BMI,Age,Outcome
0,148,0,33.6,50,1
1,85,0,26.6,31,0
2,183,0,23.3,32,1
3,89,94,28.1,21,0
4,137,168,43.1,33,1


In [466]:
# Standard Scaling due to varied ranges of different attributes in given dataset
from sklearn.preprocessing import StandardScaler

# Single definition of the feature columns used for scaling.
feature_columns = ['Glucose', 'Insulin', 'BMI', 'Age']

scaler = StandardScaler()
# Always fit on the untouched copy (df_orig) instead of `df`: this cell overwrites
# `df`, so reading from `df` would make a second run of the cell fit the scaler on
# already-standardized values, leaving `scaler` with mean ~0 / scale ~1 (useless for
# transforming raw inputs once it is pickled).
# NOTE(review): the scaler is fitted on every row, including rows that later end up
# in the test split / validation folds; consider fitting on the training rows only
# to avoid leaking test statistics into preprocessing.
scaled_features = scaler.fit_transform(df_orig[feature_columns])

# Keep df_orig's index so the column-wise concat below aligns row for row.
df = pd.DataFrame(scaled_features, columns=feature_columns, index=df_orig.index)
df = pd.concat([df, df_orig['Outcome']], axis=1)
df.head()

Unnamed: 0,Glucose,Insulin,BMI,Age,Outcome
0,0.848324,-0.692891,0.204013,1.425995,1
1,-1.123396,-0.692891,-0.684422,-0.190672,0
2,1.943724,-0.692891,-1.103255,-0.105584,1
3,-0.998208,0.123302,-0.494043,-1.041549,0
4,0.504055,0.765836,1.409746,-0.020496,1


In [467]:
from sklearn.model_selection import train_test_split

# Feature matrix and binary target taken from `df`.
X = df.loc[:, ['Glucose', 'Insulin', 'BMI', 'Age']]
y = df.loc[:, 'Outcome']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
split_parts = train_test_split(X, y, test_size=0.2, random_state=21)
X_train, X_test, y_train, y_test = split_parts

# Report the shape of each partition.
for caption, subset in (("Training set size:", X_train), ("Testing set size:", X_test)):
    print(caption, subset.shape)


Training set size: (614, 4)
Testing set size: (154, 4)


In [468]:
# Gaussian Naive-Bayes Classifier
# fit() returns the estimator itself, so construction and training are chained.
naive_bayes_model = GaussianNB().fit(X_train, y_train)

# Predict the held-out rows and show the raw 0/1 predictions.
y_pred_nb = naive_bayes_model.predict(X_test)
print("Naive Bayes Predictions:", y_pred_nb)

Naive Bayes Predictions: [0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 1 1 0 0 0 0 1 0 1 0 0 0 1 0
 0 0 0 0 0 0 0 1 0 0 0 0 1 1 1 0 1 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 1 1 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0
 0 1 0 1 1 1 1 1 0 1 0 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0
 0 1 0 0 1 1]


In [469]:
# Scikit-Learn Perceptron Classifier
# Same learning rate / epoch budget as the custom implementation trained below.
perceptron_model = Perceptron(eta0=0.01, max_iter=1000).fit(X_train, y_train)

# Predict the held-out rows and show the raw 0/1 predictions.
y_pred_perceptron = perceptron_model.predict(X_test)
print("Perceptron Predictions:", y_pred_perceptron)

Perceptron Predictions: [0 0 0 1 0 0 0 0 0 1 1 1 1 1 0 0 0 1 0 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 0 0 1
 0 0 1 0 1 0 0 1 0 0 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 0 1 1 0 0 1 1 0 1 1 0 0
 0 0 1 1 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 1 1 1 0 1 1 1 0 0 0 1 1 0 0 0 0 0 0
 0 1 0 0 1 1 1 1 0 1 1 0 1 0 1 0 1 0 1 1 1 0 1 1 0 0 0 1 1 1 1 0 1 1 1 1 1
 1 0 0 1 1 0]


In [470]:
# Custom Perceptron Classifier
class CustomPerceptron:
    """Binary perceptron trained with the classic mistake-driven update rule.

    Labels ``<= 0`` are treated as the negative class (-1 internally) and every
    other label as the positive class (+1). ``predict`` reports classes as 0/1.

    Parameters
    ----------
    learning_rate : float, default=0.01
        Step size applied to each weight/bias correction.
    n_iters : int, default=1000
        Maximum number of passes (epochs) over the training data.

    Attributes
    ----------
    weights : ndarray of shape (n_features,) or None
        Learned weight vector; ``None`` until ``fit`` has been called.
    bias : float or None
        Learned intercept; ``None`` until ``fit`` has been called.
    """

    def __init__(self, learning_rate=0.01, n_iters=1000):
        self.learning_rate = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Learn weights and bias from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features (ndarray, DataFrame or nested list).
        y : array-like of shape (n_samples,)
            Class labels; values ``<= 0`` are the negative class.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or ``X`` and ``y`` disagree on the sample count.
        """
        # Coerce to plain arrays: iterating a DataFrame yields column labels,
        # not rows, so pandas inputs must be converted before the sample loop.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-dimensional (n_samples, n_features); got ndim={X.ndim}"
            )
        if y.shape[0] != X.shape[0]:
            raise ValueError(
                f"X and y have inconsistent lengths: {X.shape[0]} != {y.shape[0]}"
            )

        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        # Binary Classifier: map labels onto {-1, +1}
        y_ = np.where(y <= 0, -1, 1)

        for _ in range(self.n_iters):
            made_update = False
            for x_i, target in zip(X, y_):
                # Perceptron Training Update Rule
                linear_output = np.dot(x_i, self.weights) + self.bias
                y_predicted = np.sign(linear_output)

                # Weights and Bias Updation (if incorrect prediction).
                # np.sign(0) == 0, so a sample exactly on the boundary also
                # triggers an update.
                if target * y_predicted <= 0:
                    self.weights += self.learning_rate * target * x_i
                    self.bias += self.learning_rate * target
                    made_update = True

            # An epoch without any mistake leaves the parameters unchanged, so
            # every later epoch would be identical: stop early.
            if not made_update:
                break

    def predict(self, X):
        """Return the predicted class (0 or 1) for each row of ``X``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.weights is None:
            raise RuntimeError("CustomPerceptron must be fitted before calling predict().")
        # sum(wi.xi)+b
        linear_output = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        return np.where(linear_output >= 0, 1, 0)


# Train the custom perceptron on the NumPy arrays behind the pandas objects,
# then predict the held-out rows.
custom_perceptron_model = CustomPerceptron(n_iters=1000, learning_rate=0.01)
train_features, train_labels = X_train.to_numpy(), y_train.to_numpy()
custom_perceptron_model.fit(train_features, train_labels)

y_pred_custom_perceptron = custom_perceptron_model.predict(X_test.to_numpy())
print("Custom Perceptron Predictions:", y_pred_custom_perceptron)

Custom Perceptron Predictions: [0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 1 1 1 0 0 0 0 1 0 1 0 0 0 0 0
 0 0 1 0 1 0 0 1 0 0 0 0 1 1 1 1 1 0 0 0 0 0 1 1 1 0 1 0 0 0 0 0 0 1 1 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0
 0 1 0 0 0 1 1 1 0 1 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 1 1 1 0
 1 0 0 0 1 0]


In [471]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


def evaluate_predictions(model_label, y_true, y_pred):
    """Print and return the hold-out metrics for one set of predictions.

    Parameters
    ----------
    model_label : str
        Name used in the printed accuracy line, e.g. ``"Naive Bayes"``.
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, same length as ``y_true``.

    Returns
    -------
    tuple
        ``(accuracy, confusion_matrix, classification_report)`` as returned by
        the corresponding scikit-learn functions.
    """
    accuracy = accuracy_score(y_true, y_pred)
    conf_matrix = confusion_matrix(y_true, y_pred)
    class_report = classification_report(y_true, y_pred)

    print(f"{model_label} Model Accuracy:", accuracy)
    print("Confusion Matrix:\n", conf_matrix)
    print("Classification Report:\n", class_report)
    return accuracy, conf_matrix, class_report


# One call per model; the result names are kept so later cells can reuse them.
accuracy_nb, conf_matrix_nb, class_report_nb = evaluate_predictions(
    "Naive Bayes", y_test, y_pred_nb
)

accuracy_perceptron, conf_matrix_perceptron, class_report_perceptron = evaluate_predictions(
    "Perceptron", y_test, y_pred_perceptron
)

(accuracy_custom_perceptron,
 conf_matrix_custom_perceptron,
 class_report_custom_perceptron) = evaluate_predictions(
    "Custom Perceptron", y_test, y_pred_custom_perceptron
)

Naive Bayes Model Accuracy: 0.6948051948051948
Confusion Matrix:
 [[80 14]
 [33 27]]
Classification Report:
               precision    recall  f1-score   support

           0       0.71      0.85      0.77        94
           1       0.66      0.45      0.53        60

    accuracy                           0.69       154
   macro avg       0.68      0.65      0.65       154
weighted avg       0.69      0.69      0.68       154

Perceptron Model Accuracy: 0.6883116883116883
Confusion Matrix:
 [[59 35]
 [13 47]]
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.63      0.71        94
           1       0.57      0.78      0.66        60

    accuracy                           0.69       154
   macro avg       0.70      0.71      0.69       154
weighted avg       0.72      0.69      0.69       154

Custom Perceptron Model Accuracy: 0.7077922077922078
Confusion Matrix:
 [[79 15]
 [30 30]]
Classification Report:
               

In [472]:
# TODO: Highlight the strengths and weaknesses of each model

In [473]:
#Saving Models
# Map each output file to the fitted estimator it should contain.
# NOTE(review): pickle presumably stores CustomPerceptron by reference to its class,
# so the class definition must be available wherever the file is loaded — confirm.
trained_models = {
    'naive_bayes_model.pkl': naive_bayes_model,
    'perceptron_model.pkl': perceptron_model,
    'custom_perceptron_model.pkl': custom_perceptron_model,
}
for pickle_path, estimator in trained_models.items():
    with open(pickle_path, 'wb') as out_file:
        pickle.dump(estimator, out_file)
print("Models saved successfully.")

# The fitted scaler is stored separately from the models.
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)
print("Scaler saved successfully.")

Models saved successfully.
Scaler saved successfully.


In [474]:
#Loading Models
def _load_pickle(path):
    """Return the object unpickled from the file at ``path``."""
    with open(path, 'rb') as in_file:
        return pickle.load(in_file)


# Only unpickle files you trust: pickle.load can execute arbitrary code.
# These files are the ones written by the saving cell of this notebook.
loaded_naive_bayes_model = _load_pickle('naive_bayes_model.pkl')
loaded_perceptron_model = _load_pickle('perceptron_model.pkl')
loaded_custom_perceptron_model = _load_pickle('custom_perceptron_model.pkl')
print("Models loaded successfully.")

loaded_scaler = _load_pickle('scaler.pkl')
print("Scaler loaded successfully.")

Models loaded successfully.
Scaler loaded successfully.


In [475]:
from sklearn.model_selection import KFold, cross_val_score

# Shared 5-fold splitter; the fixed seed gives every model the same folds.
k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=21)

# Cross-validate Naive Bayes model
nb_scores = cross_val_score(naive_bayes_model, X, y, scoring='accuracy', cv=kf)
print(f'Naive Bayes Model Accuracy for each fold: {nb_scores}')
print(f'Average Naive Bayes Model Accuracy: {np.mean(nb_scores)}')

# Cross-validate Perceptron model
perc_scores = cross_val_score(perceptron_model, X, y, scoring='accuracy', cv=kf)
print(f'Perceptron Model Accuracy for each fold: {perc_scores}')
print(f'Average Perceptron Model Accuracy: {np.mean(perc_scores)}')

# Cross-validate Custom Perceptron model
# The custom class is trained by hand on each fold, working on the underlying
# NumPy arrays with a fresh model per fold.
feature_array, label_array = X.to_numpy(), y.to_numpy()
custom_perc_scores = []
for train_index, test_index in kf.split(feature_array):
    fold_custom_perceptron = CustomPerceptron(learning_rate=0.01, n_iters=1000)
    fold_custom_perceptron.fit(feature_array[train_index], label_array[train_index])
    y_pred_fold = fold_custom_perceptron.predict(feature_array[test_index])
    custom_perc_scores.append(accuracy_score(label_array[test_index], y_pred_fold))

print(f'Custom Perceptron Model Accuracy for each fold: {custom_perc_scores}')
print(f'Average Custom Perceptron Model Accuracy: {np.mean(custom_perc_scores)}')

Naive Bayes Model Accuracy for each fold: [0.69480519 0.74025974 0.80519481 0.77777778 0.75163399]
Average Naive Bayes Model Accuracy: 0.7539343009931245
Perceptron Model Accuracy for each fold: [0.65584416 0.75324675 0.7012987  0.73856209 0.7254902 ]
Average Perceptron Model Accuracy: 0.714888379594262
Custom Perceptron Model Accuracy for each fold: [0.6883116883116883, 0.6948051948051948, 0.6883116883116883, 0.7973856209150327, 0.7320261437908496]
Average Custom Perceptron Model Accuracy: 0.7201680672268906


In [476]:
# TODO: Compare the performance of the three models