In [93]:
# Import Libraries
import numpy as np
import pandas as pd

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from collections import Counter

In [94]:
# Load Dataset
data = pd.read_csv('/content/diabetes.csv')

In [74]:
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [7]:
X = data.drop('Outcome',axis=1)
y = data["Outcome"]

In [8]:
print("Dataset shape: ",X.shape)
print("Classes: ",np.unique(y))

Dataset shape:  (768, 8)
Classes:  [0 1]


In [95]:
# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [96]:
# Feature Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#Logistic Regression

In [97]:
# Create and Train Logistic Regression Model
model = LogisticRegression()
model.fit(X_train, y_train)

In [98]:
# Predictions
y_pred = model.predict(X_test)

In [99]:
# Accuracy
acc = accuracy_score(y_test, y_pred)
print("Accuracy: ",acc)

Accuracy:  0.7532467532467533


In [100]:
# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n",cm)

Confusion Matrix:
 [[79 20]
 [18 37]]


In [89]:
patient = X_test[56].reshape(1, -1)
prediction = model.predict(patient)
if prediction == 1:
  print("Model prediction: Diabetes")
else:
  print("Model Prediction: No Diabetes")

Model prediction: Diabetes


#L1 Regularization

In [24]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [25]:
l1_model = LogisticRegression(
    penalty='l1',
    solver='liblinear',
    C=1.0
)
l1_model.fit(X_train_scaled, y_train)

In [27]:
l1_predictions = l1_model.predict(X_test_scaled)
l1_accuracy = accuracy_score(y_test, l1_predictions)
print("L1 Accuracy: ",l1_accuracy)

L1 Accuracy:  0.7467532467532467


In [28]:
print("L1 Coefficents: ",l1_model.coef_)
print("L1 Intercept: ",l1_model.intercept_)

L1 Coefficents:  [[ 0.2051172   1.06175    -0.23230707  0.02628086 -0.18406104  0.77121666
   0.22160552  0.41176274]]
L1 Intercept:  [-0.87174351]


#L2 Regularization

In [31]:
l2_model = LogisticRegression(
    penalty='l2',
    solver='lbfgs',
    C=1.0
)
l2_model.fit(X_train_scaled, y_train)

In [32]:
l2_predictions = l2_model.predict(X_test_scaled)
l2_accuracy = accuracy_score(y_test, l2_predictions)
print("L2 Accuracy: ",l2_accuracy)

L2 Accuracy:  0.7532467532467533


In [33]:
print("L2 Coefficients: ",l2_model.coef_)
print("L2 Intercept: ",l2_model.intercept_)

L2 Coefficients:  [[ 0.21255394  1.07130143 -0.24782647  0.0457288  -0.20078252  0.77818216
   0.23062467  0.42120732]]
L2 Intercept:  [-0.88612015]


#K-Nearest Neighbour

In [65]:
test_point = X_test[20]
K = 25

In [66]:
distances = []

for i in range(len(X_train)):
  distance = np.sqrt(np.sum((X_train[i] - test_point) ** 2))
  distances.append((distance, y_train.iloc[i]))

In [67]:
distances.sort(key = lambda x:x[0])

In [68]:
k_nearest_labels = [label for distance, label in distances[:K]]

In [69]:
prediction = Counter(k_nearest_labels).most_common(1)[0][0]

In [81]:
if prediction == 1:
  print("Model Prediction: Diabetes")
else:
  print("Model Prediction: No Diabetes")

Model Prediction: Diabetes


In [92]:
print("Accuracy of all the models:")
print("Logistic Regression: ",acc)
print("L1 Regularization: ",l1_accuracy)
print("L2 Regularization: ",l2_accuracy)

Accuracy of all the models:
Logistic Regression:  0.7532467532467533
L1 Regularization:  0.7467532467532467
L2 Regularization:  0.7532467532467533


In [101]:
# Classification Report
logistic_report = classification_report(y_test, y_pred)
l1_report = classification_report(y_test, l1_predictions)
l2_report = classification_report(y_test, l1_predictions)

print("Logistic Regression Classification Report:\n",logistic_report)
print("L1 Regularization Report:\n",l1_report)
print("L2 Regularization Report:\n",l2_report)

Logistic Regression Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.80      0.81        99
           1       0.65      0.67      0.66        55

    accuracy                           0.75       154
   macro avg       0.73      0.74      0.73       154
weighted avg       0.76      0.75      0.75       154

L1 Regularization Report:
               precision    recall  f1-score   support

           0       0.81      0.79      0.80        99
           1       0.64      0.67      0.65        55

    accuracy                           0.75       154
   macro avg       0.73      0.73      0.73       154
weighted avg       0.75      0.75      0.75       154

L2 Regularization Report:
               precision    recall  f1-score   support

           0       0.81      0.79      0.80        99
           1       0.64      0.67      0.65        55

    accuracy                           0.75       154
   macro avg       0.73      0