In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm

In [None]:
# Read the diabetes CSV into a DataFrame; later cells take the features and
# the "Outcome" label from it.
csv_path = "/content/diabetes.csv"
diabdata = pd.read_csv(csv_path)

In [None]:
# Quick look at the target: number of rows per Outcome value, then the mean of
# every other column within each Outcome group.
outcome_counts = diabdata['Outcome'].value_counts()
class_means = diabdata.groupby('Outcome').mean()
print("Class Distribution:\n", outcome_counts)
print("\nGroup Mean by Class:\n", class_means)

Class Distribution:
 Outcome
0    500
1    268
Name: count, dtype: int64

Group Mean by Class:
          Pregnancies     Glucose  BloodPressure  SkinThickness     Insulin  \
Outcome                                                                      
0           3.298000  109.980000      68.184000      19.664000   68.792000   
1           4.865672  141.257463      70.824627      22.164179  100.335821   

               BMI  DiabetesPedigreeFunction        Age  
Outcome                                                  
0        30.304200                  0.429734  31.190000  
1        35.142537                  0.550500  37.067164  


In [None]:
# The label is the "Outcome" column; the features are every remaining column.
Y = diabdata["Outcome"]
X = diabdata.drop(columns=["Outcome"])


In [None]:
# Hold out 35% of the rows first (stratified on Y so both parts keep the class
# ratio), and only then fit the scaler -- on the training rows alone.
# Fitting StandardScaler on all of X before splitting lets test-set statistics
# leak into the features the models are trained on.
Xtrain_raw, Xtest_raw, Ytrain, Ytest = train_test_split(
    X, Y, test_size=0.35, stratify=Y, random_state=2
)

# X itself is left untouched so this cell gives the same result when re-run;
# overwriting it with scaled values would make a second run refit the scaler
# on already-standardized data.
scaler = StandardScaler()
Xtrain = scaler.fit_transform(Xtrain_raw)
Xtest = scaler.transform(Xtest_raw)

In [None]:
# Fit a default-configured logistic regression on the training split and keep
# its predictions for the held-out rows.
print("Model: Logistic Regression")
log_model = LogisticRegression().fit(Xtrain, Ytrain)
log_pred = log_model.predict(Xtest)

Model: Logistic Regression


In [None]:
# Accuracy of the logistic regression predictions against the held-out labels.
log_accuracy = accuracy_score(Ytest, log_pred)
print("Accuracy:", log_accuracy)

Accuracy: 0.7769516728624535


In [None]:
# Text report of the logistic regression predictions on the held-out rows.
log_report = classification_report(Ytest, log_pred)
print("\nClassification Report:\n", log_report)


Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.91      0.84       175
           1       0.77      0.52      0.62        94

    accuracy                           0.78       269
   macro avg       0.77      0.72      0.73       269
weighted avg       0.78      0.78      0.76       269



In [None]:
# Confusion matrix for logistic regression: true labels first, predictions second.
log_cm = confusion_matrix(Ytest, log_pred)
print("Confusion Matrix:\n", log_cm)

Confusion Matrix:
 [[160  15]
 [ 45  49]]


In [None]:
# Classify one hand-entered record with the logistic regression model.
# The eight values are positional, so they must follow the column order the
# scaler was fitted on (exposed as scaler.feature_names_in_).
input1 = np.asarray((4, 120, 92, 0, 0, 37.6, 0.191, 30)).reshape(1, -1)
# Label the row before scaling: the scaler was fitted on a DataFrame, and
# handing it a bare array makes scikit-learn warn about missing feature names.
input1_frame = pd.DataFrame(input1, columns=scaler.feature_names_in_)
input1_std = scaler.transform(input1_frame)
log_result = log_model.predict(input1_std)
print("\nLogistic Regression Prediction:", "No diabetes" if log_result[0] == 0 else "Has diabetes")


Logistic Regression Prediction: No diabetes




In [None]:
# Fit a 3-nearest-neighbours classifier on the training split and keep its
# predictions for the held-out rows.
print("Model: K-Nearest Neighbors")
knn_model = KNeighborsClassifier(n_neighbors=3).fit(Xtrain, Ytrain)
knn_pred = knn_model.predict(Xtest)

Model: K-Nearest Neighbors


In [None]:
# Accuracy of the KNN predictions against the held-out labels.
knn_accuracy = accuracy_score(Ytest, knn_pred)
print("Accuracy:", knn_accuracy)

Accuracy: 0.7286245353159851


In [None]:
# Text report of the KNN predictions on the held-out rows.
knn_report = classification_report(Ytest, knn_pred)
print("\nClassification Report:\n", knn_report)


Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.83      0.80       175
           1       0.63      0.54      0.58        94

    accuracy                           0.73       269
   macro avg       0.70      0.69      0.69       269
weighted avg       0.72      0.73      0.72       269



In [None]:
# Confusion matrix for KNN: true labels first, predictions second.
knn_cm = confusion_matrix(Ytest, knn_pred)
print("Confusion Matrix:\n", knn_cm)

Confusion Matrix:
 [[145  30]
 [ 43  51]]


In [None]:
# Classify one hand-entered record with the KNN model.
# The eight values are positional, so they must follow the column order the
# scaler was fitted on (exposed as scaler.feature_names_in_).
input2 = np.asarray((5, 166, 72, 19, 175, 25.8, 0.587, 51)).reshape(1, -1)
# Label the row before scaling: the scaler was fitted on a DataFrame, and
# handing it a bare array makes scikit-learn warn about missing feature names.
input2_frame = pd.DataFrame(input2, columns=scaler.feature_names_in_)
input2_std = scaler.transform(input2_frame)
knn_result = knn_model.predict(input2_std)
print("\nKNN Prediction:", "No diabetes" if knn_result[0] == 0 else "Has diabetes")


KNN Prediction: Has diabetes




In [None]:
# Fit a decision tree (seeded with random_state=2) on the training split and
# keep its predictions for the held-out rows.
print("Model: Decision Tree")
dt_model = DecisionTreeClassifier(random_state=2).fit(Xtrain, Ytrain)
dt_pred = dt_model.predict(Xtest)

Model: Decision Tree


In [None]:
# Accuracy of the decision tree predictions against the held-out labels.
dt_accuracy = accuracy_score(Ytest, dt_pred)
print("Accuracy:", dt_accuracy)

Accuracy: 0.654275092936803


In [None]:
# Text report of the decision tree predictions on the held-out rows.
dt_report = classification_report(Ytest, dt_pred)
print("\nClassification Report:\n", dt_report)


Classification Report:
               precision    recall  f1-score   support

           0       0.72      0.76      0.74       175
           1       0.51      0.46      0.48        94

    accuracy                           0.65       269
   macro avg       0.61      0.61      0.61       269
weighted avg       0.65      0.65      0.65       269



In [None]:
# Confusion matrix for the decision tree: true labels first, predictions second.
dt_cm = confusion_matrix(Ytest, dt_pred)
print("Confusion Matrix:\n", dt_cm)

Confusion Matrix:
 [[133  42]
 [ 51  43]]


In [None]:
# Classify one hand-entered record with the decision tree model.
# The eight values are positional, so they must follow the column order the
# scaler was fitted on (exposed as scaler.feature_names_in_).
input3 = np.asarray((2, 100, 68, 25, 85, 30.0, 0.5, 28)).reshape(1, -1)
# Label the row before scaling: the scaler was fitted on a DataFrame, and
# handing it a bare array makes scikit-learn warn about missing feature names.
input3_frame = pd.DataFrame(input3, columns=scaler.feature_names_in_)
input3_std = scaler.transform(input3_frame)
dt_result = dt_model.predict(input3_std)
print("\nDecision Tree Prediction:", "No diabetes" if dt_result[0] == 0 else "Has diabetes")


Decision Tree Prediction: No diabetes




In [None]:
# Fit a linear-kernel support vector classifier on the training split and keep
# its predictions for the held-out rows.
print("Model: SVM (Linear Kernel)")
svm_model = svm.SVC(kernel="linear").fit(Xtrain, Ytrain)
svm_pred = svm_model.predict(Xtest)

Model: SVM (Linear Kernel)


In [None]:
# Accuracy of the SVM predictions against the held-out labels.
svm_accuracy = accuracy_score(Ytest, svm_pred)
print("Accuracy:", svm_accuracy)

Accuracy: 0.7843866171003717


In [None]:
# Text report of the SVM predictions on the held-out rows.
svm_report = classification_report(Ytest, svm_pred)
print("\nClassification Report:\n", svm_report)


Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.93      0.85       175
           1       0.79      0.52      0.63        94

    accuracy                           0.78       269
   macro avg       0.79      0.72      0.74       269
weighted avg       0.79      0.78      0.77       269



In [None]:
# Confusion matrix for the SVM: true labels first, predictions second.
svm_cm = confusion_matrix(Ytest, svm_pred)
print("Confusion Matrix:\n", svm_cm)

Confusion Matrix:
 [[162  13]
 [ 45  49]]


In [None]:
# Classify one hand-entered record with the linear SVM model.
# The eight values are positional, so they must follow the column order the
# scaler was fitted on (exposed as scaler.feature_names_in_).
input4 = np.asarray((3, 150, 78, 0, 0, 35.0, 0.2, 45)).reshape(1, -1)
# Label the row before scaling: the scaler was fitted on a DataFrame, and
# handing it a bare array makes scikit-learn warn about missing feature names.
input4_frame = pd.DataFrame(input4, columns=scaler.feature_names_in_)
input4_std = scaler.transform(input4_frame)
svm_result = svm_model.predict(input4_std)
print("\nSVM Prediction:", "No diabetes" if svm_result[0] == 0 else "Has diabetes")


SVM Prediction: Has diabetes


