In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score
import numpy as np

In [None]:
diabetes = pd.read_csv('diabetes.csv')

diabetes.shape
diabetes.info()
diabetes.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [6]:
#Separating data and Label

X= diabetes.drop(columns=['Outcome'])
y= diabetes['Outcome']

In [None]:
#creating a test train split

X_train,X_test,y_train,y_test = train_test_split(X,y,stratify= y, test_size=0.2, random_state=25)

In [27]:
results={} #init a results dict

#select models
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Support Vector Machine': SVC(),
    'Random Forest': RandomForestClassifier(),
    'Gradient Boosting': GradientBoostingClassifier()
}


In [30]:
for model_name, model in models.items():
        
    cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    results[model_name] = {
        'Cross-Validation Accuracy Mean': np.mean(cv_scores),
        'Cross-Validation Accuracy Std': np.std(cv_scores)
    }
    model.fit(X_train,y_train)
    test_accuracy = accuracy_score(y_test, model.predict(X_test))
    
    results[model_name]['Test Accuracy'] = test_accuracy
    
    # Predictions
    predictions = model.predict(X_test)
    results[model_name]['Predictions'] = predictions

      # Initialize a dictionary for each model in results
    results[model_name] = {
        'Test Accuracy': test_accuracy,
        'Predictions': model.predict(X_test)  # Store predictions
    }

   

for model_name,metrics in results.items():
    print(f'{model_name}')
    print(f"  Test Accuracy: {metrics['Test Accuracy'] * 100:.2f}%")
    print(f"  Predictions: {metrics['Predictions']}\n")



Logistic Regression
  Test Accuracy: 79.22%
  Predictions: [0 1 1 0 0 1 0 0 0 1 1 0 0 0 1 1 1 0 1 0 0 0 0 1 0 0 1 1 0 1 1 0 0 0 1 0 0
 0 0 1 0 0 0 1 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0
 0 0 0 0 0 0 1 0 0 0 1 0 1 0 1 1 0 0 0 0 0 1 0 0 0 0 1 1 0 1 1 1 0 1 0 0 0
 0 0 0 0 0 1 0 1 0 0 1 0 1 0 1 0 0 0 0 0 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0
 0 1 0 1 0 0]

K-Nearest Neighbors
  Test Accuracy: 74.03%
  Predictions: [0 1 1 0 0 1 0 0 0 1 1 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 1 1 0 1 1 0 0 0 1 0 1
 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 1 0 1 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0
 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 1 0 0 0 0 0 1 0 0 1 0 0 1 0 0 1 1 0 1 1 0 0
 0 1 0 0 0 1 0 1 0 0 1 0 1 0 0 0 1 0 0 0 1 1 1 1 1 1 0 1 0 0 0 0 0 0 0 0 1
 0 0 0 1 0 0]

Support Vector Machine
  Test Accuracy: 75.97%
  Predictions: [0 1 1 0 0 1 0 0 0 1 0 0 0 0 1 0 1 1 1 0 0 0 0 1 0 0 1 1 0 1 1 0 0 0 1 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 1 0 0 0 0