In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
import pickle

# Read the diabetes records from the CSV file in the working directory
diabetes = pd.read_csv(filepath_or_buffer='diabetes.csv')

# Preview the first five rows as the cell output
diabetes.head(5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [16]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column
diabetes.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [7]:
# Split the data into features (X) and target (y)
X_diabetes = diabetes.drop('Outcome', axis=1)
y_diabetes = diabetes['Outcome']

# NOTE(review): the describe() output shows a minimum of 0 for Glucose,
# BloodPressure, SkinThickness, Insulin and BMI; these are presumably
# placeholders for missing measurements -- confirm and consider imputing.

# Split the dataset into 80% training and 20% testing BEFORE scaling, so the
# scaler's statistics are never computed from the held-out rows.
X_train, X_test, y_train, y_test = train_test_split(
    X_diabetes, y_diabetes, test_size=0.2, random_state=42
)

from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, then apply that same
# transformation to the test rows. Fitting on the full dataset (as was done
# previously) leaks test-set mean/variance into training and into the
# persisted scaler, which makes the reported accuracies optimistic.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Keep X_diabetes bound to a scaled array of the full feature matrix, as
# before, but scaled with the train-fitted statistics.
X_diabetes = sc.transform(X_diabetes)

In [8]:
# Persist the fitted scaler so the same transformation can be re-applied later
with open('diabetes_scaler.pkl', mode='wb') as scaler_file:
    pickle.dump(sc, scaler_file)

In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Fit a logistic-regression classifier on the training split
# (fit() returns the estimator, so construction and training are chained).
logreg = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Score the held-out split: overall accuracy plus a per-class text report
y_pred_logreg = logreg.predict(X_test)
accuracy_logreg = accuracy_score(y_true=y_test, y_pred=y_pred_logreg)
report_logreg = classification_report(y_true=y_test, y_pred=y_pred_logreg)

# Bundle the estimator with its test accuracy and pickle the bundle to disk
data_to_save = dict(model=logreg, accuracy=accuracy_logreg)
with open("logreg_diabetes_model.pkl", mode='wb') as model_file:
    pickle.dump(data_to_save, model_file)

In [10]:
from sklearn.neighbors import KNeighborsClassifier

# Fit a 3-nearest-neighbours classifier on the training split
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Score the held-out split: overall accuracy plus a per-class text report
y_pred_knn = knn.predict(X_test)
accuracy_knn = accuracy_score(y_true=y_test, y_pred=y_pred_knn)
report_knn = classification_report(y_true=y_test, y_pred=y_pred_knn)

# Bundle the estimator with its test accuracy and pickle the bundle to disk
data_to_save = dict(model=knn, accuracy=accuracy_knn)
with open("knn_diabetes_model.pkl", mode='wb') as model_file:
    pickle.dump(data_to_save, model_file)

In [11]:
from sklearn.svm import SVC
import pickle

# Initialize and train the SVM model.
# probability=True makes SVC run an internal cross-validated calibration that
# shuffles the data; without a fixed random_state the resulting probability
# estimates differ from run to run. Seed it with 42, matching the other
# stochastic models in this notebook, so the pickled model is reproducible.
svm = SVC(kernel='linear', probability=True, random_state=42)
svm.fit(X_train, y_train)

# Predict on the test set
y_pred_svm = svm.predict(X_test)

# Evaluate the model: overall accuracy and a per-class text report
accuracy_svm = accuracy_score(y_test, y_pred_svm)
report_svm = classification_report(y_test, y_pred_svm)

# Save the trained model together with its test accuracy as a pickle file
data_to_save = {
    'model': svm,
    'accuracy': accuracy_svm
}

with open("svm_diabetes_model.pkl", 'wb') as file:
    pickle.dump(data_to_save, file)

In [12]:
from sklearn.tree import DecisionTreeClassifier

# Fit a seeded decision-tree classifier on the training split
dtree = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Score the held-out split: overall accuracy plus a per-class text report
y_pred_dtree = dtree.predict(X_test)
accuracy_dtree = accuracy_score(y_true=y_test, y_pred=y_pred_dtree)
report_dtree = classification_report(y_true=y_test, y_pred=y_pred_dtree)

# Bundle the estimator with its test accuracy and pickle the bundle to disk
data_to_save = dict(model=dtree, accuracy=accuracy_dtree)
with open("dtree_diabetes_model.pkl", mode='wb') as model_file:
    pickle.dump(data_to_save, model_file)


In [13]:
from sklearn.ensemble import RandomForestClassifier

# Fit a seeded 100-tree random forest on the training split
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out split: overall accuracy plus a per-class text report
y_pred_rf = rf.predict(X_test)
accuracy_rf = accuracy_score(y_true=y_test, y_pred=y_pred_rf)
report_rf = classification_report(y_true=y_test, y_pred=y_pred_rf)

# Bundle the estimator with its test accuracy and pickle the bundle to disk
data_to_save = dict(model=rf, accuracy=accuracy_rf)
with open("rf_diabetes_model.pkl", mode='wb') as model_file:
    pickle.dump(data_to_save, model_file)


In [14]:
from sklearn.ensemble import GradientBoostingClassifier

# Fit a seeded gradient-boosting classifier (100 stages) on the training split
gbm = GradientBoostingClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out split: overall accuracy plus a per-class text report
y_pred_gbm = gbm.predict(X_test)
accuracy_gbm = accuracy_score(y_true=y_test, y_pred=y_pred_gbm)
report_gbm = classification_report(y_true=y_test, y_pred=y_pred_gbm)

# Bundle the estimator with its test accuracy and pickle the bundle to disk
data_to_save = dict(model=gbm, accuracy=accuracy_gbm)
with open("gbm_diabetes_model.pkl", mode='wb') as model_file:
    pickle.dump(data_to_save, model_file)


In [15]:
from sklearn.ensemble import AdaBoostClassifier

# Fit a seeded AdaBoost classifier (100 estimators) on the training split
adaboost = AdaBoostClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out split: overall accuracy plus a per-class text report
y_pred_adaboost = adaboost.predict(X_test)
accuracy_adaboost = accuracy_score(y_true=y_test, y_pred=y_pred_adaboost)
report_adaboost = classification_report(y_true=y_test, y_pred=y_pred_adaboost)

# Bundle the estimator with its test accuracy and pickle the bundle to disk
data_to_save = dict(model=adaboost, accuracy=accuracy_adaboost)
with open("adaboost_diabetes_model.pkl", mode='wb') as model_file:
    pickle.dump(data_to_save, model_file)
