In [2]:
# Importing the Dependencies
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm, tree, ensemble, neighbors
from sklearn.metrics import accuracy_score

# Data Collection and Analysis

# PIMA Diabetes Dataset
diabetes_dataset = pd.read_csv('/content/diabetes.csv')

# Separating the data and labels
X = diabetes_dataset.drop(columns='Outcome', axis=1)
Y = diabetes_dataset['Outcome']

# Standardization
scaler = StandardScaler()
scaler.fit(X)
standardized_data = scaler.transform(X)
X = standardized_data

# Train-Test Split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)

# Training and Evaluating Different Models

# Support Vector Machine (SVM) with Linear Kernel
classifier_svm = svm.SVC(kernel='linear')
classifier_svm.fit(X_train, Y_train)
svm_train_predictions = classifier_svm.predict(X_train)
svm_test_predictions = classifier_svm.predict(X_test)
svm_train_accuracy = accuracy_score(svm_train_predictions, Y_train)
svm_test_accuracy = accuracy_score(svm_test_predictions, Y_test)

# Decision Tree Classifier
classifier_tree = tree.DecisionTreeClassifier()
classifier_tree.fit(X_train, Y_train)
tree_train_predictions = classifier_tree.predict(X_train)
tree_test_predictions = classifier_tree.predict(X_test)
tree_train_accuracy = accuracy_score(tree_train_predictions, Y_train)
tree_test_accuracy = accuracy_score(tree_test_predictions, Y_test)

# Random Forest Classifier
classifier_rf = ensemble.RandomForestClassifier(n_estimators=100, random_state=2)
classifier_rf.fit(X_train, Y_train)
rf_train_predictions = classifier_rf.predict(X_train)
rf_test_predictions = classifier_rf.predict(X_test)
rf_train_accuracy = accuracy_score(rf_train_predictions, Y_train)
rf_test_accuracy = accuracy_score(rf_test_predictions, Y_test)

# K-Nearest Neighbors (K-NN) Classifier
classifier_knn = neighbors.KNeighborsClassifier(n_neighbors=5)
classifier_knn.fit(X_train, Y_train)
knn_train_predictions = classifier_knn.predict(X_train)
knn_test_predictions = classifier_knn.predict(X_test)
knn_train_accuracy = accuracy_score(knn_train_predictions, Y_train)
knn_test_accuracy = accuracy_score(knn_test_predictions, Y_test)

# Displaying Model Performance

print("SVM Accuracy (Training):", svm_train_accuracy)
print("SVM Accuracy (Testing):", svm_test_accuracy)

print("Decision Tree Accuracy (Training):", tree_train_accuracy)
print("Decision Tree Accuracy (Testing):", tree_test_accuracy)

print("Random Forest Accuracy (Training):", rf_train_accuracy)
print("Random Forest Accuracy (Testing):", rf_test_accuracy)

print("K-NN Accuracy (Training):", knn_train_accuracy)
print("K-NN Accuracy (Testing):", knn_test_accuracy)


SVM Accuracy (Training): 0.7866449511400652
SVM Accuracy (Testing): 0.7727272727272727
Decision Tree Accuracy (Training): 1.0
Decision Tree Accuracy (Testing): 0.7012987012987013
Random Forest Accuracy (Training): 1.0
Random Forest Accuracy (Testing): 0.7272727272727273
K-NN Accuracy (Training): 0.8289902280130294
K-NN Accuracy (Testing): 0.7207792207792207
