In [28]:
# 1. Import required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import roc_curve, auc

In [5]:
# 2. Load the dataset
# The Pima Indians Diabetes CSV ships without a header row, so the column
# names are supplied here. It is read with pandas' default comma delimiter.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
columns = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
    'Outcome',
]
data = pd.read_csv(url, names=columns, header=None)

In [6]:
# 3. Preprocess the data
# In these columns a 0 is treated as a placeholder for a missing measurement,
# so zeros are converted to NaN before imputation.
zero_as_missing = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
data[zero_as_missing] = data[zero_as_missing].replace(0, np.nan)

# Fill missing values with the mean of each column. The result is rebound to
# `data` rather than using inplace=True, which gives no benefit and hides mutation.
# NOTE(review): the means are computed over every row, i.e. before the
# train/test split, so test-set information leaks into the imputed values.
# Consider imputing with statistics from the training partition only.
data = data.fillna(data.mean())

# Split the data into features (X) and target (y)
X = data.drop(columns='Outcome')
y = data['Outcome']

In [7]:
# 4. Split the data into training and testing sets
# 20% of the rows are held out for testing; the fixed random_state makes the
# partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [8]:
# 5. Standardize the features
# The scaler is fitted on the training partition only; both partitions are
# then transformed with those same training statistics.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [31]:
# 6. Train an SVM model with a linear kernel
# class_weight='balanced' reweights the classes to adjust for class imbalance.
svm_model = SVC(kernel='linear', class_weight='balanced').fit(X_train, y_train)

# 7. Predict labels for the held-out test set
y_pred = svm_model.predict(X_test)

# 8. Score the predictions against the true test labels
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy * 100:.2f}%")
print("Confusion Matrix:", conf_matrix, "Classification Report:", class_report, sep="\n")

Accuracy: 70.13%
Confusion Matrix:
[[68 31]
 [15 40]]
Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.69      0.75        99
           1       0.56      0.73      0.63        55

    accuracy                           0.70       154
   macro avg       0.69      0.71      0.69       154
weighted avg       0.73      0.70      0.71       154



In [26]:
# 6. Train an SVM model with an RBF kernel
# class_weight='balanced' reweights the classes to adjust for class imbalance.
svm_model = SVC(kernel='rbf', class_weight='balanced').fit(X_train, y_train)

# 7. Predict labels for the held-out test set
y_pred = svm_model.predict(X_test)

# 8. Score the predictions against the true test labels
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy * 100:.2f}%")
print("Confusion Matrix:", conf_matrix, "Classification Report:", class_report, sep="\n")

Accuracy: 72.08%
Confusion Matrix:
[[68 31]
 [12 43]]
Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.69      0.76        99
           1       0.58      0.78      0.67        55

    accuracy                           0.72       154
   macro avg       0.72      0.73      0.71       154
weighted avg       0.75      0.72      0.73       154



In [27]:
# 6. Train an SVM model with a polynomial kernel
# class_weight='balanced' reweights the classes to adjust for class imbalance.
svm_model = SVC(kernel='poly', class_weight='balanced').fit(X_train, y_train)

# 7. Predict labels for the held-out test set
y_pred = svm_model.predict(X_test)

# 8. Score the predictions against the true test labels
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy * 100:.2f}%")
print("Confusion Matrix:", conf_matrix, "Classification Report:", class_report, sep="\n")

Accuracy: 78.57%
Confusion Matrix:
[[81 18]
 [15 40]]
Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.82      0.83        99
           1       0.69      0.73      0.71        55

    accuracy                           0.79       154
   macro avg       0.77      0.77      0.77       154
weighted avg       0.79      0.79      0.79       154

