In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Load the dataset (replace with your actual file path)
# Example: df = pd.read_csv('diabetes.csv')
df = pd.read_csv('diabetes_svm_sample_data.csv')  # Replace with your actual path

# Display the first few rows to check the data
print(df.head())

# Check for missing values
print(df.isnull().sum())

# Separate features (X) and target variable (y)
X = df.drop('DiabetesOutcome', axis=1)  # Features
y = df['DiabetesOutcome']  # Target variable

# Split the dataset into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the feature values to have mean=0 and variance=1 for better performance of SVM
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize the SVM model (using Radial Basis Function (RBF) kernel by default)
svm_model = SVC(kernel='rbf', random_state=42)

# Train the SVM model on the training data
svm_model.fit(X_train, y_train)

# Predict the target for the test set
y_pred = svm_model.predict(X_test)

# Evaluate the model performance
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Print evaluation metrics
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")

# Print the classification report for more details
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Display the confusion matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))


   Age        BMI  BloodPressure  InsulinLevel  GlucoseLevel  SkinThickness  \
0   58  27.603132            118           128           193             41   
1   48  26.989953             95           165           102             30   
2   34  24.809996             78           141           170             25   
3   62  18.802716            126           169            81             30   
4   27  22.775112             78           118           136             20   

   DiabetesOutcome  
0                1  
1                1  
2                1  
3                1  
4                1  
Age                0
BMI                0
BloodPressure      0
InsulinLevel       0
GlucoseLevel       0
SkinThickness      0
DiabetesOutcome    0
dtype: int64
Accuracy: 0.60
Precision: 0.86
Recall: 0.46
F1-score: 0.60

Classification Report:
              precision    recall  f1-score   support

           0       0.46      0.86      0.60         7
           1       0.86      0.46      0.60    