# In [None]:
# diabetes_predictor.py

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

# ------------------------------
# Load dataset
# ------------------------------
# Relative path: resolved against the current working directory at run time.
data_path = "data/diabetes.csv"
diabetes_dataset = pd.read_csv(data_path)

print("\nDataset Info:")
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
diabetes_dataset.info()
print("\nFirst 5 rows:")
print(diabetes_dataset.head())

# ------------------------------
# Data Exploration & Visualization
# ------------------------------
# Class balance: number of rows for each value of the "Outcome" column.
outcome_fig, outcome_ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=diabetes_dataset, x="Outcome", palette="viridis", ax=outcome_ax)
outcome_ax.set_title("Distribution of Diabetes Outcome (0 = No, 1 = Yes)")
plt.show()

# Pairwise correlation between all columns, annotated to two decimals.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 6))
correlations = diabetes_dataset.corr()
sns.heatmap(correlations, annot=True, fmt=".2f", cmap="coolwarm", ax=heatmap_ax)
heatmap_ax.set_title("Correlation Heatmap")
plt.show()

# ------------------------------
# Split features and labels
# ------------------------------
# Every column except "Outcome" is a feature; "Outcome" is the label.
X = diabetes_dataset.drop(columns="Outcome")
Y = diabetes_dataset["Outcome"]

# Train-test split
# Split BEFORE scaling so the held-out rows never influence the scaler's
# statistics. stratify=Y preserves the class ratio in both parts and
# random_state pins the shuffle so runs are reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=2
)

# Standardize features
# Fit on the training portion only, then apply that same transform to the test
# portion. Fitting on the full data set would leak test-set statistics into
# training and make the reported test accuracy optimistic.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# X keeps its previous role (the standardized full feature matrix as an
# ndarray), now expressed with the training-set statistics.
X = scaler.transform(X)

# ------------------------------
# Train Model
# ------------------------------
# Support-vector classifier with a linear kernel, fitted on the training split.
classifier = svm.SVC(kernel="linear")
classifier.fit(X_train, Y_train)

# Accuracy
# accuracy_score is documented as (y_true, y_pred): ground truth goes first.
# The value is the same either way for plain accuracy, but the documented
# order keeps the call correct if another metric is swapped in later.
train_predictions = classifier.predict(X_train)
test_predictions = classifier.predict(X_test)
train_acc = accuracy_score(Y_train, train_predictions)
test_acc = accuracy_score(Y_test, test_predictions)

print("\nModel Performance:")
print("Training Accuracy:", train_acc)
print("Testing Accuracy:", test_acc)

# ------------------------------
# User Input Prediction
# ------------------------------
def _prompt_float(prompt):
    """Prompt on stdin until the reply parses as a finite float; return it.

    A reply that is not a number, or that parses to nan/inf, is reported and
    the same prompt is shown again instead of aborting the script.
    EOFError / KeyboardInterrupt from input() are deliberately not caught.
    """
    while True:
        raw = input(prompt)
        try:
            value = float(raw)
        except ValueError:
            print("Invalid number:", repr(raw), "- please try again.")
            continue
        # float() accepts "nan" and "inf"; neither is a usable measurement.
        if np.isfinite(value):
            return value
        print("Value must be finite - please try again.")


print("\nEnter the following patient details:")

pregnancies = _prompt_float("Number of Pregnancies: ")
glucose = _prompt_float("Glucose Level: ")
blood_pressure = _prompt_float("Blood Pressure value: ")
skin_thickness = _prompt_float("Skin Thickness value: ")
insulin = _prompt_float("Insulin Level: ")
bmi = _prompt_float("BMI value: ")
diabetes_pedigree = _prompt_float("Diabetes Pedigree Function value: ")
age = _prompt_float("Age of the Person: ")

# Prepare input
# One sample as a single-row 2-D array (reshape(1, -1)), the layout the
# scaler and classifier expect for a single observation.
# NOTE(review): assumes the value order below matches the CSV's feature
# column order - verify against the data file.
input_data = np.array(
    [pregnancies, glucose, blood_pressure, skin_thickness, insulin, bmi, diabetes_pedigree, age]
).reshape(1, -1)

# Standardize input
# The scaler was fitted on a DataFrame, so it is given a DataFrame carrying
# the same feature column names; passing a bare ndarray makes scikit-learn
# warn that "X does not have valid feature names".
feature_columns = diabetes_dataset.columns.drop("Outcome")
std_data = scaler.transform(pd.DataFrame(input_data, columns=feature_columns))

# Prediction
# predict() returns one label per row; there is exactly one row here.
prediction = classifier.predict(std_data)[0]

print("\nPrediction result:")
if prediction == 0:
    print("✅ The person is NOT diabetic.")
else:
    print("⚠️ The person IS diabetic.")
