In [1]:
# Diabetes progression classification (above vs. below median target) using Logistic Regression

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np
import pandas as pd

# Tunable settings, named so a reader does not have to hunt for magic numbers.
SEED = 42        # random_state for the train/test split (makes the split reproducible)
TEST_SIZE = 0.2  # fraction of rows held out for evaluation

# Step 1: Load dataset.
# Note: sklearn's diabetes dataset is a regression dataset (continuous target),
# so a binary label is derived from the target below to simulate classification.
data = load_diabetes(as_frame=True)

# Convert target into binary classification: 1 if strictly above the median, else 0.
# The labelled frame is built as a NEW DataFrame (assign + drop) instead of mutating
# `data.frame` in place, so `data` still holds the untouched original columns and the
# cell stays idempotent when parts of it are re-run.
# NOTE(review): the median is computed over the full dataset, i.e. before the
# train/test split — acceptable for a label definition, but worth being aware of.
target = data.frame["target"]
df = (
    data.frame
    .assign(Outcome=np.where(target > target.median(), 1, 0))
    .drop(columns="target")
)

# Step 2: Prepare data — every remaining column is a feature, "Outcome" is the label.
X = df.drop(columns="Outcome")
y = df["Outcome"]

# Step 3: Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# Step 4: Scale features.
# The scaler is fitted on the training rows only and then applied to the test rows,
# so no test-set statistics leak into the preprocessing.
# From here on X_train / X_test are NumPy arrays (the scaler's output), not DataFrames.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 5: Train model (default LogisticRegression hyper-parameters)
model = LogisticRegression()
model.fit(X_train, y_train)

# Step 6: Predict & evaluate on the held-out rows
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Accuracy: {acc:.2f}")
# confusion_matrix takes (y_true, y_pred): rows are true classes, columns are predictions.
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Accuracy: 0.73
Confusion Matrix:
 [[36 13]
 [11 29]]
