In [1]:
from sklearn.datasets import load_diabetes
import pandas as pd

# Load diabetes dataset
data = load_diabetes(as_frame=True)
df = data.frame

# Show first 5 rows
df.head()


Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


In [2]:
# Convert to binary classification
df['label'] = df['target'].apply(lambda x: 1 if x > 120 else 0)

# Drop original target column
df = df.drop(columns='target')

# Check updated dataset
df.head()


Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,label
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,1
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,1
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,1
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,1


In [3]:
from sklearn.model_selection import train_test_split

X = df.drop('label', axis=1)
y = df['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [4]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)


In [5]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


Accuracy: 0.651685393258427

Classification Report:
               precision    recall  f1-score   support

           0       0.79      0.36      0.49        42
           1       0.61      0.91      0.74        47

    accuracy                           0.65        89
   macro avg       0.70      0.64      0.61        89
weighted avg       0.70      0.65      0.62        89


Confusion Matrix:
 [[15 27]
 [ 4 43]]


In [6]:
import numpy as np

# Example input (same number of features as X)
sample = np.array([0.03, 0.04, 0.02, 0.01, 0.00, -0.01, 0.02, 0.01, 0.03, 0.04]).reshape(1, -1)

prediction = model.predict(sample)
print("Diabetic" if prediction[0] == 1 else "Not Diabetic")


Diabetic


