In [1]:
# libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [3]:
# Synthetic dataset: 100 random integer readings per feature.
# Seeding NumPy's global generator makes every draw below reproducible.
np.random.seed(42)

# Each spec is (column name, inclusive lower bound, exclusive upper bound).
# Columns are drawn in the listed order, one array of 100 values each.
data = {
    column: np.random.randint(low, high, 100)
    for column, low, high in (
        ('BP', 100, 180),
        ('Cholestrol', 150, 280),
        ('Glucose', 70, 200),
        ('Weight', 50, 100),
    )
}

In [22]:
# Define target: diabetes risk ('yes' / 'no').
# Simple rule-based labeling for demonstration only:
#   'yes' if (BP > 100 and Cholestrol > 220) or Glucose > 140, otherwise 'no'.
# NOTE(review): BP is generated with randint(100, 180), so `BP > 100` is false
# only when BP == 100 and barely constrains the rule. If a genuinely "high BP"
# cut-off was intended, confirm the threshold before changing it (doing so
# alters the labels and every downstream result).
bp = np.asarray(data['BP'])
cholesterol = np.asarray(data['Cholestrol'])
glucose = np.asarray(data['Glucose'])

# Evaluate the rule on whole arrays: the number of labels always matches the
# number of samples instead of depending on a hard-coded row count.
at_risk = ((bp > 100) & (cholesterol > 220)) | (glucose > 140)

# Keep `risk` a plain list of Python strings, one label per sample.
risk = np.where(at_risk, 'yes', 'no').tolist()

data['Diabetes_Risk'] = risk

In [28]:
# Build one table from the dict: every key in `data` becomes a column.
df = pd.DataFrame(data)
# Last expression of the cell: show summary statistics. Only the numeric
# feature columns appear; the string label column is left out.
df.describe()

Unnamed: 0,BP,Cholestrol,Glucose,Weight
count,100.0,100.0,100.0,100.0
mean,138.11,220.95,136.42,74.55
std,24.295693,39.534252,38.037244,14.521579
min,100.0,150.0,73.0,50.0
25%,114.0,187.75,103.5,62.75
50%,140.0,220.5,128.0,75.0
75%,159.0,253.0,169.25,86.0
max,179.0,279.0,199.0,99.0


In [29]:
# Sanity check: list the column names available for feature/target selection.
df.columns

Index(['BP', 'Cholestrol', 'Glucose', 'Weight', 'Diabetes_Risk'], dtype='object')

In [30]:
# Separate the predictors from the target.
feature_columns = ['BP', 'Cholestrol', 'Glucose', 'Weight']
X = df.loc[:, feature_columns]    # feature matrix, one column per measurement
y = df.loc[:, 'Diabetes_Risk']    # 'yes' / 'no' label for each row

In [46]:
# Train/test split: hold out 30% of the rows for evaluation. The fixed
# random_state makes the shuffle, and therefore the split, reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [47]:
# Classifier: decision tree using the entropy split criterion, limited to a
# depth of 4; random_state is fixed so refitting gives the same tree.
clf = DecisionTreeClassifier(criterion='entropy', max_depth=4, random_state=42)
# Train on the training split only; the test rows stay unseen until evaluation.
clf.fit(X_train, y_train)

In [48]:
# Predict a 'yes' / 'no' label for each held-out test row.
y_pred = clf.predict(X_test)

In [49]:
# Confusion matrix on the test set.
# Rows are the true labels and columns the predicted labels. Passing `labels`
# pins both axes to the order ['no', 'yes'] and keeps the matrix 2x2 even if
# one class happens to be absent from the test split.
confusion = confusion_matrix(y_test, y_pred, labels=['no', 'yes'])
print(confusion)

[[ 8  0]
 [ 0 22]]


In [50]:
# Classification report: per-class precision, recall, F1-score and support,
# plus overall accuracy and averaged scores.
# NOTE(review): the labels are produced by a deterministic threshold rule on
# these same features, so a perfect score here presumably means the tree
# recovered that rule. It says nothing about real-world predictive power.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

          no       1.00      1.00      1.00         8
         yes       1.00      1.00      1.00        22

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

