In [1]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier

# Step 1: Define the dataset (BloodPressure, SkinThickness, etc.)
data = {
    'BloodPressure': [72, 66, 64, 66, 40, 74, 50, 0, 70, 96, 92, 74, 80, 60, 72, 0, 84, 74, 30],
    'SkinThickness': [35, 29, 0, 23, 35, 0, 32, 0, 45, 0, 0, 0, 0, 23, 19, 0, 47, 0, 38],
    'Insulin': [0, 0, 0, 94, 168, 0, 88, 0, 543, 0, 0, 0, 0, 846, 175, 0, 230, 0, 83],
    'BMI': [33.6, 26.6, 23.3, 28.1, 43.1, 25.6, 31.0, 35.3, 30.5, 0, 37.6, 38.0, 27.1, 30.1, 25.8, 30.0, 45.8, 29.6, 43.3],
    'DiabetesPedigreeFunction': [0.627, 0.351, 0.672, 0.167, 2.288, 0.201, 0.248, 0.134, 0.158, 0.232, 0.191, 0.537, 1.441, 0.398, 0.587, 0.484, 0.551, 0.254, 0.183],
    'Age': [50, 31, 32, 21, 33, 30, 26, 29, 53, 54, 30, 34, 57, 59, 51, 32, 31, 31, 33],
    'Outcome': [1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0]
}

# Step 2: Convert data into numpy arrays
X = np.column_stack((
    data['BloodPressure'],
    data['SkinThickness'],
    data['Insulin'],
    data['BMI'],
    data['DiabetesPedigreeFunction'],
    data['Age']
))  # Features matrix

y = np.array(data['Outcome'])  # Target vector

# Step 3: Train the Decision Tree classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X, y)

# Step 4: Example of a new entry (user input) for prediction
# Let's say the new entry provided by the user is as follows:
new_entry = np.array([[75, 22, 150, 30.5, 0.428, 45]])  # Example input: [BloodPressure, SkinThickness, Insulin, BMI, DiabetesPedigreeFunction, Age]

# Step 5: Make the prediction for the new entry
predicted_outcome = clf.predict(new_entry)[0]

# Step 6: Output the prediction
if predicted_outcome == 1:
    prediction = "Diabetic"
else:
    prediction = "Not Diabetic"

print(f"The predicted outcome for the new entry is: {prediction}")


The predicted outcome for the new entry is: Diabetic


In [2]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier

# Step 1: Define the initial dataset (BloodPressure, SkinThickness, etc.)
data = {
    'BloodPressure': [72, 66, 64, 66, 40, 74, 50, 0, 70, 96, 92, 74, 80, 60, 72, 0, 84, 74, 30],
    'SkinThickness': [35, 29, 0, 23, 35, 0, 32, 0, 45, 0, 0, 0, 0, 23, 19, 0, 47, 0, 38],
    'Insulin': [0, 0, 0, 94, 168, 0, 88, 0, 543, 0, 0, 0, 0, 846, 175, 0, 230, 0, 83],
    'BMI': [33.6, 26.6, 23.3, 28.1, 43.1, 25.6, 31.0, 35.3, 30.5, 0, 37.6, 38.0, 27.1, 30.1, 25.8, 30.0, 45.8, 29.6, 43.3],
    'DiabetesPedigreeFunction': [0.627, 0.351, 0.672, 0.167, 2.288, 0.201, 0.248, 0.134, 0.158, 0.232, 0.191, 0.537, 1.441, 0.398, 0.587, 0.484, 0.551, 0.254, 0.183],
    'Age': [50, 31, 32, 21, 33, 30, 26, 29, 53, 54, 30, 34, 57, 59, 51, 32, 31, 31, 33],
    'Outcome': [1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0]
}

# Step 2: Convert data into numpy arrays (features and target)
X = np.column_stack((
    data['BloodPressure'],
    data['SkinThickness'],
    data['Insulin'],
    data['BMI'],
    data['DiabetesPedigreeFunction'],
    data['Age']
))  # Features matrix

y = np.array(data['Outcome'])  # Target vector

# Step 3: Train the Decision Tree classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X, y)

# Step 4: Allow user to input a new entry (user entry for prediction and updating)
# Example: user provides a new entry with values [BloodPressure, SkinThickness, Insulin, BMI, DiabetesPedigreeFunction, Age]
new_entry = np.array([[75, 22, 150, 30.5, 0.428, 45]])  # Example new entry

# Step 5: Make the prediction for the new entry using the trained model
predicted_outcome = clf.predict(new_entry)[0]

# Step 6: Output the prediction for the new entry
if predicted_outcome == 1:
    prediction = "Diabetic"
else:
    prediction = "Not Diabetic"

print(f"The predicted outcome for the new entry is: {prediction}")

# Step 7: Update the dataset with the new entry and retrain the model
# Append the new entry to the existing dataset
X_updated = np.vstack([X, new_entry])  # Add the new entry to the feature matrix
y_updated = np.append(y, predicted_outcome)  # Append the predicted outcome to the target vector

# Retrain the Decision Tree with the updated dataset
clf.fit(X_updated, y_updated)

# Step 8: Verify that the model has been retrained by predicting again
retrained_prediction = clf.predict(new_entry)[0]
if retrained_prediction == 1:
    retrained_prediction_text = "Diabetic"
else:
    retrained_prediction_text = "Not Diabetic"

print(f"After retraining, the prediction for the new entry is: {retrained_prediction_text}")


The predicted outcome for the new entry is: Diabetic
After retraining, the prediction for the new entry is: Diabetic


In [3]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

# Step 1: Define the dataset using the given dictionary
data = {
    'BloodPressure': [72, 66, 64, 66, 40, 74, 50, 0, 70, 96, 92, 74, 80, 60, 72, 0, 84, 74, 30],
    'SkinThickness': [35, 29, 0, 23, 35, 0, 32, 0, 45, 0, 0, 0, 0, 23, 19, 0, 47, 0, 38],
    'Insulin': [0, 0, 0, 94, 168, 0, 88, 0, 543, 0, 0, 0, 0, 846, 175, 0, 230, 0, 83],
    'BMI': [33.6, 26.6, 23.3, 28.1, 43.1, 25.6, 31.0, 35.3, 30.5, 0, 37.6, 38.0, 27.1, 30.1, 25.8, 30.0, 45.8, 29.6, 43.3],
    'DiabetesPedigreeFunction': [0.627, 0.351, 0.672, 0.167, 2.288, 0.201, 0.248, 0.134, 0.158, 0.232, 0.191, 0.537, 1.441, 0.398, 0.587, 0.484, 0.551, 0.254, 0.183],
    'Age': [50, 31, 32, 21, 33, 30, 26, 29, 53, 54, 30, 34, 57, 59, 51, 32, 31, 31, 33],
    'Outcome': [1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0]
}

# Step 2: Convert data into numpy arrays for features and target
X = np.column_stack((
    data['BloodPressure'],
    data['SkinThickness'],
    data['Insulin'],
    data['BMI'],
    data['DiabetesPedigreeFunction'],
    data['Age']
))  # Features matrix (shape: n_samples x n_features)

y = np.array(data['Outcome'])  # Target vector (shape: n_samples,)

# Step 3: Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Train the Decision Tree classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Step 5: Make predictions on the test set
y_pred = clf.predict(X_test)

# Step 6: Output the predicted outcomes for the test set
print("Predictions on the test set:", y_pred)

# Step 7: Allow user to input a new entry for prediction
# Example of user input: [BloodPressure, SkinThickness, Insulin, BMI, DiabetesPedigreeFunction, Age]
new_entry = np.array([[75, 22, 150, 30.5, 0.428, 45]])  # Replace with user input

# Predict the outcome for the new entry
new_entry_pred = clf.predict(new_entry)[0]

# Output the prediction for the new entry
if new_entry_pred == 1:
    print("The new entry is predicted to be Diabetic.")
else:
    print("The new entry is predicted to be Not Diabetic.")

# Step 8: (Optional) Show model accuracy on the test set
accuracy = clf.score(X_test, y_test)
print(f"Accuracy on the test set: {accuracy * 100:.2f}%")


Predictions on the test set: [1 0 1 1]
The new entry is predicted to be Diabetic.
Accuracy on the test set: 75.00%
