In [3]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE

# Load cleaned data
df = pd.read_csv("cleaned_diabetes_112.csv")

# Feature sets for each target
targets_features = {
    'prediabetic': ['Glucose', 'BloodPressure', 'Insulin'],
    'diabetes': ['Glucose', 'BMI', 'Insulin'],
    'prehypertension': ['Age', 'BloodPressure', 'Insulin'],
    'hypertension': ['BloodPressure', 'Age', 'Glucose']
}

# Models to train
models = {
    'Random_Forest': RandomForestClassifier(random_state=42),
    'Logistic_Regression': LogisticRegression(max_iter=1000),
    'SVM': SVC(probability=True)
}

saved_models = {}

for target, features in targets_features.items():
    print(f"\n=== Training for: {target} ===")
    X = df[features].dropna()
    y = df.loc[X.index, target]

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

    # Scale features
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Balance data
    smote = SMOTE(random_state=42)
    X_train_bal, y_train_bal = smote.fit_resample(X_train, y_train)

    # Train models
    for name, model in models.items():
        model.fit(X_train_bal, y_train_bal)
        y_pred = model.predict(X_test)

        print(f"\nModel: {name}")
        print(confusion_matrix(y_test, y_pred))
        print(classification_report(y_test, y_pred))

        # Save model
        model_filename = f"model_{target}_{name}.pkl"
        joblib.dump(model, model_filename)
        saved_models[f"{target}_{name}"] = model_filename

print("\n✅ All models saved:")
for k, v in saved_models.items():
    print(f"{k}: {v}")



=== Training for: prediabetic ===

Model: Random_Forest
[[142   0]
 [  1  81]]
              precision    recall  f1-score   support

           0       0.99      1.00      1.00       142
           1       1.00      0.99      0.99        82

    accuracy                           1.00       224
   macro avg       1.00      0.99      1.00       224
weighted avg       1.00      1.00      1.00       224






Model: Logistic_Regression
[[125  17]
 [  7  75]]
              precision    recall  f1-score   support

           0       0.95      0.88      0.91       142
           1       0.82      0.91      0.86        82

    accuracy                           0.89       224
   macro avg       0.88      0.90      0.89       224
weighted avg       0.90      0.89      0.89       224


Model: SVM
[[136   6]
 [  0  82]]
              precision    recall  f1-score   support

           0       1.00      0.96      0.98       142
           1       0.93      1.00      0.96        82

    accuracy                           0.97       224
   macro avg       0.97      0.98      0.97       224
weighted avg       0.98      0.97      0.97       224


=== Training for: diabetes ===





Model: Random_Forest
[[138   0]
 [  0  86]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       138
           1       1.00      1.00      1.00        86

    accuracy                           1.00       224
   macro avg       1.00      1.00      1.00       224
weighted avg       1.00      1.00      1.00       224


Model: Logistic_Regression
[[133   5]
 [  0  86]]
              precision    recall  f1-score   support

           0       1.00      0.96      0.98       138
           1       0.95      1.00      0.97        86

    accuracy                           0.98       224
   macro avg       0.97      0.98      0.98       224
weighted avg       0.98      0.98      0.98       224


Model: SVM
[[134   4]
 [  0  86]]
              precision    recall  f1-score   support

           0       1.00      0.97      0.99       138
           1       0.96      1.00      0.98        86

    accuracy                           0.98       2




Model: Random_Forest
[[180   0]
 [  0  44]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       180
           1       1.00      1.00      1.00        44

    accuracy                           1.00       224
   macro avg       1.00      1.00      1.00       224
weighted avg       1.00      1.00      1.00       224


Model: Logistic_Regression
[[178   2]
 [  0  44]]
              precision    recall  f1-score   support

           0       1.00      0.99      0.99       180
           1       0.96      1.00      0.98        44

    accuracy                           0.99       224
   macro avg       0.98      0.99      0.99       224
weighted avg       0.99      0.99      0.99       224


Model: SVM
[[180   0]
 [  0  44]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       180
           1       1.00      1.00      1.00        44

    accuracy                           1.00       2




Model: Random_Forest
[[184   0]
 [  0  40]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       184
           1       1.00      1.00      1.00        40

    accuracy                           1.00       224
   macro avg       1.00      1.00      1.00       224
weighted avg       1.00      1.00      1.00       224


Model: Logistic_Regression
[[178   6]
 [  0  40]]
              precision    recall  f1-score   support

           0       1.00      0.97      0.98       184
           1       0.87      1.00      0.93        40

    accuracy                           0.97       224
   macro avg       0.93      0.98      0.96       224
weighted avg       0.98      0.97      0.97       224


Model: SVM
[[184   0]
 [  0  40]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       184
           1       1.00      1.00      1.00        40

    accuracy                           1.00       2

In [5]:
# Predict based on user input
print("Enter Patient Info:")
name = input("Name: ")
age = int(input("Age: "))
bmi = int(input("BMI: "))
bp = int(input("BloodPressure: "))
glucose = int(input("Glucose: "))
insulin = int(input("Insulin: "))

# Data dictionary
input_data = {
    'Age': age,
    'BMI': bmi,
    'BloodPressure': bp,
    'Glucose': glucose,
    'Insulin': insulin
}

# Feature map
targets_features = {
    'prediabetic': ['Glucose', 'BloodPressure', 'Insulin'],
    'diabetes': ['Glucose', 'BMI', 'Insulin'],
    'prehypertension': ['Age', 'BloodPressure', 'Insulin'],
    'hypertension': ['BloodPressure', 'Age', 'Glucose']
}

print(f"\n📊 Predictions for {name}:")
for target, features in targets_features.items():
    model_path = f"model_{target}_Random_Forest.pkl"  # You can swap with other models
    model = joblib.load(model_path)
    
    sample = [input_data[feat] for feat in features]
    result = model.predict([sample])[0]
    print(f"{target.capitalize()}: {result}")


Enter Patient Info:


Name:  aditya
Age:  20
BMI:  21
BloodPressure:  110
Glucose:  90
Insulin:  10



📊 Predictions for aditya:
Prediabetic: 0
Diabetes: 1
Prehypertension: 0
Hypertension: 1


In [7]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE

# === Step 1: Load cleaned dataset ===
df = pd.read_csv("cleaned_diabetes_112.csv")  # Update path if needed

# === Step 2: Define target-feature mapping ===
targets_features = {
    'prediabetic': ['Glucose', 'BloodPressure', 'Insulin'],
    'diabetes': ['Glucose', 'BMI', 'Insulin'],
    'prehypertension': ['Age', 'BloodPressure', 'Insulin'],
    'hypertension': ['BloodPressure', 'Age', 'Glucose']
}

# === Step 3: Define models ===
models = {
    'Random_Forest': RandomForestClassifier(random_state=42),
    'Logistic_Regression': LogisticRegression(max_iter=1000),
    'SVM': SVC(probability=True)
}

saved_models = {}

# === Step 4: Train models and save ===
for target, features in targets_features.items():
    print(f"\n=== Training for: {target} ===")
    X = df[features].dropna()
    y = df.loc[X.index, target]

    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    smote = SMOTE(random_state=42)
    X_train_smote, y_train_smote = smote.fit_resample(X_train_scaled, y_train)

    for name, model in models.items():
        model.fit(X_train_smote, y_train_smote)
        y_pred = model.predict(X_test_scaled)

        print(f"\n--- {name} for {target} ---")
        print(confusion_matrix(y_test, y_pred))
        print(classification_report(y_test, y_pred))

        model_filename = f"model_{target}_{name}.pkl"
        joblib.dump(model, model_filename)
        saved_models[f"{target}_{name}"] = model_filename

print("\n✅ All models trained and saved:")
for k, v in saved_models.items():
    print(f"{k}: {v}")

# === Step 5: User Input Prediction ===
print("\n🧍 Enter Patient Details")
name = input("Name: ")
age = int(input("Age: "))
bmi = int(input("BMI: "))
bp = int(input("BloodPressure: "))
glucose = int(input("Glucose: "))
insulin = int(input("Insulin: "))

input_data = {
    'Age': age,
    'BMI': bmi,
    'BloodPressure': bp,
    'Glucose': glucose,
    'Insulin': insulin
}

print(f"\n📊 Predictions for {name}:\n")

for target, features in targets_features.items():
    for model_name in models.keys():
        model_file = f"model_{target}_{model_name}.pkl"
        try:
            model = joblib.load(model_file)
            sample = np.array([input_data[feat] for feat in features]).reshape(1, -1)
            prediction = model.predict(sample)[0]
            print(f"{target.capitalize()} ({model_name}): {prediction}")
        except Exception as e:
            print(f"[ERROR] Could not predict {target} with {model_name}: {e}")



=== Training for: prediabetic ===





--- Random_Forest for prediabetic ---
[[142   0]
 [  1  81]]
              precision    recall  f1-score   support

           0       0.99      1.00      1.00       142
           1       1.00      0.99      0.99        82

    accuracy                           1.00       224
   macro avg       1.00      0.99      1.00       224
weighted avg       1.00      1.00      1.00       224


--- Logistic_Regression for prediabetic ---
[[125  17]
 [  7  75]]
              precision    recall  f1-score   support

           0       0.95      0.88      0.91       142
           1       0.82      0.91      0.86        82

    accuracy                           0.89       224
   macro avg       0.88      0.90      0.89       224
weighted avg       0.90      0.89      0.89       224


--- SVM for prediabetic ---
[[136   6]
 [  0  82]]
              precision    recall  f1-score   support

           0       1.00      0.96      0.98       142
           1       0.93      1.00      0.96        82






--- Random_Forest for diabetes ---
[[138   0]
 [  0  86]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       138
           1       1.00      1.00      1.00        86

    accuracy                           1.00       224
   macro avg       1.00      1.00      1.00       224
weighted avg       1.00      1.00      1.00       224


--- Logistic_Regression for diabetes ---
[[133   5]
 [  0  86]]
              precision    recall  f1-score   support

           0       1.00      0.96      0.98       138
           1       0.95      1.00      0.97        86

    accuracy                           0.98       224
   macro avg       0.97      0.98      0.98       224
weighted avg       0.98      0.98      0.98       224


--- SVM for diabetes ---
[[134   4]
 [  0  86]]
              precision    recall  f1-score   support

           0       1.00      0.97      0.99       138
           1       0.96      1.00      0.98        86

    accur




--- Random_Forest for prehypertension ---
[[180   0]
 [  0  44]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       180
           1       1.00      1.00      1.00        44

    accuracy                           1.00       224
   macro avg       1.00      1.00      1.00       224
weighted avg       1.00      1.00      1.00       224


--- Logistic_Regression for prehypertension ---
[[178   2]
 [  0  44]]
              precision    recall  f1-score   support

           0       1.00      0.99      0.99       180
           1       0.96      1.00      0.98        44

    accuracy                           0.99       224
   macro avg       0.98      0.99      0.99       224
weighted avg       0.99      0.99      0.99       224


--- SVM for prehypertension ---
[[180   0]
 [  0  44]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       180
           1       1.00      1.00      1.00




--- Random_Forest for hypertension ---
[[184   0]
 [  0  40]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       184
           1       1.00      1.00      1.00        40

    accuracy                           1.00       224
   macro avg       1.00      1.00      1.00       224
weighted avg       1.00      1.00      1.00       224


--- Logistic_Regression for hypertension ---
[[178   6]
 [  0  40]]
              precision    recall  f1-score   support

           0       1.00      0.97      0.98       184
           1       0.87      1.00      0.93        40

    accuracy                           0.97       224
   macro avg       0.93      0.98      0.96       224
weighted avg       0.98      0.97      0.97       224


--- SVM for hypertension ---
[[184   0]
 [  0  40]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       184
           1       1.00      1.00      1.00        4

Name:  aditya
Age:  20
BMI:  21
BloodPressure:  110
Glucose:  85
Insulin:  3



📊 Predictions for aditya:

Prediabetic (Random_Forest): 0
Prediabetic (Logistic_Regression): 0
Prediabetic (SVM): 0
Diabetes (Random_Forest): 1
Diabetes (Logistic_Regression): 1
Diabetes (SVM): 1
Prehypertension (Random_Forest): 0
Prehypertension (Logistic_Regression): 1
Prehypertension (SVM): 0
Hypertension (Random_Forest): 1
Hypertension (Logistic_Regression): 1
Hypertension (SVM): 1


In [11]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# === Step 1: Load cleaned dataset ===
df = pd.read_csv("cleaned_diabetes_112.csv")  # Ensure this is your updated dataset

# === Step 2: Define target-feature mapping ===
targets_features = {
    'prediabetic': ['Glucose', 'BloodPressure', 'Insulin'],
    'diabetes': ['Glucose', 'BMI', 'Insulin'],
    'prehypertension': ['Age', 'BloodPressure', 'Insulin'],
    'hypertension': ['BloodPressure', 'Age', 'Glucose']
}

# === Step 3: Define models ===
models = {
    'Random_Forest': RandomForestClassifier(random_state=42),
    'Logistic_Regression': LogisticRegression(max_iter=1000),
    'SVM': SVC(probability=True)
}

saved_models = {}

# === Step 4: Train models and save ===
for target, features in targets_features.items():
    print(f"\n=== Training for: {target} ===")
    X = df[features].dropna()
    y = df.loc[X.index, target]

    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    for name, model in models.items():
        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)

        print(f"\n--- {name} for {target} ---")
        print(confusion_matrix(y_test, y_pred))
        print(classification_report(y_test, y_pred))

        model_filename = f"model_{target}_{name}.pkl"
        joblib.dump(model, model_filename)
        saved_models[f"{target}_{name}"] = model_filename

print("\n✅ All models trained and saved:")
for k, v in saved_models.items():
    print(f"{k}: {v}")

# === Step 5: User Input Prediction ===
print("\n🧍 Enter Patient Details")
name = input("Name: ")
age = int(input("Age: "))
bmi = int(input("BMI: "))
bp = int(input("BloodPressure: "))
glucose = int(input("Glucose: "))
insulin = int(input("Insulin: "))

input_data = {
    'Age': age,
    'BMI': bmi,
    'BloodPressure': bp,
    'Glucose': glucose,
    'Insulin': insulin
}

print(f"\n📊 Predictions for {name}:\n")

for target, features in targets_features.items():
    for model_name in models.keys():
        model_file = f"model_{target}_{model_name}.pkl"
        try:
            model = joblib.load(model_file)
            sample = np.array([input_data[feat] for feat in features]).reshape(1, -1)
            prediction = model.predict(sample)[0]
            print(f"{target.capitalize()} ({model_name}): {prediction}")
        except Exception as e:
            print(f"[ERROR] Could not predict {target} with {model_name}: {e}")
if 80 < glucose < 125 and insulin > 25:
    prediabetic = 1
else:
    prediabetic = 0



=== Training for: prediabetic ===

--- Random_Forest for prediabetic ---
[[142   0]
 [  0  82]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       142
           1       1.00      1.00      1.00        82

    accuracy                           1.00       224
   macro avg       1.00      1.00      1.00       224
weighted avg       1.00      1.00      1.00       224


--- Logistic_Regression for prediabetic ---
[[131  11]
 [ 15  67]]
              precision    recall  f1-score   support

           0       0.90      0.92      0.91       142
           1       0.86      0.82      0.84        82

    accuracy                           0.88       224
   macro avg       0.88      0.87      0.87       224
weighted avg       0.88      0.88      0.88       224


--- SVM for prediabetic ---
[[138   4]
 [  1  81]]
              precision    recall  f1-score   support

           0       0.99      0.97      0.98       142
           1       0

Name:  aditya
Age:  21
BMI:  21
BloodPressure:  100
Glucose:  90
Insulin:  10



📊 Predictions for aditya:

Prediabetic (Random_Forest): 0
Prediabetic (Logistic_Regression): 0
Prediabetic (SVM): 0
Diabetes (Random_Forest): 1
Diabetes (Logistic_Regression): 1
Diabetes (SVM): 1
Prehypertension (Random_Forest): 0
Prehypertension (Logistic_Regression): 1
Prehypertension (SVM): 0
Hypertension (Random_Forest): 1
Hypertension (Logistic_Regression): 1
Hypertension (SVM): 0
