In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier

# Load and preprocess dataset
# NOTE(review): absolute, machine-specific path — the notebook only runs on the
# author's machine as written; point DATA_PATH at your local copy of the CSV.
DATA_PATH = r"C:\Users\Dhrumil\Desktop\diabetes-classifier\Backend\Patient_Data_Updated.csv"
df = pd.read_csv(DATA_PATH)

# Forward-fill missing cells from the previous row. DataFrame.ffill() is the
# supported spelling; fillna(method='ffill') is deprecated in recent pandas.
# NOTE(review): read_csv may parse the literal text "None" as a missing value
# (it is among the default NA strings in recent pandas). make_prediction
# compares labels against "None", so if the CSV stores "None" as a category
# those cells are being forward-filled here — confirm against the data.
df = df.ffill()

# Initialize encoders (kept as individual names: later cells use and save them)
le_type = LabelEncoder()
le_complication = LabelEncoder()
le_heart = LabelEncoder()
le_kidney = LabelEncoder()
le_nerve = LabelEncoder()
le_eye = LabelEncoder()

# Encode labels: (encoded column, source column, encoder).
# Every label column is encoded in place except heart disease, whose encoded
# values are written to a NEW column 'Heart_Disease_' while the raw
# 'Heart_Disease_Risk' column is left untouched in df.
label_columns = [
    ('Diabetes_Type_Label', 'Diabetes_Type_Label', le_type),
    ('Heart_Disease_', 'Heart_Disease_Risk', le_heart),
    ('Kidney_Issues', 'Kidney_Issues', le_kidney),
    ('Nerve_Damage', 'Nerve_Damage', le_nerve),
    ('Eye_Problems', 'Eye_Problems', le_eye),
    ('Diabetes_Complications', 'Diabetes_Complications', le_complication),
]
for encoded_col, source_col, encoder in label_columns:
    df[encoded_col] = encoder.fit_transform(df[source_col])

# The identifier column is dropped so it is not carried into later cells.
df = df.drop(columns=["Patient_ID"])



In [9]:
# Train Diabetes Type model
# Features fed to the type classifier; the same list is reused by later cells
# for the per-type risk models and for building prediction inputs.
features_type = [
    'Age',
    'BMI',
    'Fasting_Glucose',
    'HbA1c',
    'C_Peptide',
    'Insulin_Level',
    'Autoantibody_Presence',
]
X_type, y_type = df[features_type], df['Diabetes_Type_Label']

# 80/20 hold-out split with a fixed seed so the reported accuracy is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_type,
    y_type,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training can be chained.
type_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Accuracy on the held-out 20%, printed as a percentage.
print("Type model trained. Accuracy: {:.2f}%".format(100 * type_model.score(X_test, y_test)))


Type model trained. Accuracy: 92.86%


In [10]:
# Train disease-specific models
# For every diabetes type, train one complication model plus one model per
# organ-risk column, using only the rows of that type.
#
# The whole per-type training has to live inside this single loop (one cell):
# if the model fitting is placed after the loop, only the rows of the LAST
# type are used and risk_models ends up with a single entry, so
# make_prediction fails with KeyError for every other predicted type.
risk_targets = ['Heart_Disease_', 'Kidney_Issues', 'Nerve_Damage', 'Eye_Problems']

risk_models = {}
# le_type.classes_[i] is the original label that was encoded as i, so this
# covers every type present in the data instead of a hard-coded [0, 1].
for type_val, type_name in enumerate(le_type.classes_):
    df_filtered = df[df['Diabetes_Type_Label'] == type_val]
    X_risk = df_filtered[features_type]

    # Train complication model
    y_comp = df_filtered['Diabetes_Complications']
    comp_model = RandomForestClassifier(n_estimators=100, random_state=42)
    comp_model.fit(X_risk, y_comp)

    models = {"complication": comp_model}

    # Train one model per organ-risk column
    for col in risk_targets:
        y = df_filtered[col]
        model = RandomForestClassifier(n_estimators=100, random_state=42)
        model.fit(X_risk, y)
        models[col] = model

    # Keyed by the human-readable type label, which is what make_prediction
    # looks up after decoding the type model's prediction.
    risk_models[type_name] = models

In [14]:
# Prediction function
def make_prediction(sample_input):
    """Predict diabetes type, organ risks and complication stage, and print a report.

    Parameters
    ----------
    sample_input
        Feature rows passed unchanged to ``predict`` / ``predict_proba`` of the
        fitted models (the notebook passes a one-row DataFrame with the
        ``features_type`` columns). Only the first row's result is reported.

    Returns
    -------
    None
        The report is written to stdout.

    Raises
    ------
    KeyError
        If ``risk_models`` has no entry for the predicted diabetes type, or a
        model bundle contains a target name with no known encoder.
    """
    pred_type = type_model.predict(sample_input)[0]
    type_label = le_type.inverse_transform([pred_type])[0]

    if type_label not in risk_models:
        raise KeyError(
            f"No risk models trained for diabetes type {type_label!r}; "
            f"available: {sorted(risk_models)}"
        )
    type_models = risk_models[type_label]

    # Predict complication stage.
    # predict_proba columns are ordered like comp_model.classes_, which holds
    # only the encoded classes seen while fitting on this type's rows. The
    # argmax position must therefore be mapped through classes_ before
    # decoding; using the position itself as the encoded label is wrong
    # whenever a class is absent from that subset.
    comp_model = type_models['complication']
    comp_probs = comp_model.predict_proba(sample_input)[0]
    comp_code = comp_model.classes_[np.argmax(comp_probs)]
    complication_stage = le_complication.inverse_transform([comp_code])[0]

    # Model key -> (name shown in the report, encoder that decodes its classes).
    organ_targets = {
        'Heart_Disease_': ("Heart Disease", le_heart),
        'Kidney_Issues': ("Kidney Issues", le_kidney),
        'Nerve_Damage': ("Nerve Damage", le_nerve),
        'Eye_Problems': ("Eye Problems", le_eye),
    }

    # Analyze disease risks
    concerns = []
    probs_sum = 0
    count = 0

    for label, model in type_models.items():
        if label == 'complication':
            continue

        display_name, encoder = organ_targets[label]

        probs = model.predict_proba(sample_input)[0]
        pred_idx = np.argmax(probs)
        # Same column -> class mapping as for the complication model.
        pred_label = encoder.inverse_transform([model.classes_[pred_idx]])[0]
        confidence = probs[pred_idx]

        concerns.append(f"{display_name} : {pred_label}")

        # Only predictions other than "None" contribute to the overall figure.
        if pred_label != "None":
            probs_sum += confidence
            count += 1

    # Mean confidence of the non-"None" predictions, as a percentage.
    overall_damage = (probs_sum / count) * 100 if count else 0.0

    # Output format
    print(f"\nDiabetes_Type : {type_label}")
    for concern in concerns:
        print(concern)
    print(f"Diabetes_Complications: {complication_stage}")
    print(f"Overall Damage Probability: {overall_damage:.1f}%\n")



In [16]:
# Sample test input
# A single hypothetical patient: one value for each feature the models use.
custom_input = pd.DataFrame(
    {
        "Age": [61],
        "BMI": [23.7],
        "Fasting_Glucose": [231],
        "HbA1c": [4.9],
        "C_Peptide": [2.2],
        "Insulin_Level": [4.5],
        "Autoantibody_Presence": [1],
    }
)

# Prints the report for this patient.
make_prediction(custom_input)


Diabetes_Type : Type 2 Diabetes
Heart Disease : Low
Kidney Issues : Mild
Nerve Damage : Mild
Eye Problems : Retinopathy
Diabetes_Complications: Controlled
Overall Damage Probability: 73.8%



# Save the Models

In [18]:
import joblib
import os

# Create directory to save models
save_dir = "saved_models"
os.makedirs(save_dir, exist_ok=True)

# Types with fewer rows than this are skipped rather than trained on.
min_rows_per_type = 10

# Column in df -> name used in the saved model's file name.
organ_targets = {
    'Heart_Disease_': 'Heart_Disease',
    'Kidney_Issues': 'Kidney_Issues',
    'Nerve_Damage': 'Nerve_Damage',
    'Eye_Problems': 'Eye_Problems',
}

# Save the main diabetes type classifier
joblib.dump(type_model, os.path.join(save_dir, "type_model.pkl"))

# Train and save risk prediction models for each diabetes type dynamically.
# These models are fitted here from df; the in-memory risk_models dict built
# by the earlier cells is not what gets written to disk.
skipped_types = []
for type_val in df['Diabetes_Type_Label'].unique():
    type_name = le_type.inverse_transform([type_val])[0]
    df_filtered = df[df['Diabetes_Type_Label'] == type_val]

    if df_filtered.shape[0] < min_rows_per_type:
        print(f"⚠️ Skipping {type_name} — not enough data to train reliable models.")
        skipped_types.append(type_name)
        continue

    X_risk = df_filtered[features_type]

    # Train and save complication model
    comp_model = RandomForestClassifier(n_estimators=100, random_state=42)
    comp_model.fit(X_risk, df_filtered['Diabetes_Complications'])
    joblib.dump(comp_model, os.path.join(save_dir, f"{type_name}_complication_model.pkl"))

    # Train and save organ-specific models
    for col, label in organ_targets.items():
        model = RandomForestClassifier(n_estimators=100, random_state=42)
        model.fit(X_risk, df_filtered[col])
        joblib.dump(model, os.path.join(save_dir, f"{type_name}_{label}_model.pkl"))

# Save all label encoders (needed to decode predictions after loading)
encoders_to_save = {
    "le_type.pkl": le_type,
    "le_complication.pkl": le_complication,
    "le_heart.pkl": le_heart,
    "le_kidney.pkl": le_kidney,
    "le_nerve.pkl": le_nerve,
    "le_eye.pkl": le_eye,
}
for file_name, fitted_encoder in encoders_to_save.items():
    joblib.dump(fitted_encoder, os.path.join(save_dir, file_name))

# Only claim full success when no type was skipped above.
if skipped_types:
    skipped_text = ", ".join(str(name) for name in skipped_types)
    print(f"✅ Type model and encoders saved; risk models were NOT saved for: {skipped_text}.")
else:
    print("✅ All models and encoders saved successfully including Type 1 (if data exists).")


✅ All models and encoders saved successfully including Type 1 (if data exists).
