In [7]:
import joblib
import pandas as pd
import numpy as np

# Load model, encoder, selected symptoms
rf_model = joblib.load('final_rf_model_top30.pkl')
label_encoder = joblib.load('label_encoder.pkl')
selected_symptoms = pd.read_csv('selected_symptoms.csv')['0'].tolist()

# Load training dataset for verification
training_df = pd.read_csv('/mnt/k/ml/clg_ml/symptom_based_diseaese_detection/Training.csv')

def predict_disease(user_symptoms):
    """
    Predict disease based on user symptoms and provide verification info.
    """
    # Binary input vector
    input_vector = [1 if symptom in user_symptoms else 0 for symptom in selected_symptoms]
    input_df = pd.DataFrame([input_vector], columns=selected_symptoms)

    # Predict primary disease
    pred_encoded = rf_model.predict(input_df)[0]
    predicted_disease = label_encoder.inverse_transform([pred_encoded])[0]

    # Show top-3 predicted diseases with probabilities
    probs = rf_model.predict_proba(input_df)[0]
    top3_indices = probs.argsort()[::-1][:3]
    top3_diseases = label_encoder.inverse_transform(top3_indices)
    top3_probs = probs[top3_indices]

    # Search training data for similar historical cases
    matching_cases = find_matching_cases(user_symptoms, threshold=3)

    return {
        'predicted_disease': predicted_disease,
        'top_3': list(zip(top3_diseases, top3_probs)),
        'matches': matching_cases
    }

def find_matching_cases(user_symptoms, threshold=3, max_results=5):
    """
    Find historical cases with at least 'threshold' symptoms in common.
    """
    matches = []
    features = training_df.drop('prognosis', axis=1)
    for i, row in features.iterrows():
        row_symptoms = set(symptom for symptom in selected_symptoms if row[symptom] == 1)
        common_symptoms = set(user_symptoms).intersection(row_symptoms)
        if len(common_symptoms) >= threshold:
            disease = training_df.loc[i, 'prognosis']
            matches.append({
                'index': i,
                'disease': disease,
                'common_symptoms': list(common_symptoms),
                'match_count': len(common_symptoms)
            })
            if len(matches) >= max_results:
                break
    return matches


# For standalone testing
if __name__ == "__main__":
    # Example user symptoms
    user_input = [
    "fatigue",
    "weight_loss",
    "mild_fever",
    "chest_pain",
    "loss_of_appetite",
    "rusty_sputum",
    "joint_pain",
    "headache",
    "sweating"
    ]


    result = predict_disease(user_input)

    print(f"\n🎯 Predicted Disease: {result['predicted_disease']}\n")

    print("🔍 Top 3 Predictions with Confidence:")
    for disease, prob in result['top_3']:
        print(f"  - {disease}: {prob:.2f}")

    top_disease, top_prob = result['top_3'][0]
    if top_prob < 0.4:
        print("\n⚠️ Low confidence in prediction.")
        print("🔎 Checking historical cases for alternative suggestions...")

        # Collect alternative disease suggestions from matching cases
        alt_diseases = list(set(match['disease'] for match in result['matches']))
        if alt_diseases:
            if top_disease not in alt_diseases:
                print(f"📌 Based on similar past cases, a more likely disease might be: {', '.join(alt_diseases)}")
            else:
                print(f"📌 Historical evidence supports: {top_disease}")
        else:
            print("📌 No similar past cases found to suggest alternatives.")

    print("\n📚 Matching Historical Cases:")
    if result['matches']:
        for match in result['matches']:
            print(f"  - Index {match['index']}: Disease = {match['disease']}, "
                  f"Common Symptoms = {match['common_symptoms']} ({match['match_count']} matched)")
    else:
        print("  - No close matches found.")


🎯 Predicted Disease: Pneumonia

🔍 Top 3 Predictions with Confidence:
  - Pneumonia: 0.32
  - Tuberculosis: 0.31
  - Hepatitis D: 0.07

⚠️ Low confidence in prediction.
🔎 Checking historical cases for alternative suggestions...
📌 Based on similar past cases, a more likely disease might be: Chicken pox

📚 Matching Historical Cases:
  - Index 160: Disease = Chicken pox, Common Symptoms = ['mild_fever', 'headache', 'fatigue', 'loss_of_appetite'] (4 matched)
  - Index 161: Disease = Chicken pox, Common Symptoms = ['mild_fever', 'headache', 'fatigue', 'loss_of_appetite'] (4 matched)
  - Index 162: Disease = Chicken pox, Common Symptoms = ['mild_fever', 'headache', 'fatigue', 'loss_of_appetite'] (4 matched)
  - Index 163: Disease = Chicken pox, Common Symptoms = ['mild_fever', 'headache', 'loss_of_appetite'] (3 matched)
  - Index 164: Disease = Chicken pox, Common Symptoms = ['mild_fever', 'headache', 'fatigue', 'loss_of_appetite'] (4 matched)


In [2]:
len(selected_symptoms)

30