In [8]:
# full_symptom_triage_api_with_risk.py

from flask import Flask, request, jsonify
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
import joblib
import os

# ------------------------------
# Load dataset (adjusted for provided columns)
# ------------------------------
df = pd.read_csv("Diseases_Symptoms.csv")  # Make sure this file exists and has columns: Code, Name, Symptoms, Treatments

# ------------------------------
# CLEANING & RISK LABELING
# ------------------------------
def clean_text(text):
    text = str(text).lower()
    text = re.sub(r'[^a-zA-Z0-9, ]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

df = df.dropna(subset=['Symptoms', 'Treatments'])
df['Symptoms'] = df['Symptoms'].apply(clean_text)
df['Treatments'] = df['Treatments'].apply(clean_text)

# Add synthetic Risk column using keyword heuristics
def classify_risk(symptoms):
    high_keywords = ['chest pain', 'seizure', 'stroke', 'unconscious', 'shortness of breath', 'palpitation', 'bleeding', 'confusion']
    for kw in high_keywords:
        if kw in symptoms:
            return 'HIGH'
    return 'LOW'

df['Risk'] = df['Symptoms'].apply(classify_risk)
df['Risk_Label'] = df['Risk'].map({'LOW': 0, 'HIGH': 1})

# ------------------------------
# MODEL TRAINING
# ------------------------------
X = df['Symptoms']
y = df['Risk_Label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', RandomForestClassifier(n_estimators=100, random_state=42))
])

pipeline.fit(X_train, y_train)
joblib.dump(pipeline, "triage_model.pkl")

# ------------------------------
# FLASK API
# ------------------------------
app = Flask(__name__)
model = joblib.load("triage_model.pkl")

@app.route("/predict", methods=["POST"])
def predict():
    data = request.get_json()
    symptoms = data.get("symptoms", "")
    if not symptoms:
        return jsonify({"error": "No symptoms provided"}), 400

    clean_input = clean_text(symptoms)
    risk_pred = model.predict([clean_input])[0]

    # Find closest row from dataset
    match = df[df['Symptoms'].str.contains(clean_input.split()[0], na=False)]
    if match.empty:
        match_row = df.sample(1).iloc[0]
    else:
        match_row = match.iloc[0]

    response = {
        "input_symptoms": symptoms,
        "predicted_risk": "HIGH" if risk_pred else "LOW",
        "treatment": match_row['Treatments'] if risk_pred == 0 else None,
        "recommend_doctor": "Consult Doctor" if risk_pred == 1 else None
    }
    return jsonify(response)

if __name__ == '__main__':
    port = int(os.environ.get("PORT", 5000))
    app.run(debug=True, host='0.0.0.0', port=port)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://192.168.1.68:5000
Press CTRL+C to quit
 * Restarting with watchdog (windowsapi)


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [2]:
df.head()

Unnamed: 0,Code,Name,Symptoms,Treatments
0,1,Panic disorder,"Palpitations, Sweating, Trembling, Shortness o...","Antidepressant medications, Cognitive Behavior..."
1,2,Vocal cord polyp,"Hoarseness, Vocal Changes, Vocal Fatigue","Voice Rest, Speech Therapy, Surgical Removal"
2,3,Turner syndrome,"Short stature, Gonadal dysgenesis, Webbed neck...","Growth hormone therapy, Estrogen replacement t..."
3,4,Cryptorchidism,"Absence or undescended testicle(s), empty scro...",Observation and monitoring (in cases of mild o...
4,5,Ethylene glycol poisoning-1,"Nausea, vomiting, abdominal pain, General mala...","Supportive Measures, Gastric Decontamination, ..."
