In [3]:
import pandas as pd

# Toy stand-in for the MedDialog format: one (question, symptom, triage_level)
# record per patient message, written row-wise so each example reads as a unit.
records = [
    (
        "I have a fever and a sore throat, what should I do?",
        "fever, sore throat",
        "self-care",
    ),
    (
        "My child is coughing and has a runny nose.",
        "cough, runny nose",
        "consult GP",
    ),
    (
        "I feel chest tightness and shortness of breath.",
        "chest tightness, shortness of breath",
        "emergency",
    ),
    (
        "I have a rash on my back and it's very itchy.",
        "rash, itchy skin",
        "consult GP",
    ),
    (
        "I’m experiencing dizziness after standing up.",
        "dizziness",
        "consult GP",
    ),
]
columns = ("question", "symptom", "triage_level")

# Pivot the records into the column-oriented mapping (column name -> list of
# values) that the DataFrame is built from.
data = {
    name: [record[position] for record in records]
    for position, name in enumerate(columns)
}

# Tabular view of the sample
df = pd.DataFrame(data)

# Keep a CSV copy (without the index column) for later export
df.to_csv("medical_dialog.csv", index=False)

# Show the first rows
df.head()

Unnamed: 0,question,symptom,triage_level
0,"I have a fever and a sore throat, what should ...","fever, sore throat",self-care
1,My child is coughing and has a runny nose.,"cough, runny nose",consult GP
2,I feel chest tightness and shortness of breath.,"chest tightness, shortness of breath",emergency
3,I have a rash on my back and it's very itchy.,"rash, itchy skin",consult GP
4,I’m experiencing dizziness after standing up.,dizziness,consult GP


In [4]:
# Install spaCy (run only once)
!pip install -U spacy

# Download English language model
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m43.4 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [5]:
import spacy

# Load spaCy's small English pipeline once and share it across the notebook.
# The "en_core_web_sm" package must already be installed in this environment
# (e.g. via `python -m spacy download en_core_web_sm`).
nlp = spacy.load("en_core_web_sm")

# Candidate symptom phrases = the noun chunks spaCy finds in the text.
def extract_symptoms(text):
    """Return the noun-chunk phrases of ``text`` as a list of strings.

    The text is parsed with the notebook-level ``nlp`` pipeline and each noun
    chunk's surface text is collected in document order. Chunks whose text is
    two characters or shorter are dropped. No further filtering is applied, so
    non-symptom noun phrases are returned as well.
    """
    phrases = []
    for chunk in nlp(text).noun_chunks:
        phrase = chunk.text
        if len(phrase) <= 2:
            # Too short to be a meaningful phrase; skip it.
            continue
        phrases.append(phrase)
    return phrases

# Run the extractor over every question and store the resulting list of
# phrases in a new column next to the original text.
questions = df["question"]
df["extracted_symptoms"] = questions.map(extract_symptoms)

# Display each question side by side with what was extracted from it
df.loc[:, ["question", "extracted_symptoms"]]

Unnamed: 0,question,extracted_symptoms
0,"I have a fever and a sore throat, what should ...","[a fever, a sore throat, what]"
1,My child is coughing and has a runny nose.,"[My child, a runny nose]"
2,I feel chest tightness and shortness of breath.,"[chest tightness, shortness, breath]"
3,I have a rash on my back and it's very itchy.,"[a rash, my back]"
4,I’m experiencing dizziness after standing up.,[dizziness]


In [6]:
# Triage Level Classifier

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

# Encode the string triage labels as integers. The fitted encoder is kept so
# integer predictions can be mapped back to label names with
# `label_encoder.inverse_transform`.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df["triage_level"])

# Create a pipeline: TF-IDF over unigrams and bigrams + Logistic Regression
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(ngram_range=(1, 2))),
    ('clf', LogisticRegression(max_iter=200))
])

# Fit the model on the raw question text
pipeline.fit(df["question"], y)

# Predict on the same data the model was trained on (demo only). The scores
# below are therefore training-set scores, not an estimate of how the model
# performs on unseen questions.
y_pred = pipeline.predict(df["question"])

# Report. A class that is never predicted has an undefined precision;
# `zero_division=0` reports it as 0.0 (the same value as before) without
# emitting an UndefinedMetricWarning for each affected metric.
print(
    classification_report(
        y,
        y_pred,
        target_names=label_encoder.classes_,
        zero_division=0,
    )
)

              precision    recall  f1-score   support

  consult GP       0.60      1.00      0.75         3
   emergency       0.00      0.00      0.00         1
   self-care       0.00      0.00      0.00         1

    accuracy                           0.60         5
   macro avg       0.20      0.33      0.25         5
weighted avg       0.36      0.60      0.45         5



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## Chatbot Simulation (Triage Assistant)

In [12]:
def triage_chatbot(model=None, encoder=None, input_fn=None, output_fn=None):
    """Run an interactive loop that maps free-text symptoms to a triage label.

    Each non-blank description is classified on its own (earlier answers are
    not combined with later ones) and the predicted label is shown in upper
    case. After every prediction the user is asked whether to continue; any
    answer other than "yes"/"y" ends the session.

    Blank descriptions are not classified; the user is prompted again instead.
    If the input stream ends or the user interrupts a prompt, the session
    closes with the goodbye message rather than raising.

    Args:
        model: Object with ``predict(list_of_texts)``. Defaults to the
            notebook-level ``pipeline``.
        encoder: Object with ``inverse_transform(predictions)`` returning label
            strings. Defaults to the notebook-level ``label_encoder``.
        input_fn: Callable taking a prompt and returning the user's reply.
            Defaults to the built-in ``input``.
        output_fn: Callable taking one message string. Defaults to ``print``.

    Returns:
        None.
    """
    # Resolve defaults at call time so the function always picks up the
    # notebook's current model objects and the kernel's current input hook.
    if model is None:
        model = pipeline
    if encoder is None:
        encoder = label_encoder
    if input_fn is None:
        input_fn = input
    if output_fn is None:
        output_fn = print

    output_fn("🩺 Hello! I'm your Symptom Triage Assistant.")

    while True:
        try:
            user_input = input_fn("\n🔹 Please describe your symptoms: ").strip()
        except (EOFError, KeyboardInterrupt):
            # No more input available (or the prompt was interrupted).
            output_fn("\n👋 Take care! Goodbye.")
            break

        if not user_input:
            # Never issue a recommendation for an empty description.
            output_fn("⚠️ I didn't catch that. Please describe at least one symptom.")
            continue

        # Predict triage level
        pred = model.predict([user_input])
        label = encoder.inverse_transform(pred)[0]

        output_fn(f"🧠 Based on your symptoms, I recommend: **{label.upper()}**")

        # Ask if user has more symptoms; treat a closed/interrupted prompt as "no"
        try:
            follow_up = input_fn("🤔 Any other symptoms? (yes/no): ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            follow_up = "no"
        if follow_up not in ['yes', 'y']:
            output_fn("👋 Take care! Goodbye.")
            break

# Start the interactive session. This cell blocks on input() until the user
# answers the follow-up question with anything other than "yes"/"y".
triage_chatbot()

🩺 Hello! I'm your Symptom Triage Assistant.

🔹 Please describe your symptoms: I have a fever and chills.
🧠 Based on your symptoms, I recommend: **CONSULT GP**
🤔 Any other symptoms? (yes/no): yes

🔹 Please describe your symptoms: stomac pain
🧠 Based on your symptoms, I recommend: **CONSULT GP**
🤔 Any other symptoms? (yes/no): no
👋 Take care! Goodbye.
