In [1]:
import random
import pandas as pd

In [2]:


# Define example symptoms for each category
urgent_care_symptoms = [
    "Mild rash with itching", "Sore throat for 3 days", "Earache and slight fever", "Red eye with discharge",
    "Low-grade fever and headache", "Minor burn on hand", "Cut on finger, bleeding controlled", "Sinus congestion",
    "Mild allergic reaction", "Muscle strain in back", "Nausea and vomiting for 1 day", "Cough and nasal congestion",
    "Painful urination", "Swollen ankle, no deformity", "Skin infection on arm", "Bug bite swelling",
    "Bruised knee from fall", "Blister from shoe", "Scalp tenderness", "Toothache and gum swelling"
]

er_symptoms = [
    "Chest pain and shortness of breath", "Severe abdominal pain", "Fever of 105°F", "Head trauma from fall",
    "Seizure episode", "Uncontrolled bleeding", "Difficulty breathing", "Loss of consciousness",
    "Suspected stroke symptoms", "Severe allergic reaction", "Compound fracture", "Severe burn on arm",
    "Intense migraine with vomiting", "Sudden confusion", "Vision loss in one eye", "Severe dehydration",
    "Gunshot wound", "Broken leg with deformity", "Heart palpitations and dizziness", "Suicidal thoughts"
]

In [3]:


# Function to create varied natural-sounding patient phrasing
def generate_variants(base, label, n=25):
    variants = []
    for _ in range(n):
        symptom = random.choice(base)
        intro = random.choice([
            "I'm experiencing", "I've got", "I have", "My child has", "Feeling like", "Dealing with",
            "Noticed", "Started having"
        ])
        suffix = random.choice([
            "", "since yesterday", "for a couple of hours", "and it's getting worse", "and it's painful",
            "and it just started", "and I'm not sure what to do", "should I go to a doctor?"
        ])
        sentence = f"{intro} {symptom.lower()} {suffix}".strip()
        variants.append({"text": sentence, "label": label})
    return variants

In [4]:

# Generate 250 from each class
uc_data = generate_variants(urgent_care_symptoms, "Urgent Care", n=25)
er_data = generate_variants(er_symptoms, "ER", n=25)

# Multiply to reach 500 (20 variants * 25 = 500)
for i in range(4):
    uc_data.extend(generate_variants(urgent_care_symptoms, "Urgent Care", n=25))
    er_data.extend(generate_variants(er_symptoms, "ER", n=25))

# Combine and shuffle
all_data = uc_data + er_data
random.shuffle(all_data)

# Convert to DataFrame
df = pd.DataFrame(all_data)




In [7]:


df.shape       # Show total rows and columns



(250, 2)

In [8]:
df.tail()      # Last few rows

Unnamed: 0,text,label
245,Noticed low-grade fever and headache and it's ...,Urgent Care
246,I'm experiencing sinus congestion and it just ...,Urgent Care
247,"Feeling like swollen ankle, no deformity and i...",Urgent Care
248,Dealing with toothache and gum swelling and it...,Urgent Care
249,My child has sudden confusion and I'm not sure...,ER


In [9]:
df.sample(10)  # Random 10

Unnamed: 0,text,label
12,Dealing with seizure episode and it just started,ER
171,Dealing with suspected stroke symptoms should ...,ER
124,Started having low-grade fever and headache si...,Urgent Care
21,Dealing with fever of 105°f since yesterday,ER
75,My child has blister from shoe since yesterday,Urgent Care
4,I'm experiencing severe burn on arm and it's p...,ER
227,Feeling like severe abdominal pain for a coupl...,ER
139,My child has compound fracture and it just sta...,ER
130,Started having sudden confusion since yesterday,ER
185,I'm experiencing vision loss in one eye since ...,ER


In [13]:


# Save the DataFrame in the current working directory instead of /mnt/data
local_csv_path = "urgentcare_symptoms_dataset.csv"
df.to_csv(local_csv_path, index=False)

local_csv_path

'urgentcare_symptoms_dataset.csv'