In [1]:
import pandas as pd
import json

# Free-text columns whose values get whitespace-stripped when present.
TEXT_COLUMNS = ['Symptoms', 'Sex', 'Disease', 'Treatment', 'Nature']

# Fallbacks used when a disease has no usable ages / treatments at all.
DEFAULT_AGE_PROFILE = {"min": 0, "max": 100, "mean": 40, "std": 20}
DEFAULT_TREATMENT = "Consult a Doctor"


def clean_dataframe(df):
    """Return a cleaned copy of the raw symptom/treatment table.

    Column names are stripped of surrounding whitespace, the text columns
    listed in ``TEXT_COLUMNS`` (those that exist) are converted to stripped
    strings, and ``Age`` is converted to numeric with unparseable values
    becoming NaN.

    Missing cells in text columns stay missing. A plain ``astype(str)``
    would turn them into the literal string "nan", which would then be
    counted as a real symptom, disease or treatment.

    Raises:
        KeyError: if the table has no ``Age`` column.
    """
    df = df.copy()
    df.columns = [c.strip() for c in df.columns]

    for col in TEXT_COLUMNS:
        if col in df.columns:
            stripped = df[col].astype(str).str.strip()
            # Keep the original (missing) value where the cell was missing,
            # otherwise take the stripped text.
            df[col] = df[col].where(df[col].isna(), stripped)

    df['Age'] = pd.to_numeric(df['Age'], errors='coerce')
    return df


def _age_profile(ages):
    """Summarise the non-missing ages as min / max / mean / std floats."""
    valid_ages = ages.dropna()
    if valid_ages.empty:
        return dict(DEFAULT_AGE_PROFILE)
    return {
        "min": float(valid_ages.min()),
        "max": float(valid_ages.max()),
        "mean": float(valid_ages.mean()),
        # The sample std is undefined for a single observation; use 5.0.
        "std": float(valid_ages.std()) if len(valid_ages) > 1 else 5.0,
    }


def _most_common_treatment(subset):
    """Return the most frequent non-missing treatment, or the default."""
    if 'Treatment' not in subset.columns:
        return DEFAULT_TREATMENT
    modes = subset['Treatment'].mode()  # ignores missing values
    if modes.empty:
        return DEFAULT_TREATMENT
    return modes.iloc[0]


def build_knowledge_base(df):
    """Build the JSON-serialisable knowledge base from a cleaned table.

    Args:
        df: table as returned by ``clean_dataframe``; must contain the
            ``Symptoms``, ``Disease`` and ``Age`` columns.

    Returns:
        A dict with:
          * ``"symptoms_list"``: sorted list of distinct symptoms;
          * ``"diseases"``: mapping of disease name to a dict holding
            ``"weights"`` (symptom -> occurrences / rows for that disease),
            ``"age_profile"`` (min / max / mean / std of known ages) and
            ``"treatment"`` (most common treatment).

        Rows with a missing disease are skipped, and missing symptoms or
        treatments are never reported as values.
    """
    knowledge_base = {
        "symptoms_list": sorted(df['Symptoms'].dropna().unique().tolist()),
        "diseases": {},
    }

    # unique() keeps first-appearance order, so the output order is stable.
    for disease in df['Disease'].dropna().unique():
        subset = df[df['Disease'] == disease]
        total_cases = len(subset)

        # Symptom weight = frequency of the symptom among this disease's rows.
        symptom_counts = subset['Symptoms'].value_counts()

        knowledge_base["diseases"][disease] = {
            "weights": (symptom_counts / total_cases).to_dict(),
            "age_profile": _age_profile(subset['Age']),
            "treatment": _most_common_treatment(subset),
        }

    return knowledge_base


# The guard is true both for `python file.py` and inside a Jupyter kernel,
# so `df` and `knowledge_base` are still defined for later cells.
if __name__ == "__main__":
    # Load the dataset
    df = clean_dataframe(pd.read_csv('respiratory symptoms and treatment.csv'))

    knowledge_base = build_knowledge_base(df)

    # Output valid JSON
    print(json.dumps(knowledge_base, indent=2))

{
  "symptoms_list": [
    "A cough that lasts more than three weeks",
    "A dry, crackling sound in the lungs while breathing in",
    "Bluish skin",
    "Chest congestion",
    "Chest pain",
    "Chest tightness or chest pain",
    "Chills",
    "Coughing up blood",
    "Coughing up yellow or green mucus daily",
    "Daytime sleepiness",
    "Difficulties with memory and concentration",
    "Dry mouth",
    "Fatigue",
    "Fatigue, feeling run-down or tired",
    "Feeling run-down or tired",
    "Fever",
    "Frequently waking",
    "Headache",
    "Loss of appetite",
    "Loss of appetite and unintentional weight loss",
    "Low-grade fever",
    "Morning headaches",
    "Nasal congestion",
    "Nausea",
    "Night sweats",
    "Pauses in breathing",
    "Persistent dry coug",
    "Persistent dry cough",
    "Rapid breathing",
    "Rapid heartbeat",
    "Runny nose",
    "Shortness of breath",
    "Shortness of breath that gets worse during flare-ups",
    "Snoring",
    "Sore thro