In [1]:
import numpy as np
import pandas as pd

# Seed the legacy global NumPy RNG so the whole synthetic dataset is
# reproducible; everything below draws from this single stream, so the
# order of the sampling calls matters.
np.random.seed(42)
n = 30000

# ===== 1. Realistic distribution of medical conditions =====
# Names and probabilities are positional pairs; the probabilities sum to 1.
condition_names = [
    "Hypertension", "Diabetes", "Obesity", "Asthma", "Arthritis", "Cancer",
]
condition_probs = [0.32, 0.28, 0.18, 0.10, 0.07, 0.05]
conditions = np.random.choice(condition_names, size=n, p=condition_probs)

# Gender is sampled uniformly and independently of the condition.
genders = np.random.choice(["Male", "Female"], size=n)

# Per-patient accumulators, filled one record at a time by the sampling loop.
ages, glucose, bp, bmi = [], [], [], []
oxygen, hr, rr, temp = [], [], [], []
length_stay = []
waist_circ = []

# Additional binary symptom indicators (one output column per entry).
# The list order defines the column order of the final table.
symptom_cols = [
    'Frequent Urination',
    'Excessive Thirst',
    'Blurred Vision',
    'Fatigue',
    'Slow Healing Wounds',
    'Unexplained Weight Loss',
    'Tingling Hands/Feet',
    'Increased Hunger',
    'Headache',
    'Chest Pain',
    'Dizziness',
    'Nosebleeds',
    'Vision Problems',
    'Shortness of Breath',
    'High BMI',
    'Snoring',
    'Joint Pain',
    'Back Pain',
    'Sweating Excessively',
    'Increased Appetite',
    'Wheezing',
    'Coughing',
    'Chest Tightness',
    'Nighttime Cough',
    'Rapid Breathing',
    'Fatigue after Exercise',
    'Difficulty Sleeping',
    'Pain',
    'Lumps',
    'Persistent Fatigue',
    'Fever/Night Sweats',
    'Persistent Cough',
    'Skin Changes',
    'Loss of Appetite',
    'Swelling',
    'Stiffness',
    'Reduced Mobility',
    'Warm Joints',
    'Tenderness',
    'Morning Stiffness >1h',
]
# Each symptom gets its own independent list (never a shared one).
symptoms = {name: [] for name in symptom_cols}

# Per-condition sampling parameters.
#   "age" / "stay"                 -> (low, high) for np.random.randint (high exclusive)
#   "glucose" / "bp" / "bmi" / "o2" -> (mean, std) for np.random.normal
_VITAL_PROFILES = {
    "Diabetes": {
        "age": (40, 75), "glucose": (200, 25), "bp": (135, 10),
        "bmi": (30, 3), "o2": (95, 2), "stay": (4, 10),
    },
    "Hypertension": {
        "age": (45, 85), "glucose": (110, 15), "bp": (165, 15),
        "bmi": (28, 3), "o2": (95, 2), "stay": (3, 7),
    },
    "Obesity": {
        "age": (25, 65), "glucose": (115, 15), "bp": (140, 10),
        "bmi": (37, 4), "o2": (96, 2), "stay": (2, 5),
    },
    "Asthma": {
        "age": (10, 60), "glucose": (100, 10), "bp": (120, 10),
        "bmi": (25, 3), "o2": (90, 4), "stay": (3, 8),
    },
    "Cancer": {
        "age": (30, 80), "glucose": (105, 15), "bp": (125, 10),
        "bmi": (24, 3), "o2": (94, 3), "stay": (7, 20),
    },
    "Arthritis": {
        "age": (50, 90), "glucose": (100, 10), "bp": (135, 10),
        "bmi": (27, 3), "o2": (96, 2), "stay": (2, 6),
    },
}

# Symptoms that are always present for each condition.
_CONDITION_SYMPTOMS = {
    "Diabetes": (
        'Frequent Urination', 'Excessive Thirst', 'Blurred Vision', 'Fatigue',
        'Slow Healing Wounds', 'Tingling Hands/Feet', 'Increased Hunger',
    ),
    "Hypertension": (
        'Headache', 'Chest Pain', 'Dizziness', 'Nosebleeds', 'Vision Problems',
        'Shortness of Breath', 'Fatigue',
    ),
    "Obesity": (
        'High BMI', 'Snoring', 'Joint Pain', 'Back Pain', 'Sweating Excessively',
        'Increased Appetite', 'Shortness of Breath', 'Fatigue',
    ),
    "Asthma": (
        'Wheezing', 'Coughing', 'Shortness of Breath', 'Chest Tightness',
        'Nighttime Cough', 'Rapid Breathing', 'Fatigue after Exercise',
        'Difficulty Sleeping',
    ),
    "Cancer": (
        'Unexplained Weight Loss', 'Pain', 'Lumps', 'Persistent Fatigue',
        'Fever/Night Sweats', 'Persistent Cough', 'Skin Changes',
        'Loss of Appetite',
    ),
    "Arthritis": (
        'Joint Pain', 'Swelling', 'Stiffness', 'Reduced Mobility', 'Fatigue',
        'Warm Joints', 'Tenderness', 'Morning Stiffness >1h',
    ),
}

# Any label without its own profile is treated as Arthritis.
_FALLBACK_CONDITION = "Arthritis"

# Oxygen saturation is a percentage and cannot exceed 100.
_MAX_SPO2 = 100.0

# Generate one synthetic patient record per sampled condition.
# NOTE: the order of the random draws below is deliberate -- changing it
# would change every value produced from the seeded global RNG.
for c in conditions:
    profile_key = c if c in _VITAL_PROFILES else _FALLBACK_CONDITION
    profile = _VITAL_PROFILES[profile_key]

    # ---- Condition-specific vital values ----
    age = np.random.randint(*profile["age"])
    g = np.random.normal(*profile["glucose"])
    b = np.random.normal(*profile["bp"])
    m = np.random.normal(*profile["bmi"])
    # The normal draw is unbounded, so its upper tail would yield SpO2
    # readings above 100 %. Cap after drawing so the RNG stream and all
    # other values stay exactly as before.
    o2 = min(np.random.normal(*profile["o2"]), _MAX_SPO2)
    stay = np.random.randint(*profile["stay"])

    # ---- Vital signs drawn from the same distribution for every condition ----
    heart = np.random.normal(80, 8)
    resp = np.random.normal(18, 3)
    temperature = np.random.normal(37, 0.5)

    ages.append(age)
    glucose.append(g)
    bp.append(b)
    bmi.append(m)
    oxygen.append(o2)
    hr.append(heart)
    rr.append(resp)
    temp.append(temperature)
    length_stay.append(stay)

    # ---- Waist circumference: 2.5 x BMI plus Gaussian noise (sd 2) ----
    waist = m*2.5 + np.random.normal(0, 2)
    waist_circ.append(waist)

    # ---- Symptoms (binary flags) ----
    vals = dict.fromkeys(symptom_cols, 0)
    for s in _CONDITION_SYMPTOMS[profile_key]:
        vals[s] = 1
    # Only diabetic patients get the extra 30 % chance of weight loss; the
    # random draw happens for them alone, as in the original stream.
    if profile_key == "Diabetes" and np.random.rand() < 0.3:
        vals['Unexplained Weight Loss'] = 1

    for col in symptom_cols:
        symptoms[col].append(vals[col])

# ===== 2. Assemble the dataset =====
# Collect every column first and build the frame in a single constructor
# call. Numeric measurements are rounded to two decimals; the remaining
# columns are passed through as collected.
columns = {
    "Age": np.round(ages, 2),
    "Gender": genders,
    "Medical Condition": conditions,
    "Glucose": np.round(glucose, 2),
    "Blood Pressure": np.round(bp, 2),
    "BMI": np.round(bmi, 2),
    "Oxygen Saturation": np.round(oxygen, 2),
    "Heart Rate": np.round(hr, 2),
    "Respiratory Rate": np.round(rr, 2),
    "Temperature": np.round(temp, 2),
    "LengthOfStay": length_stay,
    "Waist Circumference": np.round(waist_circ, 2),
}
# Symptom flags follow the vitals, in the order given by symptom_cols.
columns.update((name, symptoms[name]) for name in symptom_cols)
df = pd.DataFrame(columns)

# Quick sanity report: first rows, share of each condition in percent,
# and the total number of columns.
print(df.head())
condition_share = df['Medical Condition'].value_counts(normalize=True)
print(condition_share.round(3)*100)
print("✅ عدد الأعمدة:", df.shape[1])


   Age  Gender Medical Condition  Glucose  Blood Pressure    BMI  \
0   46    Male          Diabetes   173.15          131.84  29.94   
1   45    Male            Cancer    94.15          142.85  29.06   
2   50  Female           Obesity   118.41          144.44  38.65   
3   48    Male          Diabetes   172.04          143.56  32.08   
4   77  Female      Hypertension   105.53          173.66  28.60   

   Oxygen Saturation  Heart Rate  Respiratory Rate  Temperature  ...  \
0              95.69       85.83             20.29        36.84  ...   
1              97.02       81.58             16.14        36.99  ...   
2              93.12       83.66             18.36        37.83  ...   
3              96.72       78.11             13.64        36.75  ...   
4              93.51       77.62             21.44        37.58  ...   

   Fever/Night Sweats  Persistent Cough  Skin Changes  Loss of Appetite  \
0                   0                 0             0                 0   
1       