In [2]:
import random
import datetime
import json

In [3]:
# Vocabulary pools used to fill the templated sentences of the synthetic
# patient records. Each pool is a plain list sampled with random.choice().

# Medical conditions a patient may discuss or be diagnosed with.
conditions = [
    "hypertension", "diabetes", "asthma", "arthritis", "migraine",
    "heart disease", "influenza", "gastritis", "anxiety", "eczema",
]

# Drug names referenced in prescriptions and conversations.
medications = [
    "aspirin", "metformin", "albuterol", "ibuprofen", "sumatriptan",
    "lisinopril", "prednisone", "atorvastatin", "sertraline", "omeprazole",
]

# Lifestyle activities reported in the behavior text.
activities = [
    "exercises", "walks", "yoga", "swimming", "cycling",
    "meditation", "weightlifting", "running", "gardening", "pilates",
]

# Symptoms a patient may report or express concern about.
symptoms = [
    "pain", "fatigue", "dizziness", "nausea", "shortness of breath",
    "cough", "fever", "headache", "swelling", "insomnia",
]

# Clinical procedures recorded in the history text.
procedures = [
    "surgery", "test", "checkup", "MRI", "biopsy",
    "X-ray", "blood test", "CT scan", "ultrasound", "consultation",
]

# Things a patient avoids.
avoidances = [
    "sugar", "salt", "dairy", "gluten", "red meat",
    "alcohol", "caffeine", "processed foods", "nuts", "soy",
]

# Mood / wellbeing notes (this pool has eight entries, the others ten).
moods = [
    "stress", "sleep issues", "good mood", "anxiety",
    "depression", "relaxation", "irritability", "energy boost",
]

In [4]:
# Seed the global RNG so that "Restart Kernel -> Run All" reproduces the same
# date pool (and every later draw made through the `random` module).
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Sampling window for synthetic event dates: 2018-01-01 through 2025-06-26.
# The upper bound was "today" when the notebook was written; it is kept as a
# literal (not datetime.date.today()) so re-runs do not drift.
NUM_SAMPLE_DATES = 50
start_date = datetime.date(2018, 1, 1)
end_date = datetime.date(2025, 6, 26)
window_days = (end_date - start_date).days  # hoisted: same for every draw

# Pool of dates (duplicates possible); each is drawn uniformly from the
# inclusive window [start_date, end_date] because randint includes both ends.
dates = [
    start_date + datetime.timedelta(days=random.randint(0, window_days))
    for _ in range(NUM_SAMPLE_DATES)
]

In [5]:
def generate_patient_data(num_patients=50, seed=None):
    """Build synthetic free-text records for a cohort of fake patients.

    Every patient gets three text fields, each made of eight templated
    statements joined by single spaces. The templates are filled from the
    notebook-level vocabulary lists (``conditions``, ``medications``,
    ``activities``, ``symptoms``, ``procedures``, ``avoidances``, ``moods``)
    and from the ``dates`` pool.

    Args:
        num_patients: Number of records to create. IDs are ``P001``,
            ``P002``, ... in order; zero or a negative value yields ``{}``.
        seed: Optional seed. When given, all draws come from a private
            ``random.Random(seed)``, so the result is reproducible and the
            global ``random`` state is left untouched. When ``None`` (the
            default) the module-level ``random`` functions are used.

    Returns:
        dict mapping each patient ID to a dict with the string fields
        ``"Conversations"``, ``"History"`` and ``"Behavior"``.

    Raises:
        IndexError: propagated from ``choice`` if a vocabulary list or
            ``dates`` is empty while at least one patient is requested.
    """
    # The `random` module and a `random.Random` instance expose the same
    # choice()/randint() API, so either one can serve as the draw source.
    rng = random if seed is None else random.Random(seed)

    def pick(pool):
        """Draw one entry from a vocabulary list."""
        return rng.choice(pool)

    def on_date():
        """Draw a date and format it like 'March 07, 2021' (month name per locale)."""
        return rng.choice(dates).strftime("%B %d, %Y")

    def in_year():
        """Draw a date and keep only its four-digit year."""
        return rng.choice(dates).strftime("%Y")

    activity_levels = ["low", "moderate", "high"]

    # NOTE: the order of draws below is deliberate and matches the template
    # order left-to-right, so results under a given global seed stay stable.
    patients = {}
    for i in range(1, num_patients + 1):
        conversations = " ".join(
            [
                f"Patient discussed {pick(conditions)} on {on_date()}",
                f"Mentioned {pick(medications)} usage on {on_date()}",
            ]
            + [f"Reported {pick(symptoms)} on {on_date()}" for _ in range(3)]
            + [
                f"Questioned about {pick(medications)} side effects on {on_date()}",
                f"Discussed {pick(procedures)} results on {on_date()}",
                f"Expressed concern over {pick(symptoms)} on {on_date()}",
            ]
        )

        history = " ".join(
            [
                f"Diagnosed with {pick(conditions)} on {in_year()}",
                f"Prescribed {pick(medications)} since {in_year()}",
            ]
            + [f"Recorded {pick(procedures)} on {on_date()}" for _ in range(3)]
            + [
                f"Noted {pick(conditions)} progression on {on_date()}",
                f"Updated {pick(medications)} dosage on {on_date()}",
                f"Completed {pick(procedures)} follow-up on {on_date()}",
            ]
        )

        behavior = " ".join(
            [
                f"Patient {pick(activities)} {rng.randint(1, 5)} times a week",
                f"Avoids {pick(avoidances)}",
            ]
            + [f"Notes {pick(moods)} on {on_date()}" for _ in range(3)]
            + [
                f"Prefers {pick(activities)} over others on {on_date()}",
                f"Reports {rng.randint(6, 10)} hours of sleep on {on_date()}",
                f"Maintains {pick(activity_levels)} activity level on {on_date()}",
            ]
        )

        patients[f"P{i:03d}"] = {
            "Conversations": conversations,
            "History": history,
            "Behavior": behavior,
        }
    return patients

In [6]:
# Build the cohort and persist it as pretty-printed JSON in the working directory.
OUTPUT_PATH = "diverse_patient_data.json"  # one definition shared by open() and the message

patient_data = generate_patient_data()

# Explicit encoding so the file is written the same way on every platform
# instead of depending on the locale's default text encoding.
with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
    json.dump(patient_data, f, indent=2)

print(f"Generated diverse data for {len(patient_data)} patients and saved to {OUTPUT_PATH}")

Generated diverse data for 50 patients and saved to diverse_patient_data.json
