## Generate Sample EHR

### Using OpenAI API

In [1]:
from openai import OpenAI
import json
import os
import tiktoken
from fpdf import FPDF
from faker import Faker
from datetime import datetime as dt
import re

In [2]:
# get api key from file
with open("../../../apikeys/openai-keys.json", "r") as key_file:
    api_key = json.load(key_file)["default_api_key"]
os.environ["OPENAI_API_KEY"] = api_key

In [3]:
# get client for API call
client = OpenAI()

In [4]:
instruction = """Generate 1 sample Electronic Health Records (EHR) for a patients having Morbid Obesity with details about their medical history including clinical details such as blood pressure, BMI, family history, health conditions, diagnosis, occupation, and personal heath assessment information. Use fake information for name and date of birth"""

In [5]:
# instruction = """Generate 1 sample Electronic Health Records (EHR) for a healthy person having no disease with details about their medical history including clinical details such as blood pressure, BMI, family history, health conditions, diagnosis, occupation, and personal heath assessment information. Use fake information for name and date of birth"""

In [6]:
example_format = """##Patient Information
* Name: 
* Date of Birth: 
* Age: 
* Gender: 
* Patient ID: 
###Vital Signs (Recorded on date)
* Blood Pressure: 
* Heart Rate: 
* Respiratory Rate: 
* Temperature: 
* Oxygen Saturation (SpO2): 
* BMI: 
###Family History
* Father: 
* Mother: 
* Siblings: 
###Medical History
* Chronic Conditions:
    * condition1
    * condition2
* Previous Surgeries:
    * None
* Allergies:
    * if any
###Current Medications
* medicine1
* medicine2
###Health Conditions
* conditon1
* consition2
###Recent Diagnosis
* diagnosis
###Personal Health Assessment
* Lifestyle:
    * Exercises 
    * Diet: 
    * Sleep: 
    * Stress: 
* Tobacco Use: 
* Alcohol Use: 
* Occupation: 
###Physician's Notes
Add physician notes here##"""

In [7]:
# example_format = """##Patient Information
# * Name: Emily Carter
# * Date of Birth: 03/15/1985 
# * Age: 39
# * Gender: Female
# * Patient ID: 20250011234
# ###Vital Signs (Recorded on 01/26/2025)
# * Blood Pressure: 130/85 mmHg (Prehypertension range)
# * Heart Rate: 78 bpm
# * Respiratory Rate: 16 breaths/min
# * Temperature: 98.6°F
# * Oxygen Saturation (SpO2): 96%
# * BMI: 28.5 (Overweight category)
# ###Family History
# * Father: Hypertension, Type 2 Diabetes
# * Mother: Breast Cancer (diagnosed at 55)
# * Siblings: None with significant medical history
# ###Medical History
# * Chronic Conditions:
#     * Hypertension (diagnosed 2022)
#     * Mild Asthma (diagnosed 2009)
# * Previous Surgeries:
#     * Appendectomy (2018)
# * Allergies:
#     * Penicillin
# ###Current Medications
# * Amlodipine (5 mg, daily)
# * Albuterol Inhaler (as needed)
# ###Health Conditions
# * Hypertension
# * Seasonal Allergies
# ###Recent Diagnosis
# * Mild Anemia (detected during routine bloodwork, January 2025)
# ###Personal Health Assessment
# * Lifestyle:
#     * Exercises 2–3 times per week (yoga and walking)
#     * Diet: Moderately healthy, consumes a high amount of processed foods
#     * Sleep: 6–7 hours per night
#     * Stress: Moderate due to work demands
# * Tobacco Use: None
# * Alcohol Use: Occasional (1–2 glasses of wine per week)
# * Occupation: Sales Associate
# ###Physician's Notes
# Emily has shown stable blood pressure with medication but requires improvement in diet and weight management to reduce long-term cardiovascular risks. Recommended increasing physical activity to 150 minutes/week and incorporating more fruits and vegetables into her diet. Follow-up in 6 months to monitor anemia treatment and blood pressure.##"""

In [8]:
encoding = tiktoken.encoding_for_model("gpt-4o-mini")
print(encoding.name)

o200k_base


In [9]:
tokens = encoding.encode(example_format)
print(len(tokens))

209


In [10]:
# API call using prompt 1
response1 = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {
            "role": "system", 
            "content": f"You are a helpful assistant who generates sample electronic health record\nHere is an example format{example_format}"
        },
        {
            "role": "user", 
            "content": f"{instruction}"
        }
    ],
    max_completion_tokens=450,
    n=1
)

print(f"Output EHR:\n {response1.choices[0].message.content}")

Output EHR:
 ## Patient Information
* Name: John Doe
* Date of Birth: January 15, 1985
* Age: 38
* Gender: Male
* Patient ID: 123456

### Vital Signs (Recorded on 2023-10-20)
* Blood Pressure: 145/95 mmHg
* Heart Rate: 82 bpm
* Respiratory Rate: 18 breaths/min
* Temperature: 98.6 °F
* Oxygen Saturation (SpO2): 98%
* BMI: 42.1 kg/m²

### Family History
* Father: Hypertension, Type 2 Diabetes
* Mother: Morbid Obesity, Hyperlipidemia
* Siblings: One brother with obesity-related health issues

### Medical History
* Chronic Conditions:
    * Morbid Obesity
    * Hypertension
* Previous Surgeries:
    * None
* Allergies:
    * Penicillin (rash)

### Current Medications
* Lisinopril 20 mg daily
* Metformin 500 mg twice daily

### Health Conditions
* Type 2 Diabetes (recently diagnosed)
* Obstructive Sleep Apnea (OSA)

### Recent Diagnosis
* Morbid Obesity
* Essential Hypertension
* Type 2 Diabetes

### Personal Health Assessment
* Lifestyle:
    * Exercises: Rarely; sedentary lifestyle
    * 

In [11]:
#show token usage of api call
print(response1.usage)

CompletionUsage(completion_tokens=450, prompt_tokens=297, total_tokens=747, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0))


In [12]:
# response1.choices[0].message.content.split("##Patient Information")[1].split("---")[0].split("\n")
try:
    ehr = response1.choices[0].message.content.split("## Patient Information")[1].split("\n")
except IndexError:
    ehr = response1.choices[0].message.content.split("##Patient Information")[1].split("\n")
ehr

['',
 '* Name: John Doe',
 '* Date of Birth: January 15, 1985',
 '* Age: 38',
 '* Gender: Male',
 '* Patient ID: 123456',
 '',
 '### Vital Signs (Recorded on 2023-10-20)',
 '* Blood Pressure: 145/95 mmHg',
 '* Heart Rate: 82 bpm',
 '* Respiratory Rate: 18 breaths/min',
 '* Temperature: 98.6 °F',
 '* Oxygen Saturation (SpO2): 98%',
 '* BMI: 42.1 kg/m²',
 '',
 '### Family History',
 '* Father: Hypertension, Type 2 Diabetes',
 '* Mother: Morbid Obesity, Hyperlipidemia',
 '* Siblings: One brother with obesity-related health issues',
 '',
 '### Medical History',
 '* Chronic Conditions:',
 '    * Morbid Obesity',
 '    * Hypertension',
 '* Previous Surgeries:',
 '    * None',
 '* Allergies:',
 '    * Penicillin (rash)',
 '',
 '### Current Medications',
 '* Lisinopril 20 mg daily',
 '* Metformin 500 mg twice daily',
 '',
 '### Health Conditions',
 '* Type 2 Diabetes (recently diagnosed)',
 '* Obstructive Sleep Apnea (OSA)',
 '',
 '### Recent Diagnosis',
 '* Morbid Obesity',
 '* Essential Hype

In [13]:
# Create a new PDF object
pdf = FPDF()
fake = Faker()

# Add a new page
pdf.add_page()

# Set the font
pdf.set_font("Arial", size=12)

# Add the text
i=1
# for line in response1.choices[0].message.content.split("##Patient Information")[1].split("---")[0].split("\n"):
for line in ehr:
    text =  line.replace('\u2013','')
    if text=='' or text == ' ':
        continue
    if re.search("Name", text):
        fake_name = fake.unique.name()
        text = f"* Name: {fake_name}"
    elif re.search("Date of Birth:", text):
        fake_dob = fake.unique.date_of_birth()
        text = f"* DOB: {fake_dob}"
    elif re.search("Physician's Notes", text):
        try:
            doctor_notes = response1.choices[0].message.content.split("### Physician's Notes")[1].replace("\n","")
        except IndexError:
            doctor_notes = response1.choices[0].message.content.split("###Physician's Notes")[1].replace("\n","")
        # print(doctor_notes)
        pdf.multi_cell(0,10, txt=f"### Physician's Notes\n{doctor_notes}", align="L")
        break
    else:
        pass
    pdf.cell(0, 10, txt=text, ln=i,)
    i=1+1

# Save the PDF
pdf.output(f"./pdfs/{fake_name}_EHR_{dt.strftime(dt.now(),"%Y%m%d")}.pdf")
# write text file
with open(f"./text_ehrs/{fake_name}_EHR_{dt.strftime(dt.now(),"%Y%m%d")}.txt", "w") as textfile:
    textfile.write(response1.choices[0].message.content)