In [79]:
import pandas as pd
import numpy as np
from faker import Faker

# Initialize Faker
fake = Faker()

admission_types = [
    'URGENT', 'ELECTIVE', 'EW EMER.', 'DIRECT EMER.', 'EU OBSERVATION',
    'OBSERVATION ADMIT', 'DIRECT OBSERVATION',
    'AMBULATORY OBSERVATION', 'SURGICAL SAME DAY ADMISSION'
]
admission_locations = [
    'TRANSFER FROM HOSPITAL', 'TRANSFER FROM SKILLED NURSING FACILITY',
    'INTERNAL TRANSFER TO OR FROM PSYCH', 'PHYSICIAN REFERRAL',
    'EMERGENCY ROOM', 'PACU', 'PROCEDURE SITE',
    'WALK-IN/SELF REFERRAL', 'INFORMATION NOT AVAILABLE',
    'CLINIC REFERRAL'
]
insurance_types = [
    'Medicaid', 'Medicare', 'Other'
]
languages = [
    'ENGLISH', '?'
]
marital_statuses = [
    'SINGLE', 'MARRIED', 'DIVORCED', 'WIDOWED'
]
races = [
    'BLACK/CAPE VERDEAN', 'HISPANIC/LATINO - PUERTO RICAN', 'WHITE',
    'UNKNOWN', 'OTHER', 'BLACK/AFRICAN AMERICAN',
    'HISPANIC/LATINO - SALVADORAN', 'UNABLE TO OBTAIN',
    'WHITE - OTHER EUROPEAN', 'PORTUGUESE', 'HISPANIC/LATINO - CUBAN',
    'PATIENT DECLINED TO ANSWER', 'WHITE - BRAZILIAN',
    'HISPANIC OR LATINO'
]

def generate_patients(n):
    patients = []
    for i in range(n):
        gender = np.random.choice(['M', 'F'])
        patients.append({
            'id': i,
            'name': fake.name_male() if gender == "M" else fake.name_female(),
            'age': np.random.randint(1, 90),
            'gender': gender,
            'insurance': np.random.choice(insurance_types),
            'language': np.random.choice(languages),
            'maritalStatus': np.random.choice(marital_statuses),
            'race': np.random.choice(races),
        })
    return pd.DataFrame(patients)

def generate_staff(n):
    staff = []
    for i in range(n):
        staff.append({
            'id': i,
            'name': fake.name(),
            'role': np.random.choice(['Physician', 'Nurse', 'Admin']),
            'shift_start': fake.time(),
            'shift_end': fake.time()
        })
    return pd.DataFrame(staff)

def generate_admissions(patients:pd.DataFrame, n):
    admissions = []
    for i in range(n):
        admissions.append({
            'id': patients.sample(1).id.values[0],
            'adm_id': i,
            'admit_time': fake.date_time(),
            'discharge_time': None,
            'type': np.random.choice(admission_types),
            'location': np.random.choice(admission_locations),
            
        })
    return pd.DataFrame(admissions)

def generate_omr_entry(id, adm_id):
    omr = {
        'id': id,
        'adm_id': adm_id,
        'omr_time': fake.date_time(),
        'weight': np.random.randint(60, 120),
        ##'height': np.random.randint(150, 220)/ 10.0, #not used in modeling
        'bp_systolic': np.random.randint(80, 220),
        'bp_diastolic': np.random.randint(60, 120),
    }
    return omr
def generate_omr(admissions:pd.DataFrame):
    omr = pd.DataFrame([])
    for i in range(len(admissions)):
        adm_id = admissions.iloc[i].adm_id
        if omr is None:
            id = 0
        else:
            id = len(omr)
        entry = generate_omr_entry(id, adm_id)
        if omr is None:
            omr = pd.DataFrame([entry])
        else:
            pd.concat([omr, pd.DataFrame([entry])])
    return omr

In [80]:
staff = generate_staff(10)
patients = generate_patients(10)
admissions = generate_admissions(patients, 5)
admissions.discharge_time = pd.to_datetime(admissions.discharge_time)
omr = generate_omr(admissions)

In [81]:
staff.head()

Unnamed: 0,id,name,role,shift_start,shift_end
0,0,Kimberly Malone,Admin,17:16:29,15:58:32
1,1,Bryan Smith,Physician,20:43:53,16:31:37
2,2,Destiny Torres,Admin,03:13:09,16:12:46
3,3,Matthew Russo,Nurse,06:28:28,11:29:34
4,4,Bruce Johnson,Nurse,06:05:19,09:30:08


In [82]:
patients.head()

Unnamed: 0,id,name,age,gender,insurance,language,maritalStatus,race
0,0,Johnny Castaneda,25,M,Medicare,?,SINGLE,WHITE
1,1,Robert Wilson,42,M,Other,ENGLISH,MARRIED,WHITE
2,2,Adam Johnson,52,M,Other,?,WIDOWED,HISPANIC/LATINO - SALVADORAN
3,3,Erin Santos,38,F,Medicare,?,DIVORCED,HISPANIC/LATINO - CUBAN
4,4,Patrick Chen,27,M,Other,?,SINGLE,BLACK/CAPE VERDEAN


In [83]:
admissions.head()

Unnamed: 0,id,adm_id,admit_time,discharge_time,type,location
0,2,0,2002-05-02 01:43:42.319441,NaT,OBSERVATION ADMIT,EMERGENCY ROOM
1,1,1,1976-06-20 19:55:11.470970,NaT,OBSERVATION ADMIT,PACU
2,0,2,2010-05-16 19:41:13.834798,NaT,ELECTIVE,PACU
3,5,3,1980-09-25 17:45:43.418315,NaT,DIRECT EMER.,INFORMATION NOT AVAILABLE
4,6,4,1977-02-22 22:55:46.443629,NaT,OBSERVATION ADMIT,WALK-IN/SELF REFERRAL


In [85]:
omr.head

<bound method NDFrame.head of Empty DataFrame
Columns: []
Index: []>

In [84]:
admissions.dtypes

id                         int64
adm_id                     int64
admit_time        datetime64[ns]
discharge_time    datetime64[ns]
type                      object
location                  object
dtype: object