In [178]:
import pandas as pd
import numpy as np
from faker import Faker

# Initialize Faker
fake = Faker()

admission_types = [
    'URGENT', 'ELECTIVE', 'EW EMER.', 'DIRECT EMER.', 'EU OBSERVATION',
    'OBSERVATION ADMIT', 'DIRECT OBSERVATION',
    'AMBULATORY OBSERVATION', 'SURGICAL SAME DAY ADMISSION'
]
admission_locations = [
    'TRANSFER FROM HOSPITAL', 'TRANSFER FROM SKILLED NURSING FACILITY',
    'INTERNAL TRANSFER TO OR FROM PSYCH', 'PHYSICIAN REFERRAL',
    'EMERGENCY ROOM', 'PACU', 'PROCEDURE SITE',
    'WALK-IN/SELF REFERRAL', 'INFORMATION NOT AVAILABLE',
    'CLINIC REFERRAL'
]
insurance_types = [
    'Medicaid', 'Medicare', 'Other'
]
languages = [
    'ENGLISH', '?'
]
marital_statuses = [
    'SINGLE', 'MARRIED', 'DIVORCED', 'WIDOWED'
]
races = [
    'BLACK/CAPE VERDEAN', 'HISPANIC/LATINO - PUERTO RICAN', 'WHITE',
    'UNKNOWN', 'OTHER', 'BLACK/AFRICAN AMERICAN',
    'HISPANIC/LATINO - SALVADORAN', 'UNABLE TO OBTAIN',
    'WHITE - OTHER EUROPEAN', 'PORTUGUESE', 'HISPANIC/LATINO - CUBAN',
    'PATIENT DECLINED TO ANSWER', 'WHITE - BRAZILIAN',
    'HISPANIC OR LATINO'
]

def generate_patients(n):
    patients = []
    for i in range(n):
        gender = np.random.choice(['M', 'F'])
        patients.append({
            'patient_id': i,
            'name': fake.name_male() if gender == "M" else fake.name_female(),
            'age': np.random.randint(1, 90),
            'gender': gender,
            'insurance': np.random.choice(insurance_types),
            'language': np.random.choice(languages),
            'maritalStatus': np.random.choice(marital_statuses),
            'race': np.random.choice(races),
        })
    return pd.DataFrame(patients)

def generate_staff(n):
    staff = []
    for i in range(n):
        staff.append({
            'staff_id': i,
            'name': fake.name(),
            'role': np.random.choice(['Physician', 'Nurse', 'Admin']),
            'shift_start': fake.time(),
            'shift_end': fake.time()
        })
    return pd.DataFrame(staff)

def generate_admissions(patients:pd.DataFrame, n):
    admissions = []
    for i in range(n):
        admissions.append({
            'adm_id': i,
            'patient_id': patients.sample(1).patient_id.values[0],
            'admit_time': fake.date_time(),
            'discharge_time': None,
            'type': np.random.choice(admission_types),
            'location': np.random.choice(admission_locations),
            
        })
    return pd.DataFrame(admissions)

def generate_omr(admissions, n):
    table = []
    for i in range(n):
        omr = {
            'omr_id': i,
            'adm_id': admissions.sample(1).adm_id.values[0],
            'omr_time': fake.date_time(),
            'weight': np.random.randint(60, 120),
            ##'height': np.random.randint(150, 220)/ 10.0, #not used in modeling
            'bp_systolic': np.random.randint(80, 220),
            'bp_diastolic': np.random.randint(60, 120),
        }
        table.append(omr)
    return pd.DataFrame(table)

In [179]:
staff = generate_staff(10)
patients = generate_patients(10)
admissions = generate_admissions(patients, 5)
admissions.discharge_time = pd.to_datetime(admissions.discharge_time)
omr = generate_omr(admissions, 5)

In [180]:
staff.head()

Unnamed: 0,staff_id,name,role,shift_start,shift_end
0,0,Joshua Johnston,Admin,10:36:35,12:43:19
1,1,Michelle Morris,Physician,20:29:35,15:02:53
2,2,Danielle Wise,Physician,18:52:00,05:46:37
3,3,William Ramirez,Admin,14:09:56,03:31:29
4,4,Charles Day,Admin,14:48:33,05:44:21


In [181]:
patients.head()

Unnamed: 0,patient_id,name,age,gender,insurance,language,maritalStatus,race
0,0,Anthony Gregory,77,M,Medicaid,?,DIVORCED,HISPANIC/LATINO - PUERTO RICAN
1,1,Marie Waters,55,F,Medicare,?,SINGLE,UNKNOWN
2,2,Daniel Campbell,20,M,Medicare,ENGLISH,MARRIED,UNKNOWN
3,3,Sherry Merritt,20,F,Other,?,DIVORCED,PORTUGUESE
4,4,Jennifer Williams,64,F,Other,?,MARRIED,PATIENT DECLINED TO ANSWER


In [182]:
admissions.head()

Unnamed: 0,adm_id,patient_id,admit_time,discharge_time,type,location
0,0,5,2021-06-30 08:44:32.580594,NaT,EW EMER.,PHYSICIAN REFERRAL
1,1,4,2001-01-08 16:28:27.022862,NaT,EU OBSERVATION,WALK-IN/SELF REFERRAL
2,2,6,1999-01-06 11:24:32.302527,NaT,EW EMER.,PROCEDURE SITE
3,3,8,1979-07-08 16:10:52.826061,NaT,OBSERVATION ADMIT,PHYSICIAN REFERRAL
4,4,2,2010-11-26 15:20:34.160891,NaT,EW EMER.,INTERNAL TRANSFER TO OR FROM PSYCH


In [183]:
omr.head()

Unnamed: 0,omr_id,adm_id,omr_time,weight,bp_systolic,bp_diastolic
0,0,2,1983-10-04 00:51:17.720076,76,85,101
1,1,4,1992-02-14 04:12:36.459153,118,102,74
2,2,3,1978-06-30 14:53:39.506357,90,136,64
3,3,4,2002-05-28 13:57:11.023112,88,134,61
4,4,0,1990-10-13 17:50:27.275299,74,208,118


In [184]:
admissions.dtypes

adm_id                     int64
patient_id                 int64
admit_time        datetime64[ns]
discharge_time    datetime64[ns]
type                      object
location                  object
dtype: object