In [38]:
import pandas as pd
import numpy as np
import pickle
from faker import Faker

# Initialize Faker and pin both random sources, so that a fresh
# "Restart Kernel -> Run All" regenerates exactly the same synthetic tables.
SEED = 42
fake = Faker()
Faker.seed(SEED)      # Faker providers draw from Faker's shared random generator
np.random.seed(SEED)  # np.random.choice / randint and DataFrame.sample use NumPy's global state

# Category vocabularies. Each list is sampled with np.random.choice by the
# generate_* functions below and is also pickled to app/data later in the notebook.

# Admission type labels, sampled for the admissions 'type' column.
admission_types = [
    'URGENT', 'ELECTIVE', 'EW EMER.', 'DIRECT EMER.', 'EU OBSERVATION',
    'OBSERVATION ADMIT', 'DIRECT OBSERVATION',
    'AMBULATORY OBSERVATION', 'SURGICAL SAME DAY ADMISSION'
]
# Admission source labels, sampled for the admissions 'location' column.
admission_locations = [
    'TRANSFER FROM HOSPITAL', 'TRANSFER FROM SKILLED NURSING FACILITY',
    'INTERNAL TRANSFER TO OR FROM PSYCH', 'PHYSICIAN REFERRAL',
    'EMERGENCY ROOM', 'PACU', 'PROCEDURE SITE',
    'WALK-IN/SELF REFERRAL', 'INFORMATION NOT AVAILABLE',
    'CLINIC REFERRAL'
]
# Insurance labels, sampled for the patients 'insurance' column.
insurance_types = [
    'Medicaid', 'Medicare', 'Other'
]
# Language labels for the patients 'language' column.
# NOTE(review): '?' is a literal category value, presumably a placeholder for
# an unknown / non-English language -- confirm against the consumer of this list.
languages = [
    'ENGLISH', '?'
]
# Marital status labels, sampled for the patients 'maritalStatus' column.
marital_statuses = [
    'SINGLE', 'MARRIED', 'DIVORCED', 'WIDOWED'
]
# Race / ethnicity labels, sampled for the patients 'race' column.
races = [
    'BLACK/CAPE VERDEAN', 'HISPANIC/LATINO - PUERTO RICAN', 'WHITE',
    'UNKNOWN', 'OTHER', 'BLACK/AFRICAN AMERICAN',
    'HISPANIC/LATINO - SALVADORAN', 'UNABLE TO OBTAIN',
    'WHITE - OTHER EUROPEAN', 'PORTUGUESE', 'HISPANIC/LATINO - CUBAN',
    'PATIENT DECLINED TO ANSWER', 'WHITE - BRAZILIAN',
    'HISPANIC OR LATINO'
]

# Drug names, sampled for the prescriptions 'drug' column.
drugs = [
    'Midodrine', 'Multivitamins W/minerals', 'Sodium Chloride',
    'Nephrocaps', 'Nicotine Polacrilex', 'OLANZapine', 'Ondansetron',
    'OxyCODONE (Immediate Release)'
]

# Diagnosis descriptions, sampled for the diagnoses 'diagnosis' column.
diagnoses_types = [
    'Urinary tract infection, site not specified',
    'Acute respiratory failure',
    'Asthma, unspecified type, unspecified',
    'Solitary pulmonary nodule',
    'Streptococcus infection in conditions classified elsewhere and of unspecified site, streptococcus, group D [Enterococcus]',
    'Dysphagia, oropharyngeal phase', 'Esophageal reflux', 'Hypoxemia',
    'Methicillin resistant pneumonia due to Staphylococcus aureus',
    'Attention deficit disorder with hyperactivity'
]

def generate_patients(n):
    """Create a DataFrame holding ``n`` synthetic patient records.

    Args:
        n: Number of patients to generate; ``patient_id`` runs from 0 to n-1.

    Returns:
        pd.DataFrame with the columns patient_id, name, age, gender,
        insurance, language, maritalStatus and race.
    """
    def _one_patient(pid):
        # Gender is drawn first because it decides which Faker name provider is used.
        sex = np.random.choice(['M', 'F'])
        if sex == "M":
            full_name = fake.name_male()
        else:
            full_name = fake.name_female()

        # Keys are inserted in the order the columns should appear.
        record = {'patient_id': pid, 'name': full_name}
        record['age'] = np.random.randint(1, 90)  # upper bound is exclusive: ages 1..89
        record['gender'] = sex
        record['insurance'] = np.random.choice(insurance_types)
        record['language'] = np.random.choice(languages)
        record['maritalStatus'] = np.random.choice(marital_statuses)
        record['race'] = np.random.choice(races)
        return record

    return pd.DataFrame([_one_patient(pid) for pid in range(n)])

def generate_staff(n):
    """Create a DataFrame holding ``n`` synthetic staff members.

    Args:
        n: Number of staff rows to generate; ``staff_id`` runs from 0 to n-1.

    Returns:
        pd.DataFrame with the columns staff_id, name, role, shift_start
        and shift_end. The two shift times are independent ``fake.time()``
        draws, so shift_end is not guaranteed to follow shift_start.
    """
    roster = [
        dict(
            staff_id=sid,
            name=fake.name(),
            role=np.random.choice(['Physician', 'Nurse', 'Admin']),
            shift_start=fake.time(),
            shift_end=fake.time(),
        )
        for sid in range(n)
    ]
    return pd.DataFrame(roster)

def generate_admissions(patients: pd.DataFrame, n):
    """Create ``n`` synthetic admissions, each linked to a random patient.

    Args:
        patients: DataFrame with a ``patient_id`` column to sample from.
        n: Number of admissions to generate; ``adm_id`` runs from 0 to n-1.

    Returns:
        pd.DataFrame with the columns adm_id, patient_id, admit_time,
        discharge_time, type and location. ``discharge_time`` is always
        None here.
    """
    def _one_admission(adm_id):
        # Draw one random patient row and keep only its id.
        chosen_patient = patients.sample(1)
        return {
            'adm_id': adm_id,
            'patient_id': chosen_patient.patient_id.values[0],
            'admit_time': fake.date_time(),
            'discharge_time': None,
            'type': np.random.choice(admission_types),
            'location': np.random.choice(admission_locations),
        }

    return pd.DataFrame([_one_admission(adm_id) for adm_id in range(n)])

def generate_omr(admissions, n):
    """Create ``n`` synthetic OMR measurement rows linked to random admissions.

    Args:
        admissions: DataFrame with an ``adm_id`` column to sample from.
        n: Number of rows to generate; ``omr_id`` runs from 0 to n-1.

    Returns:
        pd.DataFrame with the columns omr_id, adm_id, omr_time, weight,
        bp_systolic and bp_diastolic, where bp_diastolic is always strictly
        below bp_systolic.
    """
    table = []
    for i in range(n):
        adm_id = admissions.sample(1).adm_id.values[0]
        # NOTE(review): omr_time is drawn independently of the admission's
        # admit_time, so a measurement can predate its admission.
        omr_time = fake.date_time()
        weight = np.random.randint(60, 120)
        systolic = np.random.randint(80, 220)
        # Cap the (exclusive) upper bound by the systolic value so a row can
        # never report diastolic >= systolic. systolic >= 80 keeps the range
        # [60, min(120, systolic)) non-empty.
        diastolic = np.random.randint(60, min(120, systolic))
        table.append({
            'omr_id': i,
            'adm_id': adm_id,
            'omr_time': omr_time,
            'weight': weight,
            # 'height' is intentionally left out: not used in modeling.
            'bp_systolic': systolic,
            'bp_diastolic': diastolic,
        })
    return pd.DataFrame(table)

def generate_prescriptions(admissions, n):
    """Create ``n`` synthetic prescriptions linked to random admissions.

    Args:
        admissions: DataFrame with an ``adm_id`` column to sample from.
        n: Number of rows to generate; ``prescription_id`` runs from 0 to n-1.

    Returns:
        pd.DataFrame with the columns prescription_id, adm_id and drug.
    """
    rows = [
        {
            'prescription_id': rx_id,
            # One random admission row per prescription; keep only its id.
            'adm_id': admissions.sample(1).adm_id.values[0],
            'drug': np.random.choice(drugs),
        }
        for rx_id in range(n)
    ]
    return pd.DataFrame(rows)

def generate_diagnoses(admissions, n):
    """Create ``n`` synthetic diagnoses linked to random admissions.

    Args:
        admissions: DataFrame with an ``adm_id`` column to sample from.
        n: Number of rows to generate; ``diagnosis_id`` runs from 0 to n-1.

    Returns:
        pd.DataFrame with the columns diagnosis_id, adm_id and diagnosis.
    """
    diagnosis_rows = [
        {
            'diagnosis_id': dx_id,
            # One random admission row per diagnosis; keep only its id.
            'adm_id': admissions.sample(1).adm_id.values[0],
            'diagnosis': np.random.choice(diagnoses_types),
        }
        for dx_id in range(n)
    ]
    return pd.DataFrame(diagnosis_rows)

In [27]:
# Build a small sample of every table. The call order is deliberate: each
# generator consumes random state, so reordering would change the output.
staff, patients = generate_staff(10), generate_patients(10)

admissions = generate_admissions(patients, 5)
# discharge_time is generated as all-None; cast it so the column holds NaT.
admissions["discharge_time"] = pd.to_datetime(admissions["discharge_time"])

# The three child tables all hang off the admissions' adm_id values.
omr, prescriptions, diagnoses = (
    make_table(admissions, 5)
    for make_table in (generate_omr, generate_prescriptions, generate_diagnoses)
)

In [28]:
# Preview the first rows of the generated staff table.
staff.head()

Unnamed: 0,staff_id,name,role,shift_start,shift_end
0,0,Jacob Mckay,Physician,08:24:34,08:26:12
1,1,Jessica Rowland,Physician,17:02:10,06:17:45
2,2,Susan James,Admin,06:01:23,21:21:33
3,3,Jeremy Moore,Nurse,04:45:10,15:13:09
4,4,Mrs. Casey Lopez MD,Physician,20:50:59,01:31:15


In [29]:
# Preview the first rows of the generated patients table.
patients.head()

Unnamed: 0,patient_id,name,age,gender,insurance,language,maritalStatus,race
0,0,Amber Nelson,19,F,Medicare,ENGLISH,MARRIED,WHITE - OTHER EUROPEAN
1,1,Laura Jones,5,F,Other,?,SINGLE,PATIENT DECLINED TO ANSWER
2,2,Ronald Castro,66,M,Medicaid,?,SINGLE,BLACK/CAPE VERDEAN
3,3,Steven Watts,84,M,Other,ENGLISH,WIDOWED,HISPANIC/LATINO - PUERTO RICAN
4,4,Mr. Joseph Taylor,76,M,Medicare,ENGLISH,SINGLE,UNABLE TO OBTAIN


In [30]:
# Preview the admissions table; discharge_time shows NaT after the datetime cast above.
admissions.head()

Unnamed: 0,adm_id,patient_id,admit_time,discharge_time,type,location
0,0,7,2003-05-01 07:05:29.399338,NaT,EU OBSERVATION,PHYSICIAN REFERRAL
1,1,9,2013-10-21 20:15:52.848541,NaT,SURGICAL SAME DAY ADMISSION,TRANSFER FROM SKILLED NURSING FACILITY
2,2,2,2021-05-01 21:46:02.764500,NaT,EW EMER.,PHYSICIAN REFERRAL
3,3,1,2009-05-24 03:05:02.335876,NaT,EW EMER.,PROCEDURE SITE
4,4,9,2014-03-28 18:20:14.336934,NaT,OBSERVATION ADMIT,PROCEDURE SITE


In [31]:
# Preview the first rows of the generated OMR measurements table.
omr.head()

Unnamed: 0,omr_id,adm_id,omr_time,weight,bp_systolic,bp_diastolic
0,0,0,1979-06-20 17:33:13.304728,107,180,64
1,1,2,1996-08-07 22:58:34.943086,67,131,101
2,2,0,1986-05-10 05:43:09.035017,101,182,72
3,3,2,2019-01-14 23:22:00.864498,104,110,86
4,4,0,2011-07-08 20:12:24.303791,96,98,62


In [32]:
# Preview the first rows of the generated prescriptions table.
prescriptions.head()

Unnamed: 0,prescription_id,adm_id,drug
0,0,1,Midodrine
1,1,3,Multivitamins W/minerals
2,2,1,Multivitamins W/minerals
3,3,3,Nicotine Polacrilex
4,4,4,Sodium Chloride


In [33]:
# Preview the first rows of the generated diagnoses table.
diagnoses.head()

Unnamed: 0,diagnosis_id,adm_id,diagnosis
0,0,1,Attention deficit disorder with hyperactivity
1,1,0,Streptococcus infection in conditions classifi...
2,2,2,"Urinary tract infection, site not specified"
3,3,1,Esophageal reflux
4,4,3,Hypoxemia


In [36]:
# CSV export of the generated tables. It used to be disabled by wrapping the
# code in a string literal; an explicit flag keeps it off by default while
# leaving the code lint-able and runnable. Set SAVE_CSV = True to write files.
SAVE_CSV = False

if SAVE_CSV:
    csv_targets = {
        'app/data/staff.csv': staff,
        'app/data/patients.csv': patients,
        'app/data/admissions.csv': admissions,
        'app/data/omr.csv': omr,
        'app/data/prescriptions.csv': prescriptions,
        'app/data/diagnoses.csv': diagnoses,
    }
    for csv_path, table in csv_targets.items():
        # Pass the path directly: to_csv opens and closes the file itself,
        # so no separate open() handle is needed for the same file.
        table.to_csv(csv_path, index=False)

In [39]:
# Persist every category vocabulary as its own pickle file under app/data.
pickle_targets = (
    ("app/data/admission_types.pkl", admission_types),
    ("app/data/admission_locations.pkl", admission_locations),
    ("app/data/insurance_types.pkl", insurance_types),
    ("app/data/languages.pkl", languages),
    ("app/data/marital_statuses.pkl", marital_statuses),
    ("app/data/races.pkl", races),
    ("app/data/drugs.pkl", drugs),
    ("app/data/diagnoses_types.pkl", diagnoses_types),
)
for pkl_path, vocabulary in pickle_targets:
    # Binary mode is required by pickle; the files are written in the order listed.
    with open(pkl_path, "wb") as f:
        pickle.dump(vocabulary, f)