# EHR Data Profiler

Documentation of the functions available in the library as well as an in-depth tutorial on the use of `text_search` can be found on the project's GitHub page:
<a href="https://github.com/ctsidev/ehr-data-profiler#function-library">https://github.com/ctsidev/ehr-data-profiler#function-library</a>

### Run the next cell to perform all the imports, which include Pandas and the EHR data analysis functions:


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from lib.ehr_dp_lib import *
# Notebook-wide pandas display settings:
# - max_colwidth = None: do not truncate the contents of wide text columns.
# - max_rows = 500: show up to 500 rows before pandas abbreviates the output.
pd.options.display.max_colwidth = None
pd.options.display.max_rows = 500

### Run the following block to describe the tables in your Data folder:

In [None]:
# Describe the tables found in the Data folder (per this cell's heading).
# describe_tables is brought in by the star import from lib.ehr_dp_lib.
describe_tables()

### PATIENT_DEMOGRAPHICS

In [None]:
# Read the patient demographics extract from the Data folder (the path is
# relative to the notebook's working directory).
patient_demographics_df = pd.read_csv(filepath_or_buffer='Data/Patient_Demographics.csv')
# Keeping the DataFrame as the cell's final expression renders it as the cell output.
patient_demographics_df

In [None]:
# Run missingness() on the patient demographics table.
# NOTE(review): defined in lib.ehr_dp_lib (not shown); presumably reports missing values per column - confirm.
missingness(patient_demographics_df)

In [None]:
# Run table_1() on the patient demographics table.
# NOTE(review): presumably a "Table 1"-style cohort summary; implementation is in lib.ehr_dp_lib - confirm.
table_1(patient_demographics_df)

In [None]:
# Run catbar() on the LANGUAGE column of the patient demographics table; graph=False leaves the bar graph off.
catbar(patient_demographics_df, 'LANGUAGE', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run catbar() on the SEX column of the patient demographics table; graph=False leaves the bar graph off.
catbar(patient_demographics_df, 'SEX', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run catbar() on the MARITAL_STATUS column of the patient demographics table; graph=False leaves the bar graph off.
catbar(patient_demographics_df, 'MARITAL_STATUS', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run catbar() on the ETHNICITY column of the patient demographics table; graph=False leaves the bar graph off.
catbar(patient_demographics_df, 'ETHNICITY', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run numstats() on the AGE column of the patient demographics table.
# NOTE(review): presumably numeric summary statistics; see lib.ehr_dp_lib to confirm.
numstats(patient_demographics_df, 'AGE')

In [None]:
# Run catbar() on the RELIGION column of the patient demographics table; graph=False leaves the bar graph off.
catbar(patient_demographics_df, 'RELIGION', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run catbar() on the RACE column of the patient demographics table; graph=False leaves the bar graph off.
catbar(patient_demographics_df, 'RACE', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run catbar() on the SEXUAL_ORIENTATION column of the patient demographics table; graph=False leaves the bar graph off.
catbar(patient_demographics_df, 'SEXUAL_ORIENTATION', graph=False) ## Set graph=True for Bar graph

### ENCOUNTERS

In [None]:
# Read the encounters extract from the Data folder (the path is relative to
# the notebook's working directory).
encounters_df = pd.read_csv(filepath_or_buffer='Data/Encounters.csv')
# Keeping the DataFrame as the cell's final expression renders it as the cell output.
encounters_df

In [None]:
# Run missingness() on the encounters table.
# NOTE(review): defined in lib.ehr_dp_lib (not shown); presumably reports missing values per column - confirm.
missingness(encounters_df)

In [None]:
# Run occurrence_stats() on the IP_ENC_ID column of the encounters table.
# NOTE(review): presumably summarizes how often each ID value occurs - confirm in lib.ehr_dp_lib.
occurrence_stats(encounters_df, 'IP_ENC_ID')

In [None]:
# Run dateline() on the ENCOUNTER_DATE column of the encounters table.
# NOTE(review): presumably a timeline of record counts by date - confirm in lib.ehr_dp_lib.
dateline(encounters_df, 'ENCOUNTER_DATE')

In [None]:
# Run numstats() on the ENCOUNTER_AGE column of the encounters table.
# NOTE(review): presumably numeric summary statistics; see lib.ehr_dp_lib to confirm.
numstats(encounters_df, 'ENCOUNTER_AGE')

In [None]:
# Run catbar() on the EPIC_ENCOUNTER_TYPE column of the encounters table; graph=False leaves the bar graph off.
catbar(encounters_df, 'EPIC_ENCOUNTER_TYPE', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run catbar() on the IP_VISIT_TYPE column of the encounters table; graph=False leaves the bar graph off.
catbar(encounters_df, 'IP_VISIT_TYPE', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run catbar() on the EPIC_DEPARTMENT_NAME column of the encounters table; graph=False leaves the bar graph off.
catbar(encounters_df, 'EPIC_DEPARTMENT_NAME', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run catbar() on the HOSP_DISCHARGE_DISPOSITION column of the encounters table; graph=False leaves the bar graph off.
catbar(encounters_df, 'HOSP_DISCHARGE_DISPOSITION', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run catbar() on the ED_DISPOSITION column of the encounters table; graph=False leaves the bar graph off.
catbar(encounters_df, 'ED_DISPOSITION', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run catbar() on the DEPARTMENT_SPECIALTY column of the encounters table; graph=False leaves the bar graph off.
catbar(encounters_df, 'DEPARTMENT_SPECIALTY', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run catbar() on the LOCATION column of the encounters table; graph=False leaves the bar graph off.
catbar(encounters_df, 'LOCATION', graph=False) ## Set graph=True for Bar graph

### ENCOUNTER_DIAGNOSES

In [None]:
# Read the encounter diagnoses extract from the Data folder (the path is
# relative to the notebook's working directory).
encounter_diagnoses_df = pd.read_csv(filepath_or_buffer='Data/Encounter_Diagnoses.csv')
# Keeping the DataFrame as the cell's final expression renders it as the cell output.
encounter_diagnoses_df

In [None]:
# Run missingness() on the encounter diagnoses table.
# NOTE(review): defined in lib.ehr_dp_lib (not shown); presumably reports missing values per column - confirm.
missingness(encounter_diagnoses_df)

In [None]:
# Run occurrence_stats() on the IP_ENC_ID column of the encounter diagnoses table.
# NOTE(review): presumably summarizes how often each ID value occurs - confirm in lib.ehr_dp_lib.
occurrence_stats(encounter_diagnoses_df, 'IP_ENC_ID')

In [None]:
# Run dateline() on the DIAGNOSIS_DATE column of the encounter diagnoses table.
# NOTE(review): presumably a timeline of record counts by date - confirm in lib.ehr_dp_lib.
dateline(encounter_diagnoses_df, 'DIAGNOSIS_DATE')

In [None]:
# Run catbar() on the PRESENT_ON_ADMISSION column of the encounter diagnoses table; graph=False leaves the bar graph off.
catbar(encounter_diagnoses_df, 'PRESENT_ON_ADMISSION', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run catbar() on the ADMISSION_DIAGNOSIS_FLAG column of the encounter diagnoses table; graph=False leaves the bar graph off.
catbar(encounter_diagnoses_df, 'ADMISSION_DIAGNOSIS_FLAG', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run catbar() on the HOSPITAL_FINAL_DIAGNOSIS column of the encounter diagnoses table; graph=False leaves the bar graph off.
catbar(encounter_diagnoses_df, 'HOSPITAL_FINAL_DIAGNOSIS', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run catbar() on the PRIMARY_DIAGNOSIS_FLAG column of the encounter diagnoses table; graph=False leaves the bar graph off.
catbar(encounter_diagnoses_df, 'PRIMARY_DIAGNOSIS_FLAG', graph=False) ## Set graph=True for Bar graph

### PROCEDURES

In [None]:
# Read the procedures extract from the Data folder (the path is relative to
# the notebook's working directory).
procedures_df = pd.read_csv(filepath_or_buffer='Data/Procedures.csv')
# Keeping the DataFrame as the cell's final expression renders it as the cell output.
procedures_df

In [None]:
# Run missingness() on the procedures table.
# NOTE(review): defined in lib.ehr_dp_lib (not shown); presumably reports missing values per column - confirm.
missingness(procedures_df)

In [None]:
# Run occurrence_stats() on the IP_ENC_ID column of the procedures table.
# NOTE(review): presumably summarizes how often each ID value occurs - confirm in lib.ehr_dp_lib.
occurrence_stats(procedures_df, 'IP_ENC_ID')

In [None]:
# Run dateline() on the PROCEDURE_DATE column of the procedures table.
# NOTE(review): presumably a timeline of record counts by date - confirm in lib.ehr_dp_lib.
dateline(procedures_df, 'PROCEDURE_DATE')

In [None]:
# Run catbar() on the PROCEDURE_DESCRIPTION column of the procedures table; graph=False leaves the bar graph off.
catbar(procedures_df, 'PROCEDURE_DESCRIPTION', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run catbar() on the PROCEDURE_CODE column of the procedures table; graph=False leaves the bar graph off.
catbar(procedures_df, 'PROCEDURE_CODE', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run catbar() on the PROCEDURE_TYPE column of the procedures table; graph=False leaves the bar graph off.
catbar(procedures_df, 'PROCEDURE_TYPE', graph=False) ## Set graph=True for Bar graph

### FLOWSHEET_VITALS

In [None]:
# Read the flowsheet vitals extract from the Data folder (the path is
# relative to the notebook's working directory).
flowsheet_vitals_df = pd.read_csv(filepath_or_buffer='Data/Flowsheet_Vitals.csv')
# Keeping the DataFrame as the cell's final expression renders it as the cell output.
flowsheet_vitals_df

In [None]:
# Run missingness() on the flowsheet vitals table.
# NOTE(review): defined in lib.ehr_dp_lib (not shown); presumably reports missing values per column - confirm.
missingness(flowsheet_vitals_df)

In [None]:
# Run occurrence_stats() on the IP_ENC_ID column of the flowsheet vitals table.
# NOTE(review): presumably summarizes how often each ID value occurs - confirm in lib.ehr_dp_lib.
occurrence_stats(flowsheet_vitals_df, 'IP_ENC_ID')

In [None]:
# Run dateline() on the VITAL_SIGN_TAKEN_TIME column of the flowsheet vitals table.
# NOTE(review): presumably a timeline of record counts by date - confirm in lib.ehr_dp_lib.
dateline(flowsheet_vitals_df, 'VITAL_SIGN_TAKEN_TIME')

In [None]:
# Run catbar() on the VITAL_SIGN_TYPE column of the flowsheet vitals table; graph=False leaves the bar graph off.
catbar(flowsheet_vitals_df, 'VITAL_SIGN_TYPE', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run flow_stats() on the whole flowsheet vitals table.
# NOTE(review): presumably per-vital-sign summary statistics; implementation is in lib.ehr_dp_lib - confirm.
flow_stats(flowsheet_vitals_df)

### LABS

In [None]:
# Read the labs extract from the Data folder (the path is relative to the
# notebook's working directory).
labs_df = pd.read_csv(filepath_or_buffer='Data/Labs.csv')
# Keeping the DataFrame as the cell's final expression renders it as the cell output.
labs_df

In [None]:
# Run missingness() on the labs table.
# NOTE(review): defined in lib.ehr_dp_lib (not shown); presumably reports missing values per column - confirm.
missingness(labs_df)

In [None]:
# Run occurrence_stats() on the IP_ORDER_PROC_ID column of the labs table.
# NOTE(review): presumably summarizes how often each ID value occurs - confirm in lib.ehr_dp_lib.
occurrence_stats(labs_df, 'IP_ORDER_PROC_ID')

In [None]:
# Run dateline() on the ORDER_TIME column of the labs table.
# NOTE(review): presumably a timeline of record counts by date - confirm in lib.ehr_dp_lib.
dateline(labs_df, 'ORDER_TIME')

In [None]:
# Run catbar() on the PROCEDURE_CODE column of the labs table; graph=False leaves the bar graph off.
catbar(labs_df, 'PROCEDURE_CODE', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run catbar() on the COMPONENT_NAME column of the labs table; graph=False leaves the bar graph off.
catbar(labs_df, 'COMPONENT_NAME', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run catbar() on the PROCEDURE_DESCRIPTION column of the labs table; graph=False leaves the bar graph off.
catbar(labs_df, 'PROCEDURE_DESCRIPTION', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run lab_stats() on the labs table with top=10.
# NOTE(review): presumably limits the statistics to the ten most frequent labs; verify the meaning of `top` in lib.ehr_dp_lib.
lab_stats(labs_df, top=10)

### MEDICATIONS

In [None]:
# Read the medications extract from the Data folder (the path is relative to
# the notebook's working directory).
medications_df = pd.read_csv(filepath_or_buffer='Data/Medications.csv')
# Keeping the DataFrame as the cell's final expression renders it as the cell output.
medications_df

In [None]:
# Run missingness() on the medications table.
# NOTE(review): defined in lib.ehr_dp_lib (not shown); presumably reports missing values per column - confirm.
missingness(medications_df)

In [None]:
# Run occurrence_stats() on the IP_ORDER_MED_ID column of the medications table.
# NOTE(review): presumably summarizes how often each ID value occurs - confirm in lib.ehr_dp_lib.
occurrence_stats(medications_df, 'IP_ORDER_MED_ID')

In [None]:
# Run dateline() on the ORDER_DATE column of the medications table.
# NOTE(review): presumably a timeline of record counts by date - confirm in lib.ehr_dp_lib.
dateline(medications_df, 'ORDER_DATE')

In [None]:
# Run catbar() on the EPIC_MEDICATION_NAME column of the medications table; graph=False leaves the bar graph off.
catbar(medications_df, 'EPIC_MEDICATION_NAME', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run catbar() on the MEDISPAN_GENERIC_NAME column of the medications table; graph=False leaves the bar graph off.
catbar(medications_df, 'MEDISPAN_GENERIC_NAME', graph=False) ## Set graph=True for Bar graph

In [None]:
# Run catbar() on the MEDISPAN_CLASS_NAME column of the medications table; graph=False leaves the bar graph off.
catbar(medications_df, 'MEDISPAN_CLASS_NAME', graph=False) ## Set graph=True for Bar graph