In [30]:
import pandas as pd
import numpy as np
import plotly.express as px
from lib.DrugUtil import flatten, DrugUtil

import plotly.express as px
from lib.FhirDataUtil import FhirDataUtil
from lib.CCSDataUtil import CCSDataUtil
from IPython.display import display, HTML


def printmd(string):
    """Display ``string`` as rendered Markdown in the notebook output.

    Args:
        string: Markdown source text. Any non-``str`` object is handed to
            ``display`` unchanged, so callers that pass other displayable
            objects keep their existing rich output.
    """
    # Local import: Markdown is not among the names imported at the top of
    # the notebook, while IPython itself is already a dependency.
    from IPython.display import Markdown

    if isinstance(string, str):
        # display() on a bare str only shows its plain-text repr; wrapping it
        # in Markdown lets the front end render the markup.
        display(Markdown(string))
    else:
        display(string)

%load_ext autoreload
%autoreload 2

pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_colwidth', None)

_output_dir = 'output/'

_rxnorm_file = "input_static/RxTerms202203/RxTerms202203.txt"
_rxnorm_ingredients_file = "input_static/RxTerms202203/RxTermsIngredients202203.txt"
_rxclass_file = "input_static/_rxclass_2022-04-10.csv"

_ccs_demographics_file = "input/ccs/ccs_demographics.csv"
_ccs_medications_file = "input/ccs/ccs_medications.csv"
_ccs_conditions_file = "input/ccs/ccs_conditions.csv"

_fhir_demographics_file = "input/fhir_20230423/fhir_demographics.csv"

_pc_demographics_file = "input/pcornet/demographic_fhir.csv"
_pc_conditions_file = "input/pcornet2/diagnosis_fhir_v2.csv"

_cache_dir = 'cache/'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [32]:
# --- Project loaders -------------------------------------------------------
fhirUtil = FhirDataUtil()

# The drug lookup is populated first because the CCS loader is built on it.
drugUtil = DrugUtil()
drugUtil.load(
    rxnorm_file=_rxnorm_file,
    rxnorm_ingredients_file=_rxnorm_ingredients_file,
    rxclass_file=_rxclass_file,
)

# --- CCS: demographics, then conditions -------------------------------------
ccsUtil = CCSDataUtil(drugUtil)
ccsUtil.load_demographics(_ccs_demographics_file)
ccs_demo = ccsUtil.demographics
ccsUtil.load_conditions(_ccs_conditions_file)
ccs_cond = ccsUtil.conditions

# --- FHIR demographics -------------------------------------------------------
fhirUtil.load_demographics(fhir_demographics_file=_fhir_demographics_file)
fhir_demo = fhirUtil.demographics

# --- PCORnet: both CSVs are read directly with pandas ------------------------
pc_demo, pc_cond = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in (_pc_demographics_file, _pc_conditions_file)
)


Reading rxclass file...
Reading rxnorm file...
Loaded demographics file with entries:  99064


  self.demographics = pd.read_csv(ccs_demographics)
  self.conditions = pd.read_csv(ccs_conditions_file, delimiter=',')


Loaded CCS conditions records: 95301


In [33]:
# Normalise the PCORnet frames: parse diagnosis dates and translate PCORnet
# site ids into the provider names used on the FHIR side.

# PCORnet siteid -> provider name. Ids missing from this table are left as-is.
site_config = {
    'UCSFHEALTHPARTN': 'openepic_shared_ucsf_health',
    'UT1PARTNER': 'openepic_shared_university_of_utah_healthcare',
    'NY1PARTNER': 'openepic_shared_nyu_langone_medical_center',
    'SH2PARTNER': 'openepic_shared_sutter_health',
    'OS2PARTNER': 'openepic_shared_ochsner_health_system',
    'MF1PARTNER': 'openepic_shared_montefiore_medical_center',
    'BA1PARTNER': 'openepic_shared_baylor_scott_white'
}

pc_cond = pc_cond.assign(
    # dx_date parsed as day / abbreviated month / year; values that fail to
    # parse fall back to the parsed admit_date (NaT if that fails as well).
    dx_date_d=pd.to_datetime(
        pc_cond['dx_date'], format="%d%b%Y", errors="coerce"
    ).fillna(
        pd.to_datetime(pc_cond['admit_date'], format="%d%b%Y", errors="coerce")
    ),
    # 'site' is siteid translated through site_config.
    site=pc_cond['siteid'].replace(site_config),
)

# Same translation for demographics, stored under 'provider'.
pc_demo = pc_demo.assign(provider=pc_demo['siteid'].replace(site_config))


In [5]:

# User overlap between CCS and FHIR demographics (one row per user_id):
# left_only = CCS only, right_only = FHIR only, both = present in each.
(
    ccs_demo.drop_duplicates('user_id')
    .merge(
        fhir_demo.drop_duplicates('user_id'),
        on='user_id',
        how="outer",
        indicator=True,
    )['_merge']
    .value_counts()
)

left_only     97508
both           1555
right_only       48
Name: _merge, dtype: int64

In [6]:

# User overlap between CCS and PCORnet demographics (one row per user_id):
# left_only = CCS only, right_only = PCORnet only, both = present in each.
(
    ccs_demo.drop_duplicates('user_id')
    .merge(
        pc_demo.drop_duplicates('user_id'),
        on='user_id',
        how="outer",
        indicator=True,
    )['_merge']
    .value_counts()
)


left_only     98436
both            627
right_only        2
Name: _merge, dtype: int64

In [8]:
# User overlap between FHIR and PCORnet demographics (one row per user_id):
# left_only = FHIR only, right_only = PCORnet only, both = present in each.
(
    fhir_demo.drop_duplicates('user_id')
    .merge(
        pc_demo.drop_duplicates('user_id'),
        on='user_id',
        how="outer",
        indicator=True,
    )['_merge']
    .value_counts()
)


left_only     985
both          618
right_only     11
Name: _merge, dtype: int64

In [7]:
# Number of distinct, non-null user ids in the CCS demographics.
ccs_demo['user_id'].nunique(dropna=True)

99063

In [20]:
# Number of distinct, non-null user ids in the FHIR demographics.
fhir_demo['user_id'].nunique(dropna=True)

1603

In [25]:
# Users per FHIR provider: each (user_id, provider) pair is counted once,
# then the 15 providers with the most users are shown.
c = (
    fhir_demo
    .drop_duplicates(['user_id', 'provider'])
    .groupby(['provider'])['user_id']
    .count()
    .reset_index(name="count")
)
c.sort_values('count', ascending=False).head(15)

Unnamed: 0,provider,count
155,openepic_shared_sutter_health,272
173,openepic_shared_ucsf_health,234
189,openepic_shared_university_of_utah_healthcare,166
111,openepic_shared_nyu_langone_medical_center,130
113,openepic_shared_ochsner_health_system,73
74,openepic_shared_kaiser_permanente_california_northern,37
17,openepic_shared_baylor_scott_white,36
100,openepic_shared_montefiore_medical_center,36
154,openepic_shared_stanford_health_care,35
72,openepic_shared_john_muir_health,32


In [35]:
# Non-null user_id rows per mapped PCORnet provider.
(
    pc_demo
    .groupby('provider')['user_id']
    .count()
)

provider
openepic_shared_baylor_scott_white                27
openepic_shared_montefiore_medical_center         31
openepic_shared_nyu_langone_medical_center       112
openepic_shared_ochsner_health_system             56
openepic_shared_sutter_health                    111
openepic_shared_ucsf_health                      156
openepic_shared_university_of_utah_healthcare    136
Name: user_id, dtype: int64