In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
from lib.DrugUtil import flatten, DrugUtil

import plotly.express as px
from lib.FhirDataUtil import FhirDataUtil
from lib.CCSDataUtil import CCSDataUtil

def printmd(string):
    display(string)

%load_ext autoreload
%autoreload 2

pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_colwidth', None)

_fhir_conditions_file = "input/fhir_conditions_20221003.csv"
_fhir_medications_file = "input/fhir_medications_20221003.csv"
_fhir_medications_ref_file = "input/fhir_medications_references_20221003.csv"
_fhir_demographics_file = "input/fhir_demographics_20221113.csv"
_output_dir = 'output/'

_rxnorm_file = "input_static/RxTerms202203/RxTerms202203.txt"
_rxnorm_ingredients_file = "input_static/RxTerms202203/RxTermsIngredients202203.txt"
_rxclass_file = "input_static/_rxclass_2022-04-10.csv"

_ccs_demographics_file = "input/ccs_demographics.csv"
_ccs_medications_file = "input/ccs_medications.csv"
_ccs_conditions_file = "input/ccs_conditions.csv"

_pc_demographics_file = "input/pcornet/demographic_fhir.csv"
_pc_medications_file = "input/pcornet/med_adm_fhir.csv"


In [2]:
drugUtil = DrugUtil()
drugUtil.load(rxnorm_file=_rxnorm_file,
                        rxnorm_ingredients_file=_rxnorm_ingredients_file, 
                        rxclass_file=_rxclass_file)

fhirUtil = FhirDataUtil()
ccsUtil = CCSDataUtil(drugUtil)

fhirUtil.load_conditions(fhir_conditions_file=_fhir_conditions_file, remove_health_concerns=True)
fhirUtil.load_demographics(fhir_demographics_file=_fhir_demographics_file)

ccsUtil.load_conditions(ccs_conditions_file= _ccs_conditions_file)
ccsUtil.load_demographics(_ccs_demographics_file)


Reading rxclass file...
Reading rxnorm file...
Loaded FHIR conditions records: 37248


  self.conditions = pd.read_csv(ccs_conditions_file, delimiter=',')


Loaded CCS conditions records: 94735
Loaded demographics file with entries:  98877


  self.demographics = pd.read_csv(ccs_demographics)


In [3]:
snomed = fhirUtil.conditions[fhirUtil.conditions['coding_system'] == 'SNOMED']
print(f'Number of snomeds: {len(snomed)}')
icd10 = fhirUtil.conditions[fhirUtil.conditions['coding_system'] == 'ICD10']
print(f'Number of ICD10s: {len(icd10)}')


Number of snomeds: 19850
Number of ICD10s: 17398


In [4]:
pc_cond = pd.read_csv('input/pcornet/diagnosis_fhir.csv')
pc_demo = pd.read_csv('input/pcornet/demographic_fhir.csv')

  pc_cond = pd.read_csv('input/pcornet/diagnosis_fhir.csv')


In [70]:
fhir_cond = icd10
fhir_demo = fhirUtil.demographics
ccs_cond = ccsUtil.conditions
ccs_demo = ccsUtil.demographics

# Common Users

In [10]:
print("Comparision of FHIR to PC")
common_users = fhir_demo.drop_duplicates("user_id").merge(pc_demo.drop_duplicates("user_id"), on="user_id", how="outer", indicator="demo_merge1")
print(common_users['demo_merge1'].value_counts())
common_users = common_users[common_users['demo_merge1'] == 'both']

print("Comparison of CCS To others")
common_users = common_users.merge(ccs_demo.drop_duplicates("user_id"), on="user_id", how="outer", indicator="demo_merge2")
print(common_users['demo_merge2'].value_counts())

common_users = common_users[common_users['demo_merge2'] == 'both'].drop_duplicates("user_id")
print('Total common users: ', len(common_users))

Comparision of FHIR to PC
left_only     844
both          618
right_only     11
Name: demo_merge1, dtype: int64
Comparison of CCS To others
right_only    98260
both            616
left_only         2
Name: demo_merge2, dtype: int64
Total common users:  616


# Filter FHIR users (run if want to look at subset)

In [73]:
site_config = {
    'UCSFHEALTHPARTN': 'openepic_shared_ucsf_health',
    'UT1PARTNER': 'openepic_shared_university_of_utah_healthcare',
    'NY1PARTNER': 'openepic_shared_nyu_langone_medical_center',
    'SH2PARTNER': 'openepic_shared_sutter_health',
    'OS2PARTNER': 'openepic_shared_ochsner_health_system',
    'MF1PARTNER': 'openepic_shared_montefiore_medical_center',
    'BA1PARTNER': 'openepic_shared_baylor_scott_white'
}
pc_cond['site'] = pc_cond['siteid']
pc_cond = pc_cond.replace({'site': site_config})

FHIR conditions all:  17398


In [88]:
# get users common to both datasets
common_users_fp = fhir_demo.drop_duplicates("user_id").merge(pc_demo.drop_duplicates("user_id"), on='user_id', how="inner")

print('FHIR conditions all: ', len(fhir_cond), ' users:', len(fhir_cond.drop_duplicates("user_id")))
print('PCORNet conditions all: ', len(pc_cond), ' users:', len(pc_cond.drop_duplicates("user_id")))
fhir_cond_fp = fhir_cond[fhir_cond['user_id'].isin(common_users_fp['user_id'])]
pc_cond_fp = pc_cond[pc_cond['user_id'].isin(common_users_fp['user_id'])]
print("-------- KEEPING ONLY USERS IN BOTH DATASETS -------")
print('FHIR conditions all: ', len(fhir_cond_fp), ' users:', len(fhir_cond_fp.drop_duplicates("user_id")))
print('PCORNet conditions all: ', len(pc_cond_fp), ' users:', len(pc_cond_fp.drop_duplicates("user_id")))
print("-------- KEEPING ONLY SITES IN PCORNET -------")
fhir_cond_fp = fhir_cond_fp[fhir_cond_fp['provider'].isin(site_config.values())]
print('FHIR conditions all: ', len(fhir_cond_fp), ' users:', len(fhir_cond_fp.drop_duplicates("user_id")))
print("-------- KEEPING ONLY ONE SITE PER USER (PCORNET SITE) -------")
common_records = fhir_cond_fp.merge(pc_cond_fp, on=['provider', 'user_id'], how="inner")
common_records


FHIR conditions all:  17398  users: 1144
PCORNet conditions all:  120358  users: 622
-------- KEEPING ONLY USERS IN BOTH DATASETS -------
FHIR conditions all:  9535  users: 600
PCORNet conditions all:  118373  users: 611
-------- KEEPING ONLY SITES IN PCORNET -------
FHIR conditions all:  8348  users: 531
-------- KEEPING ONLY ONE SITE PER USER (PCORNET SITE) -------


KeyError: 'provider'

In [72]:
fhir_cond['provider'].value_counts()

openepic_shared_sutter_health                                              4008
openepic_shared_ucsf_health                                                1999
openepic_shared_university_of_utah_healthcare                              1929
openepic_shared_nyu_langone_medical_center                                 1231
openepic_shared_ochsner_health_system                                      1078
openepic_shared_baylor_scott_white                                          474
openepic_shared_montefiore_medical_center                                   461
openepic_shared_john_muir_health                                            303
openepic_shared_stanford_health_care                                        286
openepic_shared_brown_toland_physicians                                     284
openepic_shared_kaiser_permanente_california_northern                       211
openepic_shared_texas_health_resources                                      171
openepic_shared_penn_medicine           

# Run Comparison Tables

In [80]:
comparison_config = [
    {
        'title': 'Diabetes', 
        'codes': ['E10','E11'], 
        'ccs_field': 'diabetes', 
        'ccs_yes': [1], 
        'ccs_no': [2]
    },
#     {
#         'title': 'Diabetes General', 
#         'codes': ['E08','E09','E10','E11','E13'], 
#         'ccs_field': 'diabetes', 
#         'ccs_yes': [1], 
#         'ccs_no': [2]
#     },
    {
        'title': 'Hypertension', 
        'codes': ['I10'], 
        'ccs_field': 'hbp', 
        'ccs_yes': [1], 
        'ccs_no': [2]
    },
#     {
#         'title': 'Hypertension General', 
#         'codes': ['I10', 'I11'], 
#         'ccs_field': 'hbp', 
#         'ccs_yes': [1], 
#         'ccs_no': [2]
#     },
    {
        'title': 'Myocardial Infarction',
        'codes': ['I21','I22','I24'],
        'ccs_field': 'heart_attack',
        'ccs_yes': [1], 'ccs_no': [2]
    },
#     {
#         'title': 'Myocardial Infarction General',
#         'codes': ['I20.0', 'I21', 'I22', 'I24', 'I25.2', 'I25.5'],
#         'ccs_field': 'heart_attack',
#         'ccs_yes': [1], 'ccs_no': [2]
#     },
    {
        'title': 'Coronary Artery Disease',
        'codes': ['I20','I21','I22','I23','I24','I25'],
        'ccs_field': 'blockages_in_your_coronary',
        'ccs_yes': [1], 'ccs_no': [2]
    },
    {
        'title': 'Heart Failure',
        'codes': ['I50'],
        'ccs_field': 'chf',
        'ccs_yes': [1], 'ccs_no': [2]
    },
#     {
#         'title': 'Heart Failure General',
#         'codes': ['I11.0','I13.0','I50','I97.13'],
#         'ccs_field': 'chf',
#         'ccs_yes': [1], 'ccs_no': [2]
#     },
    {
        'title': 'Stroke',
        'codes': ['I63'],
        'ccs_field': 'stroke',
        'ccs_yes': [1], 'ccs_no': [2]
    },
    {
        'title': 'AFib',
        'codes': ['I48'],
        'ccs_field': 'afib',
        'ccs_yes': [1], 'ccs_no': [2]
    },
    {
        'title': 'Sleep Apnea',
        'codes': ['G47.3'],
        'ccs_field': 'sleep_apnea',
        'ccs_yes': [1], 'ccs_no': [2]
    },
    {
        'title': 'COPD',
        'codes': ['J44'],
        'ccs_field': 'copd',
        'ccs_yes': [1], 'ccs_no': [2]
    },
    {
        'title': 'Asthma',
        'codes': ['J45'],
        'ccs_field': 'asthma',
        'ccs_yes': [1], 'ccs_no': [2]
    },
    {
        'title': 'Cancer',
        'codes': ['C'],
        'ccs_field': 'cancer',
        'ccs_yes': [1], 'ccs_no': [2]
    },
    {
        'title': 'Immunodeficiency',
        'codes': ['D80', 'D81', 'D82', 'D83', 'D84', 'D70', 'O98.7', 'Z21', 'B20', 'B97.35'],
        'ccs_field': 'immunodeficiency',
        'ccs_yes': [1], 'ccs_no': [2]
    },
    {
        'title': 'HIV',
        'codes': ['Z21', 'B20', 'B97.35', 'O98.7'],
        'ccs_field': 'HIV',
        'ccs_yes': [1], 'ccs_no': [2]
    },
    {
        'title': 'Anemia',
        'codes': ['D50','D51','D52','D53.0','D53.2','D53.9','D64.9'],
        'ccs_field': 'anemia',
        'ccs_yes': [1], 'ccs_no': [2]
    },
#     {
#         'title': 'Anemia General',
#         'codes': ['D46.0','D46.1','D46.4','D46.2','D50','D51','D52','D53.0','D53.2','D53.9','D58.9','D59','D61','D55','D63','D64.9', 'D64.81', 'D64.1', 'D64.2'],
#         'ccs_field': 'anemia',
#         'ccs_yes': [1], 'ccs_no': [2]
#     }
]
# ['diabetes', 'hbp', 'blockages_in_your_coronary', 'heart_attack', 'chf', 'stroke', 'afib', 'sleep_apnea', 'copd', 'asthma', 'cancer', 'immunodeficiency', 'HIV', 'anemia', 'pregnant']



In [81]:
def compare(ccs_cond, 
                     fhir_cond, 
                     pc_cond, 
                     search_icd10_codes, 
                     common_users,
                     ccs_field, 
                     ccs_yes_values=[1], 
                     ccs_no_values=[2], 
                     condition_title=None, 
                     verbose=True):
    fhir_filter = fhir_cond['coding_code'].str.startswith(tuple(search_icd10_codes))
    fhir_cond_s = fhir_cond[fhir_filter].drop_duplicates("user_id")
    
    pc_filter = pc_cond['dx'].str.startswith(tuple(search_icd10_codes))
    pc_cond_s = pc_cond[pc_filter].drop_duplicates("user_id")
    
    ccs_filter = ccs_cond[ccs_field].isin(ccs_yes_values)
    ccs_cond_s = ccs_cond[ccs_filter].drop_duplicates('user_id')
    
    fp_all = fhir_cond_s.merge(pc_cond_s, on="user_id", how="inner")
    
    fp_any = pd.concat([fhir_cond_s['user_id'], pc_cond_s['user_id']]).drop_duplicates()
    
    
    fp_none = common_users[~common_users['user_id'].isin(fp_any)]
    
    return { 'fp_all': len(fp_all),
            'fp_any': len(fp_any),
            'fhir': len(fhir_cond_s),
            'pc': len(pc_cond_s),
            'fp_none': len(fp_none),
    }

fhir_cond_c = fhir_cond[fhir_cond['user_id'].isin(common_users_fp['user_id'])].drop_duplicates(['user_id', 'coding_code'])
pc_cond_c = pc_cond[pc_cond['user_id'].isin(common_users_fp['user_id'])].drop_duplicates(['user_id', 'dx'])
ccs_cond_c = ccs_cond[ccs_cond['user_id'].isin(common_users['user_id'])]
print("test run...")
compare(ccs_cond_c, fhir_cond_c, pc_cond_c, ['E10', 'E11'], common_users, 'diabetes', [1], [2])
    

test run...


{'fp_all': 46, 'fp_any': 82, 'fhir': 57, 'pc': 71, 'fp_none': 535}

In [84]:
result = []
for config in comparison_config:
    print('Processing ', config['title'])
    comp = compare(ccs_cond_c, fhir_cond_c, pc_cond_c, config['codes'], common_users, config['ccs_field'], [1], [2])

    resultRow = {
                'Diagnosis Group': config['title'],
                'FHIR Only': str(comp['fhir']) + ' (' + str(round((comp['fhir']*100)/len(common_users))) + '%)',
                'PC Only': str(comp['pc']) + ' (' + str(round((comp['pc']*100)/len(common_users))) + '%)',
                'Both': str(comp['fp_all']) + ' (' + str(round((comp['fp_all']*100)/len(common_users))) + '%)',
                'Either': str(comp['fp_any']) + ' (' + str(round((comp['fp_any']*100)/len(common_users))) + '%)',
                'Neither': comp['fp_none'],
                }
    result.append(resultRow)
result = pd.DataFrame(result)

Processing  Diabetes
Processing  Hypertension
Processing  Myocardial Infarction
Processing  Coronary Artery Disease
Processing  Heart Failure
Processing  Stroke
Processing  AFib
Processing  Sleep Apnea
Processing  COPD
Processing  Asthma
Processing  Cancer
Processing  Immunodeficiency
Processing  HIV
Processing  Anemia


In [85]:
print(f'     Diagnosis of {len(common_users)} patients with diagnosis present in:')
grey_columns = [c for c in result.columns if c.startswith('%')]
red_columns = ['Neither']
def highlight_cols(x):
    df = x.copy()
    df.loc[:, :] = 'background-color: none'
    df[grey_columns] = 'color: grey'
    df[red_columns] = 'color: red'
    return df 
display(result.style.apply(highlight_cols, axis = None))

     Diagnosis of 616 patients with diagnosis present in:


Unnamed: 0,Diagnosis Group,FHIR Only,PC Only,Both,Either,Neither
0,Diabetes,57 (9%),71 (12%),46 (7%),82 (13%),535
1,Hypertension,194 (31%),213 (35%),161 (26%),246 (40%),371
2,Myocardial Infarction,8 (1%),8 (1%),3 (0%),13 (2%),603
3,Coronary Artery Disease,37 (6%),60 (10%),31 (5%),66 (11%),551
4,Heart Failure,10 (2%),22 (4%),9 (1%),23 (4%),594
5,Stroke,4 (1%),5 (1%),2 (0%),7 (1%),609
6,AFib,30 (5%),29 (5%),21 (3%),38 (6%),579
7,Sleep Apnea,89 (14%),99 (16%),67 (11%),121 (20%),496
8,COPD,17 (3%),22 (4%),12 (2%),27 (4%),590
9,Asthma,79 (13%),98 (16%),62 (10%),115 (19%),502


# PcorNet

In [6]:
pc_cond = pd.read_csv('input/pcornet/diagnosis_fhir.csv')
pc_demo = pd.read_csv('input/pcornet/demographic_fhir.csv')

  pc_cond = pd.read_csv('input/pcornet/diagnosis_fhir.csv')


In [9]:
fhir_demo = fhirUtil.demographics.drop_duplicates(subset="user_id")
fhir_cond = fhirUtil.conditions[fhirUtil.conditions['coding_system'] == 'ICD10']

In [12]:
ccs_cond = ccsUtil.conditions

In [10]:
m_demo = pc_demo.drop_duplicates(subset="user_id").merge(fhir_demo.drop_duplicates(subset="user_id"), on="user_id", indicator=True, how="outer")
common_users = m_demo[m_demo['_merge'] == 'both']
m_demo['_merge'].value_counts()


right_only    844
both          618
left_only      11
Name: _merge, dtype: int64

In [69]:
print('Common total fhir users:', len(fhir_demo[fhir_demo['user_id'].isin(common_users['user_id'])].drop_duplicates('user_id')))
print('Common total pcornet users:', len(pc_demo[pc_demo['user_id'].isin(common_users['user_id'])].drop_duplicates('user_id')))
fhir_cond_pc = fhir_cond[fhir_cond['user_id'].isin(common_users['user_id'])]
pc_cond_fhir = pc_cond[pc_cond['user_id'].isin(common_users['user_id'])]
print('Common fhir users with conditions: ', len(fhir_cond_pc.drop_duplicates('user_id')))
print('Common Pcornet users with conditions:', len(pc_cond_fhir.drop_duplicates('user_id')))

Common total fhir users: 618
Common total pcornet users: 618
Common fhir users with conditions:  600
Common Pcornet users with conditions: 611


In [102]:
def compare(left, right, codes, left_field, right_field, verbose=True):
#         {
#         'title': 'Diabetes', 
#         'codes': ['E10','E11'], 
#         'ccs_field': 'diabetes', 
#         'ccs_yes': [1], 
#         'ccs_no': [2]
#     },
    
    left_filter = left[left_field].str.startswith(tuple(codes))
    left_s = left[left_filter].drop_duplicates(subset='user_id')
    left_no_s = common_users[~common_users["user_id"].isin(left_s['user_id'])].drop_duplicates(subset='user_id')
    
    right_filter = right[right_field].str.startswith(tuple(codes))
    right_s = right[right_filter].drop_duplicates(subset="user_id")
    right_no_s = common_users[~common_users["user_id"].isin(right_s['user_id'])].drop_duplicates(subset='user_id')
    
    m = left_s.merge(right_s, on="user_id", how="outer", indicator=True)
    
    left_no_s.drop('_merge', axis=1, inplace=True)
    right_no_s.drop('_merge', axis=1, inplace=True)
    m_no = left_no_s.merge(right_no_s, on="user_id", how="outer", indicator=True)
    
    both_positive = m[m['_merge']=='both']
    left_only = m[m['_merge'] == 'left_only']
    right_only = m[m['_merge'] == 'right_only']
    
    both_negative = m_no[m_no['_merge'] == 'both']
    
    r = {
        'both_len': len(both_positive),
        'left_only_len': len(left_only),
        'right_only_len': len(right_only),
        'both_negative_len': len(both_negative),
        'total_yes': len(both_positive)+len(left_only)+len(right_only),
        'total_no': len(left_only)+len(right_only)+len(both_negative)
    }
    return r
    
    
result = []
for config in comparison_config:
    print('Processing ', config['title'])
    comp = compare(
                               left=fhir_cond_pc,
                               right=pc_cond_fhir,
                               codes=config['codes'], 
                               left_field= 'coding_code',
                               right_field='dx',
                               verbose=False)

    resultRow = {
    'title': config['title'],
    'Both Pos': comp['both_len'],
    'CF %': round((comp['both_len']*100)/comp['total_yes']),
    'FHIR Pos': comp['left_only_len'],
    'C %': round((comp['left_only_len']*100)/comp['total_yes']),
    'PCORNET Pos': comp['right_only_len'],
    'F %': round((comp['right_only_len']*100)/comp['total_yes']),
    'Both Negative': comp['both_negative_len'],
    'BN %': round((comp['both_negative_len']*100)/(comp['total_no']))
    }
    result.append(resultRow)
result_df = pd.DataFrame(result)

grey_columns = ['CF %', 'F %', 'C %', 'BN %']
red_columns = ['FHIR Pos', 'PCORNET Pos']
green_columns = ['Both Pos', 'Both Negative']
def highlight_cols(x):
    df = x.copy()
    df.loc[:, :] = 'background-color: none'
    df[grey_columns] = 'color: grey'
    df[red_columns] = 'color: red'
    df[green_columns] = 'color: green'
    return df 
display(result_df.style.apply(highlight_cols, axis = None))

# result_df


Processing  Diabetes
Processing  Diabetes General
Processing  Hypertension
Processing  Hypertension General
Processing  Myocardial Infarction
Processing  Myocardial Infarction General
Processing  Coronary Artery Disease
Processing  Heart Failure
Processing  Heart Failure General
Processing  Stroke
Processing  AFib
Processing  Sleep Apnea
Processing  COPD
Processing  Asthma
Processing  Cancer
Processing  Immunodeficiency
Processing  HIV
Processing  Anemia
Processing  Anemia General


Unnamed: 0,title,Both Pos,CF %,FHIR Pos,C %,PCORNET Pos,F %,Both Negative,BN %
0,Diabetes,46,56,11,13,25,30,536,94
1,Diabetes General,47,57,10,12,25,30,536,94
2,Hypertension,161,65,33,13,52,21,372,81
3,Hypertension General,161,65,33,13,54,22,370,81
4,Myocardial Infarction,3,23,5,38,5,38,605,98
5,Myocardial Infarction General,6,27,5,23,11,50,596,97
6,Coronary Artery Disease,31,47,6,9,29,44,552,94
7,Heart Failure,9,39,1,4,13,57,595,98
8,Heart Failure General,9,39,1,4,13,57,595,98
9,Stroke,2,29,2,29,3,43,611,99


In [70]:
# Check dataset
fhir_cond_pc
pc_cond_fhir

Unnamed: 0,v1,user_id,diagnosisid,encounterid,enc_type,admit_date,dx,dx_type,dx_date,dx_source,pdx,datamartid,siteid,query_date,cdrn_facilityid
0,26,502639,{C21B53E3-3272-44D4-B1F8-78F25CC597AC},1038398261,OA,14jun2020,J31.0,10,14jun2020,FI,S,SUTTER,SH2PARTNER,14jun2022,
1,26,502639,{59185F2E-E7D9-46E3-A311-FAC5D67022FE},1136833765,OA,23nov2021,M79.18,10,23nov2021,FI,S,SUTTER,SH2PARTNER,14jun2022,
2,26,502639,{C9E93863-78D8-4CDC-90F5-D7C65E02D443},892747432,AV,19jan2018,Z23,10,19jan2018,FI,S,SUTTER,SH2PARTNER,14jun2022,
3,26,502639,{A5730640-15ED-411F-BD29-6181EB5C662D},936074887,AV,14sep2018,R92.2,10,14sep2018,FI,S,SUTTER,SH2PARTNER,14jun2022,
4,26,502639,{7F719426-21A7-463B-84F0-1A29012FF1C3},1072911350,OA,30dec2020,I49.9,10,30dec2020,FI,S,SUTTER,SH2PARTNER,14jun2022,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120339,48641,547213,842348168,UTAH10581382,AV,10aug2017,Z00.00,10,,FI,P,C4UU,UT1PARTNER,13jun2022,
120340,48641,547213,548738098,UTAH11794463,AV,18dec2017,J01.00,10,,FI,P,C4UU,UT1PARTNER,13jun2022,
120341,48641,547213,25928655,UTAH10581382,AV,10aug2017,Z12.4,10,,FI,S,C4UU,UT1PARTNER,13jun2022,
120342,48641,547213,873757065,UTAH21035279,AV,03mar2021,Z23,10,,FI,P,C4UU,UT1PARTNER,13jun2022,


In [93]:
pc_users = pc_cond_fhir[pc_cond_fhir['dx'].str.startswith(('E10', 'E11'))].drop_duplicates(subset="user_id")
fhir_users = fhir_cond_pc[fhir_cond_pc['coding_code'].str.startswith(('E10', 'E11'))].drop_duplicates(subset="user_id")

In [96]:
len(fhir_users[~fhir_users['user_id'].isin(pc_users['user_id'])])

11

In [87]:
len(common_users[~common_users['user_id'].isin((*pc_users, *fhir_users))])

536

In [103]:
len([1647, 16853, 102891, 117086, 200552, 302382, 324260, 324726, 324961, 331019, 331110, 331135, 331300, 331450, 331467, 331515, 331920, 333749, 344968, 357029, 383858, 386617, 387173, 387417, 388923, 389802, 395485, 395659, 395756, 397160, 399087]
)

31