In [1]:
# Add any needed packages
%load_ext autoreload
%autoreload 2

from google.cloud import bigquery
import networkx as nx
from collections import defaultdict
from pyvis.network import Network
from tqdm import tqdm
from scipy.interpolate import interp1d
import numpy as np

In [2]:
# BigQuery project and dataset identifiers interpolated into the table names
# of the queries below. Both are blank in this copy of the notebook.
# NOTE(review): presumably these must be filled in before the queries can run — confirm.
CURATION_PROJECT_ID = ""
EHR_OPS_DATASET_ID = ""

In [3]:
# BigQuery client bound to the curation project; reused by every query cell below.
client = bigquery.Client(project=CURATION_PROJECT_ID)



### Relevant Concepts 

#### Concepts (including Spirometry) 

In [4]:
# Ancestor concepts whose descendants are treated as COPD-related lung-function
# measurements. The list is interpolated into the SQL `IN (...)` filters below,
# so each concept id should appear exactly once.
copd_measurement_concept_ids = [
    # 37024761 (FEV1/FEVC) is intentionally left out.
    4147814,   # VC, Vital capacity, above this would be dynamic lung measures
    4090320,   # Respiratory measure (should include DLCO)
    # NOTE(review): 30080905 is not an id seen in the recorded outputs, while
    # 40480905 is; this may be a typo -- confirm against the concept table.
    30080905,  # Diffusion Capacity for the lung (loinc ?) -- not categorized?

    # 4058335 (Radiology Procedure CT Scan of Chest) is intentionally left out.

    4090335,   # FVC
    44790324,  # Diffusion Capacity for the lung (snomed ?)

    4239808,   # TLC, Total lung capacity, above this would be static lung measures
    4227420,   # RC, Residual volume, expiratory residual volume (snomed)
    3025560,   # RC, Residual volume, expiratory residual volume (loinc), w/ 37061829 above it classifier
]

### Queries 

In [5]:
# Walk the 'Is a' hierarchy downward from the configured concepts and return
# one row per (child, parent) pair with the name and domain of both sides.
#
# All tables are read from `{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}` so the
# whole query follows the notebook configuration (the base CTE previously
# pointed at a hard-coded project/dataset).
#
# The anchor rows of the recursion carry a NULL parent; the final inner join on
# the parent concept drops them, so the configured concepts only show up as
# parents in the result.
q = f"""
WITH RECURSIVE
  base_concepts AS (
    SELECT
      c.concept_id
    FROM `{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}.concept` c
    WHERE c.concept_id IN ({", ".join(map(str, copd_measurement_concept_ids))})
  ),
  concept_children AS (
      SELECT
          concept_id child_concept_id, NULL parent_concept_id
      FROM base_concepts
      UNION ALL
      SELECT cr.concept_id_1 child_concept_id, cr.concept_id_2 parent_concept_id
      FROM concept_children par
      JOIN `{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}.concept_relationship` cr
          ON cr.concept_id_2 = par.child_concept_id
              AND cr.relationship_id = 'Is a'
  )
  SELECT DISTINCT
    child_concept_id, child_concept.concept_name child_concept_name, child_concept.domain_id child_concept_domain,
    parent_concept_id, parent_concept.concept_name parent_concept_name, parent_concept.domain_id parent_concept_domain
  FROM concept_children cc
  JOIN `{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}.concept` child_concept
    ON child_concept.concept_id = cc.child_concept_id
  JOIN `{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}.concept` parent_concept
    ON parent_concept.concept_id = cc.parent_concept_id
"""

results = client.query(q).to_dataframe()
results


Unnamed: 0,child_concept_id,child_concept_name,child_concept_domain,parent_concept_id,parent_concept_name,parent_concept_domain
0,4197936,Expired carbon dioxide concentration,Measurement,4090658,Expired gas concentration,Measurement
1,44813867,Target forced expired volume in 1 second,Measurement,4241837,Forced expired volume in 1 second,Measurement
2,4147723,Expected forced expired volume in 1 second,Measurement,4241837,Forced expired volume in 1 second,Measurement
3,4294875,Peak expiratory flow rate pre steroids,Measurement,4087260,Peak expiratory flow rate,Measurement
4,4265219,Minimum volume,Measurement,4090661,Respiratory volume,Measurement
...,...,...,...,...,...,...
210,4090664,Expiratory vital capacity,Measurement,4147814,Vital capacity,Measurement
211,4092629,Inspiratory vital capacity,Measurement,4147814,Vital capacity,Measurement
212,4092508,Airway conductance,Measurement,4090320,Respiratory measure,Measurement
213,4247649,Maximum expiratory flow-static recoil curve,Measurement,4090320,Respiratory measure,Measurement


In [6]:
# Define Query 1
#
# Collect every EHR event whose concept descends (via concept_ancestor) from
# one of the configured ancestor concepts. The same SELECT is issued against
# each clinical table and the results are combined with UNION ALL; only the
# table, its id/concept/date columns and the required ancestor domain differ,
# so those are listed once here instead of repeating the SQL four times.
EVENT_SOURCES = [
    # (ancestor domain_id, table, id column, concept column, date column)
    ('Procedure', 'unioned_ehr_procedure_occurrence',
     'procedure_occurrence_id', 'procedure_concept_id', 'procedure_date'),
    ('Condition', 'unioned_ehr_condition_occurrence',
     'condition_occurrence_id', 'condition_concept_id', 'condition_start_date'),
    ('Observation', 'unioned_ehr_observation',
     'observation_id', 'observation_concept_id', 'observation_date'),
    ('Measurement', 'unioned_ehr_measurement',
     'measurement_id', 'measurement_concept_id', 'measurement_date'),
]

# Plain (non f-) string: the placeholders are filled per table via str.format.
EVENT_SELECT_TEMPLATE = """
    SELECT
        t.{id_col} event_id, par_c.domain_id domain_id, t.{date_col} event_date,
        par_c.concept_id ancestor_concept_id, par_c.concept_name ancestor_concept_name,
        chd_c.concept_id child_concept_id, chd_c.concept_name child_concept_name
    FROM `{dataset}.{table}` t
    JOIN `{dataset}.concept` chd_c
      ON chd_c.concept_id = t.{concept_col}
    JOIN `{dataset}.concept_ancestor` ca
      ON ca.descendant_concept_id = chd_c.concept_id
    JOIN `{dataset}.concept` par_c
      ON par_c.concept_id = ca.ancestor_concept_id
        AND par_c.concept_id IN ({ancestor_ids})
        AND par_c.domain_id = '{domain_id}'
"""

q = "    UNION ALL".join(
    EVENT_SELECT_TEMPLATE.format(
        dataset=f"{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}",
        ancestor_ids=", ".join(map(str, copd_measurement_concept_ids)),
        domain_id=domain_id,
        table=table,
        id_col=id_col,
        concept_col=concept_col,
        date_col=date_col,
    )
    for domain_id, table, id_col, concept_col, date_col in EVENT_SOURCES
)

In [7]:
# Run Query 1 and load every matching event row into a DataFrame.
# NOTE(review): the recorded run returned roughly 11.9 million rows, so this
# pulls a large result set into local memory.
copd_measurement_events = client.query(q).to_dataframe()
copd_measurement_events


Unnamed: 0,event_id,domain_id,event_date,ancestor_concept_id,ancestor_concept_name,child_concept_id,child_concept_name
0,11000000032211174,Measurement,2021-10-11,4090320,Respiratory measure,4353938,End tidal carbon dioxide concentration
1,11000000027209259,Measurement,2021-10-11,4090320,Respiratory measure,4353938,End tidal carbon dioxide concentration
2,11000000025102183,Measurement,2021-10-11,4090320,Respiratory measure,4353938,End tidal carbon dioxide concentration
3,11000000032189211,Measurement,2022-12-16,4090320,Respiratory measure,4353938,End tidal carbon dioxide concentration
4,11000000031374408,Measurement,2022-12-16,4090320,Respiratory measure,4353938,End tidal carbon dioxide concentration
...,...,...,...,...,...,...,...
11918064,11000000025100900,Measurement,2020-02-25,4090320,Respiratory measure,4353938,End tidal carbon dioxide concentration
11918065,11000000026495127,Measurement,2021-10-11,4090320,Respiratory measure,4353938,End tidal carbon dioxide concentration
11918066,11000000025918915,Measurement,2021-10-11,4090320,Respiratory measure,4353938,End tidal carbon dioxide concentration
11918067,11000000024325439,Measurement,2020-02-25,4090320,Respiratory measure,4353938,End tidal carbon dioxide concentration


In [8]:
# Number of event rows per child concept, shown as a {concept_id: count} dict.
dict(copd_measurement_events.child_concept_id.value_counts())


{4353938: 4714061,
 44782827: 2118432,
 4313591: 1710716,
 4101694: 1654589,
 4141684: 828596,
 4029625: 714370,
 4353947: 99575,
 44782824: 47617,
 4108448: 20629,
 4197461: 2402,
 44782659: 1341,
 44806798: 995,
 4208972: 391,
 4193585: 371,
 4269010: 371,
 40480905: 371,
 4193576: 368,
 4248526: 368,
 4294874: 368,
 4228814: 367,
 4253179: 363,
 40491860: 360,
 44811460: 360,
 4090788: 358,
 4147814: 306,
 4090668: 21,
 40488828: 3}

### Network Graph 

In [9]:
def build_nx_graph(rel_df, concept_counts=None):
    """Build a directed concept graph from parent/child relationship rows.

    Every row of ``rel_df`` contributes a child node, a parent node and one
    edge pointing from the parent to the child. Nodes are keyed by the concept
    id rendered as a string, and edges use those same keys, so each edge
    connects the nodes that carry the display attributes.

    Parameters
    ----------
    rel_df : DataFrame
        Must provide the columns ``child_concept_id``, ``child_concept_name``,
        ``child_concept_domain``, ``parent_concept_id``,
        ``parent_concept_name`` and ``parent_concept_domain``. The id columns
        must be castable to ``int``. The frame is not modified.
    concept_counts : dict, optional
        Maps concept id to an event count. When given, node ``size`` is
        interpolated from the count (0 -> 1, median count -> 10,
        max count -> 20) and the count is shown in the node ``title``.
        Concepts without a count get size 1 and a displayed count of 0.

    Returns
    -------
    networkx.DiGraph
        Nodes carry ``concept_id``, ``concept_name``, ``title``,
        ``domain_id``, ``color`` and ``size``; edges carry ``id`` (the row's
        index label in ``rel_df``).
    """
    # None instead of a shared mutable `{}` default; both mean "no counts".
    concept_counts = concept_counts or {}

    # Cast on a copy so the caller's DataFrame keeps its original dtypes.
    rel_df = rel_df.copy()
    rel_df['child_concept_id'] = rel_df['child_concept_id'].astype(int)
    rel_df['parent_concept_id'] = rel_df['parent_concept_id'].astype(int)

    # Domains without an explicit colour fall back to gray.
    colors = defaultdict(lambda: 'gray')
    colors.update({'Measurement': 'green', 'Condition': 'blue', 'Procedure': 'red'})
    G = nx.DiGraph()

    interp_concept_counts = {}
    if concept_counts:
        counts = list(concept_counts.values())
        # Piecewise-linear size scale anchored at 0, the median and the max.
        sizes = np.interp(counts, [0, np.median(counts), max(counts)], [1, 10, 20])
        interp_concept_counts = dict(zip(concept_counts.keys(), sizes))

    def _add_concept_node(concept_id, concept_name, domain_id):
        """Add (or refresh) one concept node and return its graph key."""
        concept_id = int(concept_id)
        node_key = str(concept_id)
        count = concept_counts.get(concept_id, 0)
        G.add_node(node_key,
                   concept_id=concept_id,
                   concept_name=concept_name,
                   title=f"{concept_name}\nCount: {count}",
                   domain_id=domain_id,
                   color=colors[domain_id],
                   size=interp_concept_counts.get(concept_id, 1))
        return node_key

    for i, rel in tqdm(rel_df.iterrows(), total=len(rel_df)):
        child_key = _add_concept_node(rel['child_concept_id'],
                                      rel['child_concept_name'],
                                      rel['child_concept_domain'])
        parent_key = _add_concept_node(rel['parent_concept_id'],
                                       rel['parent_concept_name'],
                                       rel['parent_concept_domain'])

        # Use the node keys here: passing the raw integer ids would make
        # add_edge create a second, attribute-less set of nodes.
        G.add_edge(parent_key, child_key, id=i)

    return G

In [10]:
# Per-concept event counts drive the node sizes in the hierarchy graph.
concept_event_counts = dict(copd_measurement_events.child_concept_id.value_counts())
nx_graph = build_nx_graph(results, concept_event_counts)


215it [00:00, 8861.26it/s]


In [11]:
# Render the concept graph as a directed pyvis network and write it to nx.html.
nt = Network(height='1300px', width='1300px', directed=True, notebook=True)
nt.from_nx(nx_graph)
nt.repulsion()  # called with its default parameters
nt.show('nx.html')

nx.html
