# Load Packages

In [1]:
# Add any needed packages
%load_ext autoreload
%autoreload 2

from google.cloud import bigquery
import networkx as nx
from collections import defaultdict
from pyvis.network import Network
from tqdm import tqdm
from scipy.interpolate import interp1d
import numpy as np

# Setup Connection

In [2]:
# Fill in identifiers

CURATION_PROJECT_ID = ""
EHR_OPS_DATASET_ID = ""

# Add any other needed identifiers

In [3]:
client = bigquery.Client(project=CURATION_PROJECT_ID)

# Execute Queries

## Query 1: COPD-related Diagnosis Occurrences

Description: Finds occurrences of useful concepts for COPD diagnosis

In [5]:
copd_diagnosis_concept_ids = [
    255573, # COPD
    4337138, #lung transplant  
    4222731, # bronchitis obliterans
    317009, # asthma
    256449, # bronchiectasis
]

In [6]:
q = f"""
    
WITH RECURSIVE 
  base_concepts AS (
    SELECT
      *
    FROM `aou-res-curation-prod.ehr_ops.concept` c
    WHERE c.concept_id IN (
    255573, # COPD
    4337138, #lung transplant  
    4222731, # bronchitis obliterans
    317009, # asthma
    256449 # bronchiectasis
    )
  ),
  concept_children AS (
      SELECT
          concept_id child_concept_id, NULL parent_concept_id
      FROM base_concepts
      UNION ALL
      SELECT cr.concept_id_1 child_concept_id, cr.concept_id_2 parent_concept_id
      FROM concept_children par
      JOIN `aou-res-curation-prod.ehr_ops.concept_relationship` cr
          ON cr.concept_id_2 = par.child_concept_id
              AND cr.relationship_id = 'Is a'
  )
  SELECT DISTINCT
    child_concept_id, child_concept.concept_name child_concept_name, child_concept.domain_id child_concept_domain,
    parent_concept_id, parent_concept.concept_name parent_concept_name, parent_concept.domain_id parent_concept_domain
  FROM concept_children cc
  JOIN `aou-res-curation-prod.ehr_ops.concept` child_concept
    ON child_concept.concept_id = cc.child_concept_id
  JOIN `aou-res-curation-prod.ehr_ops.concept` parent_concept
    ON parent_concept.concept_id = cc.parent_concept_id
"""

results = client.query(q).to_dataframe()
results

Unnamed: 0,child_concept_id,child_concept_name,child_concept_domain,parent_concept_id,parent_concept_name,parent_concept_domain
0,45768911,Exacerbation of mild persistent asthma,Condition,257581,Acute asthma,Condition
1,4217558,Detergent asthma,Condition,4212099,Occupational asthma,Condition
2,46269771,Acute severe exacerbation of severe persistent...,Condition,46273452,Acute exacerbation of asthma co-occurrent with...,Condition
3,42538810,Bronchiolitis obliterans syndrome due to and a...,Condition,4222731,Obliterative bronchiolitis,Condition
4,4243668,Acute obliterating bronchiolitis,Condition,4222731,Obliterative bronchiolitis,Condition
...,...,...,...,...,...,...
301,4211530,Asthma caused by wood dust,Condition,4312524,Substance induced asthma,Condition
302,4232595,Platinum asthma,Condition,4245676,Chemical-induced asthma,Condition
303,3661412,Thunderstorm asthma,Condition,45769441,Acute exacerbation of allergic asthma,Condition
304,3654571,Chronic emphysema due to vapor,Condition,261325,Pulmonary emphysema,Condition


## Query 2: COPD-related Diagnosis Hierarchy

In [8]:
# Define Query 1
q = f"""
    SELECT
        t.procedure_occurrence_id event_id, par_c.domain_id domain_id, t.procedure_date event_date,
        par_c.concept_id ancestor_concept_id, par_c.concept_name ancestor_concept_name,
        chd_c.concept_id child_concept_id, chd_c.concept_name child_concept_name
    FROM `{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}.unioned_ehr_procedure_occurrence` t
    JOIN `{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}.concept` chd_c
      ON chd_c.concept_id = t.procedure_concept_id
    JOIN `{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}.concept_ancestor` ca
      ON ca.descendant_concept_id = chd_c.concept_id
    JOIN `{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}.concept` par_c
      ON par_c.concept_id = ca.ancestor_concept_id
        AND par_c.concept_id IN ({", ".join(map(str, copd_diagnosis_concept_ids))})
        AND par_c.domain_id = 'Procedure'
    UNION ALL
    SELECT
        t.condition_occurrence_id event_id, par_c.domain_id domain_id, t.condition_start_date event_date,
        par_c.concept_id ancestor_concept_id, par_c.concept_name ancestor_concept_name,
        chd_c.concept_id child_concept_id, chd_c.concept_name child_concept_name
    FROM `{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}.unioned_ehr_condition_occurrence` t
    JOIN `{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}.concept` chd_c
      ON chd_c.concept_id = t.condition_concept_id
    JOIN `{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}.concept_ancestor` ca
      ON ca.descendant_concept_id = chd_c.concept_id
    JOIN `{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}.concept` par_c
      ON par_c.concept_id = ca.ancestor_concept_id
        AND par_c.concept_id IN ({", ".join(map(str, copd_diagnosis_concept_ids))})
        AND par_c.domain_id = 'Condition'
    UNION ALL
    SELECT
        t.observation_id event_id, par_c.domain_id domain_id, t.observation_date event_date,
        par_c.concept_id ancestor_concept_id, par_c.concept_name ancestor_concept_name,
        chd_c.concept_id child_concept_id, chd_c.concept_name child_concept_name
    FROM `{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}.unioned_ehr_observation` t
    JOIN `{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}.concept` chd_c
      ON chd_c.concept_id = t.observation_concept_id
    JOIN `{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}.concept_ancestor` ca
      ON ca.descendant_concept_id = chd_c.concept_id
    JOIN `{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}.concept` par_c
      ON par_c.concept_id = ca.ancestor_concept_id
        AND par_c.concept_id IN ({", ".join(map(str, copd_diagnosis_concept_ids))})
        AND par_c.domain_id = 'Observation'
    
"""

In [43]:
copd_concept_events = client.query(q).to_dataframe()
copd_concept_events

Unnamed: 0,event_id,domain_id,event_date,ancestor_concept_id,ancestor_concept_name,child_concept_id,child_concept_name
0,17000000000046684,Procedure,2018-08-06,4337138,Transplant of lung,2106884,"Lung transplant, double (bilateral sequential ..."
1,51000000109808098,Procedure,2011-05-09,4337138,Transplant of lung,2106884,"Lung transplant, double (bilateral sequential ..."
2,51000000135251788,Procedure,2011-05-09,4337138,Transplant of lung,2106884,"Lung transplant, double (bilateral sequential ..."
3,48000000000007424,Procedure,2018-11-03,4337138,Transplant of lung,2106884,"Lung transplant, double (bilateral sequential ..."
4,48000000000007425,Procedure,2018-11-03,4337138,Transplant of lung,2106884,"Lung transplant, double (bilateral sequential ..."
...,...,...,...,...,...,...,...
1632983,7000000003870929,Condition,2018-01-16,255573,Chronic obstructive lung disease,255573,Chronic obstructive lung disease
1632984,7000000008683532,Condition,2019-03-05,255573,Chronic obstructive lung disease,255573,Chronic obstructive lung disease
1632985,7000000008966173,Condition,2021-02-06,255573,Chronic obstructive lung disease,255573,Chronic obstructive lung disease
1632986,55000000004345239,Condition,2021-08-04,255573,Chronic obstructive lung disease,255573,Chronic obstructive lung disease


In [45]:
dict(copd_concept_events.child_concept_id.value_counts())

{255573: 365743,
 317009: 350040,
 45768910: 305259,
 4146581: 94303,
 261325: 65868,
 257004: 65163,
 45768964: 63242,
 256449: 53187,
 45768963: 38500,
 45768965: 37579,
 257581: 33867,
 312950: 32650,
 4286497: 19133,
 4138760: 12512,
 4110056: 10704,
 45769350: 9339,
 4145497: 8353,
 45772937: 7755,
 40483342: 7510,
 313236: 6121,
 43530693: 5572,
 4337138: 4235,
 257583: 4127,
 4177944: 3934,
 37116845: 3908,
 4191479: 3255,
 45768911: 2825,
 4142738: 2795,
 45769441: 2577,
 4143828: 2053,
 46274062: 1628,
 4145356: 813,
 443801: 771,
 259043: 702,
 440748: 666,
 45768912: 622,
 45769442: 552,
 45773005: 498,
 4141978: 429,
 42539549: 408,
 45769438: 397,
 45769351: 370,
 4209097: 268,
 4155469: 211,
 4193588: 180,
 4115044: 166,
 4196712: 158,
 764677: 156,
 4155468: 155,
 4222731: 152,
 4233784: 142,
 261895: 130,
 2106884: 114,
 2106886: 113,
 45769352: 108,
 37108581: 70,
 46273635: 68,
 45769443: 68,
 2106883: 66,
 46269801: 64,
 256448: 58,
 4337612: 50,
 37310241: 42,
 4626

# Build Network Graph

In [4]:
def build_nx_graph(rel_df, concept_counts={}):
    rel_df['child_concept_id'] = rel_df['child_concept_id'].astype(int)
    rel_df['parent_concept_id'] = rel_df['parent_concept_id'].astype(int)

    colors = defaultdict(lambda: 'gray')
    colors.update({'Condition': 'blue', 'Procedure': 'red'})
    G = nx.DiGraph()

    interp_concept_counts = {}
    if concept_counts:
        f = np.interp(list(concept_counts.values()),
            [0,
             np.median(list(concept_counts.values())),
             max(concept_counts.values())], [1, 10, 20])
        
        interp_concept_counts = dict(zip(list(concept_counts.keys()), f))

    for i, rel in tqdm(rel_df.iterrows()):
        child_node_size = 1
        child_count = 0
        parent_node_size = 1
        parent_count=0
        
        if interp_concept_counts:
            if rel['child_concept_id'] in interp_concept_counts:
                child_node_size = interp_concept_counts[
                    rel['child_concept_id']]
                
                child_count = concept_counts[rel['child_concept_id']]

            if rel['parent_concept_id'] in interp_concept_counts:
                parent_node_size = interp_concept_counts[
                    rel['parent_concept_id']]
                
                parent_count = concept_counts[rel['parent_concept_id']]
                
        G.add_node(str(rel['child_concept_id']),
                   concept_id=rel['child_concept_id'],
                   concept_name=rel['child_concept_name'],
                   title=f"{rel['child_concept_name']}\nCount: {child_count}",
                   domain_id=rel['child_concept_domain'],
                   color=colors[rel['child_concept_domain']],
                   size=child_node_size,
                  )

        G.add_node(str(rel['parent_concept_id']),
                   concept_id=rel['parent_concept_id'],
                   concept_name=rel['parent_concept_name'],
                   title=f"{rel['parent_concept_name']}\nCount: {parent_count}",
                   domain_id=rel['parent_concept_domain'],
                   color=colors[rel['parent_concept_domain']],
                   size=parent_node_size)

        G.add_edge(rel['parent_concept_id'], rel['child_concept_id'], id=i)

    return G

In [100]:
nx_graph = build_nx_graph(results, dict(copd_concept_events.child_concept_id.value_counts()))

306it [00:00, 5784.05it/s]


In [106]:
nt = Network('1000px', '1000px', notebook=True,directed = True)
nt.from_nx(nx_graph) 
nt.repulsion()
nt.show('nx.html')

Local cdn resources have problems on chrome/safari when used in jupyter-notebook. 
