# Load Packages

In [None]:
# Add any needed packages
%load_ext autoreload
%autoreload 2

from google.cloud import bigquery
import networkx as nx
from collections import defaultdict
from pyvis.network import Network
from tqdm import tqdm
from scipy.interpolate import interp1d
import numpy as np

# Setup Connection

In [None]:
# Identifiers of the BigQuery project and the EHR Ops dataset used by this
# notebook. Both are blank placeholders and must be filled in before running.
CURATION_PROJECT_ID = ""
EHR_OPS_DATASET_ID = ""

# Add any other needed identifiers

In [None]:
# BigQuery client bound to CURATION_PROJECT_ID; the queries below are run through it.
client = bigquery.Client(project=CURATION_PROJECT_ID)

# Execute Queries

## Query 1: COPD-related Concept Hierarchy

Description: Finds the child concepts ("Is a" descendants) of the base concepts that are useful for COPD diagnosis

In [None]:
# Base concepts of interest, keyed by the label given to each concept ID.
copd_diagnosis_concepts = {
    "COPD": 255573,
    "lung transplant": 4337138,
    "bronchitis obliterans": 4222731,
    "asthma": 317009,
    "bronchiectasis": 256449,
}

# Plain list of the concept IDs, in the order declared above.
copd_diagnosis_concept_ids = list(copd_diagnosis_concepts.values())

In [None]:
# Query 1: starting from the base concepts in copd_diagnosis_concept_ids, walk
# the 'Is a' relationships downward (recursively) and return every
# (child, parent) concept pair together with each concept's name and domain.
#
# The tables are addressed through CURATION_PROJECT_ID / EHR_OPS_DATASET_ID and
# the ID list is taken from copd_diagnosis_concept_ids, so this query stays in
# sync with Query 2 instead of carrying its own hard-coded copies.
q = f"""
WITH RECURSIVE
  base_concepts AS (
    SELECT
      c.concept_id
    FROM `{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}.concept` c
    WHERE c.concept_id IN ({", ".join(map(str, copd_diagnosis_concept_ids))})
  ),
  concept_children AS (
      -- Seed rows: the base concepts themselves, which have no parent here.
      SELECT
          concept_id child_concept_id, NULL parent_concept_id
      FROM base_concepts
      UNION ALL
      -- Recursive step: concept_id_1 'Is a' concept_id_2, so concept_id_1 is
      -- a child of a concept that has already been reached.
      SELECT cr.concept_id_1 child_concept_id, cr.concept_id_2 parent_concept_id
      FROM concept_children par
      JOIN `{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}.concept_relationship` cr
          ON cr.concept_id_2 = par.child_concept_id
              AND cr.relationship_id = 'Is a'
  )
  -- The inner join on parent_concept drops the seed rows (NULL parent); the
  -- base concepts still show up as parents of their direct children.
  SELECT DISTINCT
    child_concept_id, child_concept.concept_name child_concept_name, child_concept.domain_id child_concept_domain,
    parent_concept_id, parent_concept.concept_name parent_concept_name, parent_concept.domain_id parent_concept_domain
  FROM concept_children cc
  JOIN `{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}.concept` child_concept
    ON child_concept.concept_id = cc.child_concept_id
  JOIN `{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}.concept` parent_concept
    ON parent_concept.concept_id = cc.parent_concept_id
"""

# Run the query and display the resulting relationship table.
results = client.query(q).to_dataframe()
results

## Query 2: COPD-related Diagnosis Occurrences

In [None]:
# Define Query 2: one SELECT per event table (procedure, condition,
# observation). Each keeps the events whose concept is a descendant of one of
# the concepts in copd_diagnosis_concept_ids, restricted to the ancestor
# concepts of the matching domain.
ehr_dataset = f"{CURATION_PROJECT_ID}.{EHR_OPS_DATASET_ID}"
ancestor_id_list = ", ".join(map(str, copd_diagnosis_concept_ids))

q = f"""
    SELECT
        t.procedure_occurrence_id event_id, par_c.domain_id domain_id, t.procedure_date event_date,
        par_c.concept_id ancestor_concept_id, par_c.concept_name ancestor_concept_name,
        chd_c.concept_id child_concept_id, chd_c.concept_name child_concept_name
    FROM `{ehr_dataset}.unioned_ehr_procedure_occurrence` t
    JOIN `{ehr_dataset}.concept` chd_c
      ON chd_c.concept_id = t.procedure_concept_id
    JOIN `{ehr_dataset}.concept_ancestor` ca
      ON ca.descendant_concept_id = chd_c.concept_id
    JOIN `{ehr_dataset}.concept` par_c
      ON par_c.concept_id = ca.ancestor_concept_id
        AND par_c.concept_id IN ({ancestor_id_list})
        AND par_c.domain_id = 'Procedure'
    UNION ALL
    SELECT
        t.condition_occurrence_id event_id, par_c.domain_id domain_id, t.condition_start_date event_date,
        par_c.concept_id ancestor_concept_id, par_c.concept_name ancestor_concept_name,
        chd_c.concept_id child_concept_id, chd_c.concept_name child_concept_name
    FROM `{ehr_dataset}.unioned_ehr_condition_occurrence` t
    JOIN `{ehr_dataset}.concept` chd_c
      ON chd_c.concept_id = t.condition_concept_id
    JOIN `{ehr_dataset}.concept_ancestor` ca
      ON ca.descendant_concept_id = chd_c.concept_id
    JOIN `{ehr_dataset}.concept` par_c
      ON par_c.concept_id = ca.ancestor_concept_id
        AND par_c.concept_id IN ({ancestor_id_list})
        AND par_c.domain_id = 'Condition'
    UNION ALL
    SELECT
        t.observation_id event_id, par_c.domain_id domain_id, t.observation_date event_date,
        par_c.concept_id ancestor_concept_id, par_c.concept_name ancestor_concept_name,
        chd_c.concept_id child_concept_id, chd_c.concept_name child_concept_name
    FROM `{ehr_dataset}.unioned_ehr_observation` t
    JOIN `{ehr_dataset}.concept` chd_c
      ON chd_c.concept_id = t.observation_concept_id
    JOIN `{ehr_dataset}.concept_ancestor` ca
      ON ca.descendant_concept_id = chd_c.concept_id
    JOIN `{ehr_dataset}.concept` par_c
      ON par_c.concept_id = ca.ancestor_concept_id
        AND par_c.concept_id IN ({ancestor_id_list})
        AND par_c.domain_id = 'Observation'
    
"""

In [None]:
# Submit the query defined above and load its rows into a DataFrame.
copd_events_job = client.query(q)
copd_concept_events = copd_events_job.to_dataframe()

# Display the events table.
copd_concept_events

In [None]:
# Number of events recorded for each child concept ID.
dict(copd_concept_events["child_concept_id"].value_counts())

# Build Network Graph

In [None]:
def _scale_node_sizes(concept_counts):
    """Map each concept's raw count onto a node size between 1 and 20."""
    if not concept_counts:
        return {}

    counts = list(concept_counts.values())
    # Piecewise-linear scale: 0 -> 1, median count -> 10, max count -> 20.
    sizes = np.interp(counts, [0, np.median(counts), max(counts)], [1, 10, 20])
    return dict(zip(concept_counts.keys(), sizes))


def _add_concept_node(G, rel, role, colors, node_sizes, concept_counts):
    """Add (or refresh) the node for the `role` ('child'/'parent') concept of
    `rel` and return the key it is stored under."""
    concept_id = rel[f'{role}_concept_id']
    concept_name = rel[f'{role}_concept_name']
    domain = rel[f'{role}_concept_domain']
    node_key = str(concept_id)

    # Concepts without a known count keep the defaults: count 0, size 1.
    count = concept_counts.get(concept_id, 0)
    G.add_node(node_key,
               concept_id=concept_id,
               concept_name=concept_name,
               title=f"{concept_name}\nCount: {count}",
               domain_id=domain,
               color=colors[domain],
               size=node_sizes.get(concept_id, 1))
    return node_key


def build_nx_graph(rel_df, concept_counts=None):
    """Build a directed parent -> child concept graph from a relationship table.

    Args:
        rel_df: DataFrame with one row per relationship and the columns
            child_concept_id, child_concept_name, child_concept_domain,
            parent_concept_id, parent_concept_name, parent_concept_domain.
            The frame passed in is not modified.
        concept_counts: optional mapping of concept ID -> count. When given,
            node sizes are scaled by count and the count is shown in each
            node's title; otherwise every node has size 1 and count 0.

    Returns:
        A networkx DiGraph whose nodes are keyed by the concept ID as a string
        and carry concept_id, concept_name, title, domain_id, color and size
        attributes. Each edge points from parent to child and stores the
        DataFrame index of its row as `id`.
    """
    concept_counts = {} if concept_counts is None else concept_counts

    # astype returns a new frame, so the caller's DataFrame is left untouched.
    rel_df = rel_df.astype({'child_concept_id': int, 'parent_concept_id': int})

    # Node color per domain; any domain not listed falls back to gray.
    colors = defaultdict(lambda: 'gray')
    colors.update({'Condition': 'blue', 'Procedure': 'red'})

    node_sizes = _scale_node_sizes(concept_counts)

    G = nx.DiGraph()
    for i, rel in tqdm(rel_df.iterrows(), total=len(rel_df)):
        child_key = _add_concept_node(
            G, rel, 'child', colors, node_sizes, concept_counts)
        parent_key = _add_concept_node(
            G, rel, 'parent', colors, node_sizes, concept_counts)

        # Edges must use the same string keys as the nodes. Passing the raw
        # integer IDs makes networkx create separate, attribute-less nodes for
        # the edge endpoints and leaves the styled nodes disconnected.
        G.add_edge(parent_key, child_key, id=i)

    return G

In [None]:
# Count the events per child concept, then build the concept graph with those
# counts supplied as the per-concept counts.
event_counts_by_concept = dict(
    copd_concept_events["child_concept_id"].value_counts())
nx_graph = build_nx_graph(results, event_counts_by_concept)

In [None]:
# Load the concept graph into a directed pyvis network and write it to nx.html.
nt = Network(
    '1000px',
    '1000px',
    notebook=True,
    directed=True,
)
nt.from_nx(nx_graph)
nt.repulsion()
nt.show('nx.html')