In [None]:
# --- 1. Set up BIAS and connect to OMOP database ---

from biasanalyzer.api import BIAS

bias = BIAS()

# NOTE: change this to your actual config file path.
bias.set_config('/Users/sherrylu/Documents/UNC/26Fall/VAC Lab/BiasAnalyzerYAMLBuilder/JypterNotebook/BiasAnalyzer_Cohort_Builder_Comparison/config_duckdb.yaml')

# Establish connection to OMOP CDM database
bias.set_root_omop()
#print("BIAS is connected to OMOP CDM.")

configuration specified in /Users/sherrylu/Documents/UNC/26Fall/VAC Lab/BiasAnalyzerYAMLBuilder/JypterNotebook/BiasAnalyzer_Cohort_Builder_Comparison/config_duckdb.yaml loaded successfully
Connected to the DuckDB database: /Users/sherrylu/Documents/UNC/26Fall/VAC Lab/data/synpuf_100k_omop_54.duckdb.
Cohort Definition table created.
Cohort table created.
BIAS is connected to OMOP CDM.


In [2]:
# --- 2. Search concepts by keyword (Type 2 diabetes mellitus) ---

import pandas as pd
pd.set_option('display.max_rows', None)

# Use the same API as in BiasAnalyzerConceptBrowsingTutorial.ipynb
# Signature: bias.get_concepts(keyword, domain, vocabulary)
concepts = bias.get_concepts(
    "Type 2 diabetes mellitus",   # keyword
    "Condition",                  # domain
    "SNOMED"                      # vocabulary
)

df = pd.DataFrame(concepts)
df


Unnamed: 0,concept_id,concept_name,valid_start_date,valid_end_date,domain_id,vocabulary_id
0,201530,Hyperosmolar coma due to type 2 diabetes mellitus,2002-01-31,2099-12-31,Condition,SNOMED
1,201826,Type 2 diabetes mellitus,2002-01-31,2099-12-31,Condition,SNOMED
2,376065,Disorder of nervous system due to type 2 diabe...,2006-07-31,2099-12-31,Condition,SNOMED
3,443729,Peripheral circulatory disorder due to type 2 ...,2006-07-31,2099-12-31,Condition,SNOMED
4,443731,Renal disorder due to type 2 diabetes mellitus,2006-07-31,2099-12-31,Condition,SNOMED
5,443732,Disorder due to type 2 diabetes mellitus,2006-07-31,2099-12-31,Condition,SNOMED
6,443733,Disorder of eye due to type 2 diabetes mellitus,2006-07-31,2099-12-31,Condition,SNOMED
7,443734,Ketoacidosis due to type 2 diabetes mellitus,2006-07-31,2099-12-31,Condition,SNOMED
8,608884,Inflammatory dermatosis due to type 2 diabetes...,2021-01-31,2099-12-31,Condition,SNOMED
9,761062,Ischemic heel and/or midfoot ulcer due to type...,2016-03-01,2021-03-01,Condition,SNOMED


In [4]:
# get parent and children concept hierarchical tree for Type 2 diabetes mellitus (SNOMED id: 201826)
parent_concept_tree, children_concept_tree = bias.get_concept_hierarchy(201826)

Concept Hierarchy:   0%|          | 0/3 [00:00<?, ?stage/s]

In [5]:
print('parent concept hierarchy for Type 2 diabetes mellitus in text format:')
print(bias.display_concept_tree(parent_concept_tree))
print('children concept hierarchy for Type 2 diabetes mellitus in text format:')
print(bias.display_concept_tree(children_concept_tree))

parent concept hierarchy for Type 2 diabetes mellitus in text format:
ðŸ”¼ Type 2 diabetes mellitus (ID: 201826, Code: 44054006)
  ðŸ”¼ Metabolic disease (ID: 436670, Code: 75934005)
  ðŸ”¼ Disorder of carbohydrate metabolism (ID: 437515, Code: 20957000)
  ðŸ”¼ Disorder of body system (ID: 4180628, Code: 362965005)
  ðŸ”¼ Clinical finding (ID: 441840, Code: 404684003)
  ðŸ”¼ Disorder of glucose metabolism (ID: 4130526, Code: 126877002)
  ðŸ”¼ Disease (ID: 4274025, Code: 64572001)
  ðŸ”¼ Diabetes mellitus (ID: 201820, Code: 73211009)
  ðŸ”¼ Disorder of endocrine system (ID: 31821, Code: 362969004)

children concept hierarchy for Type 2 diabetes mellitus in text format:
ðŸ”½ Type 2 diabetes mellitus (ID: 201826, Code: 44054006)
  ðŸ”½ Poorly controlled type II diabetes with neuropathy (ID: 3191208, Code: 10170001000004104)
  ðŸ”½ Type 2 diabetes mellitus controlled by diet (ID: 45757508, Code: 164971000119101)
  ðŸ”½ Insulin treated type 2 diabetes mellitus (ID: 4130162, Code: 237599002)

In [None]:
# --- 3 Alternative. pick the first row whose name contains 'Type 2 diabetes mellitus' (or pick whatever you want to use) ---
mask = df["concept_name"].str.contains("Type 2 diabetes mellitus", case=False, na=False)
t2dm_row = df[mask].iloc[0]

t2dm_concept_id = int(t2dm_row["concept_id"])
t2dm_name = t2dm_row["concept_name"]

print("Selected concept:")
print(f"  concept_id   = {t2dm_concept_id}")
print(f"  concept_name = {t2dm_name}")

Selected concept:
  concept_id   = 201530
  concept_name = Hyperosmolar coma due to type 2 diabetes mellitus


In [6]:
# --- 4. Build baseline cohort using CohortDefinition package ---

from CohortDefinition import (
    ConditionOccurrence,
    CohortCriteria,
)

# 1) Define the single clinical event (Type 2 diabetes mellitus).
#    We already resolved the OHDSI concept_id via BIAS above.
db = ConditionOccurrence(
    event_concept_id=t2dm_concept_id,  # this should be 201826
    # id_type defaults to OHDSI, so we don't need to set id_type here.
)

# 2) Assemble the cohort criteria (no demographics for this baseline).
#    Passing a single Event in temporal_blocks makes the builder
#    wrap it as: [{"operator": "AND", "events": [<event>]}]
baseline_py = CohortCriteria(
    temporal_blocks=[db],
)

# 3) Print cohort directly (YAML string).
print("===== Baseline cohort (Python-built) =====")
print(baseline_py)


===== Baseline cohort (Python-built) =====
inclusion_criteria:
  temporal_events:
  - operator: 'AND'
    events:
    - event_type: 'condition_occurrence'
      event_concept_id: 201530

