
# BiasAnalyzer Cohort Tutorial — YAML ⇄ Python
This notebook shows **two equivalent workflows**:
1. Load cohorts from existing YAML files.
2. Build the *same* cohorts with Python (`CohortDefinition`) and print YAML automatically via `print(cohort)`.

It then demonstrates how to pass either form to **BiasAnalyzer**.


In [None]:

# --- Setup (run this first) ---
from pathlib import Path
import yaml

# Directory that holds the example YAML files. It defaults to the current
# working directory; point it elsewhere if the notebook runs from another folder.
PROJECT_ROOT = Path.cwd()

# The three example cohort configs are expected directly under PROJECT_ROOT.
YAML_BASELINE = PROJECT_ROOT.joinpath("cohort_creation_config_baseline_example2.yaml")
YAML_STUDY1 = PROJECT_ROOT.joinpath("cohort_creation_config_study1_example2.yaml")
YAML_STUDY2 = PROJECT_ROOT.joinpath("cohort_creation_config_study2_example2.yaml")

# Sanity check: one True/False per file, in baseline, study 1, study 2 order.
print(
    "YAML exists?",
    *(config.exists() for config in (YAML_BASELINE, YAML_STUDY1, YAML_STUDY2)),
)

def read_yaml(path: Path):
    """Read a UTF-8 YAML file and return ``(raw_text, parsed_object)``.

    The raw text is returned next to the ``yaml.safe_load`` result so callers
    can either show the file verbatim or work with the parsed structure.
    """
    raw = Path(path).read_text(encoding="utf-8")
    return raw, yaml.safe_load(raw)

def show_yaml_file(path: Path, header: str):
    """Print a banner naming the file, then the file's text, then a blank line.

    The banner is printed before the file is read. Leading and trailing
    whitespace of the file text is stripped before it is printed.
    """
    banner = f"===== {header} — {path.name} ====="
    print(banner)
    body = read_yaml(path)[0].strip()
    # One trailing newline ends the text, the second produces the blank spacer line.
    print(body, end="\n\n")


In [None]:

# --- A. Inspect the provided YAML cohort configs ---
# Dump each example config verbatim, in baseline, study 1, study 2 order.
for yaml_path, banner_title in (
    (YAML_BASELINE, "Baseline Cohort (YAML)"),
    (YAML_STUDY1, "Study 1 Cohort (YAML)"),
    (YAML_STUDY2, "Study 2 Cohort (YAML)"),
):
    show_yaml_file(yaml_path, banner_title)


In [None]:

# --- B. Build the SAME cohorts via Python objects (no YAML handling by user) ---
# The event classes and operators below are imported from the renamed package: CohortDefinition
from CohortDefinition import (
    ConditionOccurrence,
    DrugExposure,
    ProcedureOccurrence,
    Measurement,
    VisitOccurrence,
    DateEvent,
    Demographics,
    CohortCriteria,
    AND, OR, BEFORE, NOT
)

# Utility: deep-compare YAML produced by CohortCriteria vs file YAML
def yaml_from_criteria(criteria: CohortCriteria):
    """Return ``(yaml_text, parsed_object)`` for a criteria object.

    ``str(criteria)`` is taken as the YAML text and parsed back with
    ``yaml.safe_load`` so the result can be compared structurally.
    """
    rendered = str(criteria)
    parsed = yaml.safe_load(rendered)
    return rendered, parsed

def assert_yaml_equal(left_dict, right_dict, label_left="python", label_right="file"):
    """Check that two parsed YAML structures are equal, reporting the result.

    Prints a one-line confirmation when the structures match. When they do
    not, prints a mismatch header followed by a pretty-printed dump of each
    side, then raises.

    Raises:
        AssertionError: if ``left_dict != right_dict``.
    """
    import pprint

    differs = left_dict != right_dict
    if not differs:
        print(f"✅ YAML equal: {label_left} ≡ {label_right}")
        return

    print("❌ YAML mismatch between", label_left, "and", label_right)
    for title, payload in (("Left:", left_dict), ("Right:", right_dict)):
        print(title)
        pprint.pprint(payload)
    raise AssertionError("YAML structures differ")



## B1. Recreate **Baseline** cohort in Python
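> ⚠️ Adjust the events and logic below as needed so that they mirror the baseline YAML exactly. The skeleton shows how to compose a query using only operator functions and event objects; the comparison at the end of the cell raises an `AssertionError` if the generated YAML differs structurally from the file.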


In [None]:

# --- B1. Baseline in Python ---
# Example skeleton — replace concept IDs etc. to mirror your baseline YAML.
# Flow of this cell: build the demographics filter, build the events, nest them
# into temporal blocks, wrap everything in CohortCriteria, then compare the
# generated YAML against the baseline file.
demo_baseline = Demographics(gender=None)

# Example events; adjust to your YAML:
t2_last   = ConditionOccurrence(event_concept_id=201826, event_instance=-1)  # T2DM (last)
ip_visit  = VisitOccurrence(event_concept_id=9201)                           # Inpatient
covid_dxn = ConditionOccurrence(event_concept_id=37311061)                   # COVID-19

# NOTE(review): both OR operands are the same event, so the OR only illustrates
# nesting — swap in the real alternative event when mirroring the YAML.
# NOTE(review): the unit of offset=180 is not visible here (presumably days) —
# confirm against the CohortDefinition API.
inner = BEFORE(OR(t2_last, t2_last), ip_visit, offset=180)  # placeholder OR to show structure
outer = BEFORE(inner, covid_dxn)

baseline_py = CohortCriteria(
    demographics=demo_baseline,
    temporal_blocks=[outer],
)

# Print YAML generated from Python (no explicit to_yaml call)
print(baseline_py)

# Compare to file YAML (structural equality)
# Both helpers return (text, parsed) pairs; only the parsed halves are compared.
# assert_yaml_equal raises AssertionError when the two structures differ, which
# stops the cell until the skeleton above mirrors the file.
baseline_file_text, baseline_file_dict = read_yaml(YAML_BASELINE)
baseline_py_text,   baseline_py_dict   = yaml_from_criteria(baseline_py)
assert_yaml_equal(baseline_py_dict, baseline_file_dict, "python(baseline)", "file(baseline)")



## B2. Recreate **Study 1** cohort in Python


In [None]:

# --- B2. Study 1 in Python ---
# TODO: adjust to match your Study 1 YAML exactly
# NOTE(review): the concept IDs below are marked as placeholders; if they do not
# match the Study 1 file, assert_yaml_equal at the end of this cell raises
# AssertionError.
demo_s1 = Demographics(gender="male")

e1 = ConditionOccurrence(event_concept_id=123)   # placeholder
e2 = DrugExposure(event_concept_id=456)          # placeholder
# Combine the two events with the AND operator into a single block.
block_s1 = AND(e1, e2)

study1_py = CohortCriteria(
    demographics=demo_s1,
    temporal_blocks=[block_s1],
)

# Printing the criteria object shows its YAML form.
print(study1_py)

# Both helpers return (text, parsed) pairs; only the parsed halves are compared.
s1_file_text, s1_file_dict = read_yaml(YAML_STUDY1)
s1_py_text,   s1_py_dict   = yaml_from_criteria(study1_py)
assert_yaml_equal(s1_py_dict, s1_file_dict, "python(study1)", "file(study1)")



## B3. Recreate **Study 2** cohort in Python


In [None]:

# --- B3. Study 2 in Python ---
# TODO: adjust to match your Study 2 YAML exactly
# NOTE(review): the concept IDs below are marked as placeholders; if they do not
# match the Study 2 file, assert_yaml_equal at the end of this cell raises
# AssertionError.
demo_s2 = Demographics(min_birth_year=1961, max_birth_year=2007)

p = ProcedureOccurrence(event_concept_id=789)  # placeholder
m = Measurement(event_concept_id=101112)       # placeholder
# Order the two events with the BEFORE operator (procedure first, then measurement).
block_s2 = BEFORE(p, m)

study2_py = CohortCriteria(
    demographics=demo_s2,
    temporal_blocks=[block_s2],
)

# Printing the criteria object shows its YAML form.
print(study2_py)

# Both helpers return (text, parsed) pairs; only the parsed halves are compared.
s2_file_text, s2_file_dict = read_yaml(YAML_STUDY2)
s2_py_text,   s2_py_dict   = yaml_from_criteria(study2_py)
assert_yaml_equal(s2_py_dict, s2_file_dict, "python(study2)", "file(study2)")



## C. Create & Access Cohorts in **BiasAnalyzer**
The cell below shows two interchangeable ways to pass cohorts to BiasAnalyzer:

1. **From YAML files** (read text → pass to BIAS).
2. **From Python objects** (use `str(criteria)` to obtain the YAML text, or `criteria.to_dict()` if the API accepts a dict).

> Replace the DuckDB configuration path and the API calls with your actual settings.


In [None]:

# --- C. BiasAnalyzer wiring (edit paths/API as needed) ---
# Example wiring — uncomment and adapt to your local environment.

# from biasanalyzer.api import BIAS
# bias = BIAS()
# bias.set_config('config_duckdb.yaml')  # your config
# bias.set_root_omop()

# # Option 1: from YAML file text
# baseline_yaml_text, _ = read_yaml(YAML_BASELINE)
# cohort_id_file = bias.create_cohort_from_yaml(baseline_yaml_text)  # or create_cohort_from_yaml_str(...)

# # Option 2: from Python object -> YAML string
# baseline_yaml_text_py = str(baseline_py)  # print(cohort) gives YAML string
# cohort_id_py = bias.create_cohort_from_yaml(baseline_yaml_text_py)

# # Access members / counts / preview
# print("Cohort from file:", cohort_id_file)
# print("Cohort from python:", cohort_id_py)
# preview = bias.get_cohort_members(cohort_id_py, limit=10)
# preview
