# Cohort YAML — Print files & Recreate via Python

In [13]:
# If needed, add repo path:
# import sys; sys.path.append('/path/to/BiasAnalyzerYAMLBuilder')

from pathlib import Path
import yaml
import os

from python_to_YAML import (
    CohortYAML, Demographics,
    ConditionOccurrence, VisitOccurrence, DateEvent,
    AND, OR, BEFORE, NOT
)

print("Imports OK")

Imports OK


## 1) Print the three reference YAML files

In [6]:
# The reference configs live in ./test_yaml, relative to the directory the
# notebook is running in.
base_dir = Path.cwd()
yaml_dir = base_dir / 'test_yaml'

# All three files share one naming pattern and differ only in the suffix,
# which doubles as the lookup key.
paths = {
    label: yaml_dir / f'test_cohort_creation_condition_occurrence_config_{label}.yaml'
    for label in ('baseline', 'study', 'study2')
}

# refs keeps, per reference file, both the raw text (comments included) and
# the parsed object.
refs = {}
for name, p in paths.items():
    text = p.read_text(encoding='utf-8')
    obj = yaml.safe_load(text)
    refs[name] = {'text': text, 'obj': obj}
    # Banner line, then the file contents exactly as stored on disk.
    print(f"\n===== {name.upper()} (file) =====", text, sep="\n")


===== BASELINE (file) =====
inclusion_criteria:
  demographics:                     # Optional
    gender: 'female'                # accepted values: female or male, optional field
    min_birth_year: 2000            # Born at the year of 2000 or after, optional field
    max_birth_year: 2020            # Born at the year of 2020 or before, optional field


===== STUDY (file) =====
inclusion_criteria:
  demographics:                     # Optional
    gender: 'female'                # accepted values: female or male, optional field
    min_birth_year: 2000            # Born at the year of 2000 or after, optional field
    max_birth_year: 2020            # Born at the year of 2020 or before, optional field
  temporal_events:
    - operator: 'AND'
      events:
        - event_type: 'condition_occurrence'
          event_concept_id: 37311061 # COVID condition


===== STUDY2 (file) =====
inclusion_criteria:
  demographics:                     # Optional
    gender: 'female'              

## 2) Recreate the above cohorts with the Python YAML builder
### 2.1 Baseline

In [10]:
"""
Baseline cohort: females born between 2000 and 2020.
YAML has only demographics (no temporal_events).
"""

# Demographic filter: female, birth year from 2000 through 2020.
demo = Demographics(gender="female", min_birth_year=2000, max_birth_year=2020)

# Demographics are the only inclusion criteria for this cohort.
cohort = CohortYAML(demographics=demo)

# Render first, then print; sort_keys=False keeps the keys from being
# alphabetised (the printed output lists min_birth_year before max_birth_year).
baseline_yaml = cohort.to_yaml(sort_keys=False)
print(baseline_yaml)

# Also write the YAML to disk. Kept as the cell's last expression so the
# notebook displays the value save_yaml returns.
cohort.save_yaml("baseline_recreated.yaml")


inclusion_criteria:
  demographics:
    gender: 'female'
    min_birth_year: 2000
    max_birth_year: 2020



PosixPath('baseline_recreated.yaml')

### 2.2 Study

In [None]:
"""
Study cohort: females born 2000–2020 AND a COVID diagnosis.
YAML shows a single temporal group with operator 'AND' and ONE leaf event.
"""
from python_to_YAML.builder import SingleQuoted

# Same demographic filter as the baseline cohort.
demo = Demographics(gender="female", min_birth_year=2000, max_birth_year=2020)

# The only leaf event: a condition_occurrence for the COVID concept id.
covid_condition = ConditionOccurrence(event_concept_id=37311061)
covid = covid_condition.to_yaml_event()

# One temporal group: operator 'AND' over that single event.
# The operator is wrapped in SingleQuoted; the reference YAML writes it as 'AND'.
group_and_single = dict(
    operator=SingleQuoted("AND"),
    events=[covid],
)

# Assemble the cohort from the demographics plus the one temporal block.
cohort = CohortYAML(demographics=demo, temporal_blocks=[group_and_single])

study_yaml = cohort.to_yaml(sort_keys=False)
print(study_yaml)

# Write the YAML to disk last, so the notebook displays what save_yaml returns.
cohort.save_yaml("study_recreated.yaml")


inclusion_criteria:
  demographics:
    gender: 'female'
    min_birth_year: 2000
    max_birth_year: 2020
  temporal_events:
  - operator: 'AND'
    events:
    - event_type: 'condition_occurrence'
      event_concept_id: 37311061



PosixPath('study_recreated.yaml')

### 2.3 Study2

In [None]:
"""
Study2 cohort: females born 2000–2020 AND NOT(COVID diagnosis).
YAML shows a single temporal group with operator 'NOT' and ONE leaf event.
"""
from python_to_YAML.builder import SingleQuoted

# Same demographic filter as the baseline and study cohorts.
demo = Demographics(gender="female", min_birth_year=2000, max_birth_year=2020)

# The only leaf event: a condition_occurrence for the COVID concept id.
covid_condition = ConditionOccurrence(event_concept_id=37311061)
covid = covid_condition.to_yaml_event()

# One temporal group: operator 'NOT' over that single event.
# The operator is wrapped in SingleQuoted; the printed YAML shows it as 'NOT'.
group_not_single = dict(
    operator=SingleQuoted("NOT"),
    events=[covid],
)

# Assemble the cohort from the demographics plus the one temporal block.
cohort = CohortYAML(demographics=demo, temporal_blocks=[group_not_single])

study2_yaml = cohort.to_yaml(sort_keys=False)
print(study2_yaml)

# Write the YAML to disk last, so the notebook displays what save_yaml returns.
cohort.save_yaml("study2_recreated.yaml")


inclusion_criteria:
  demographics:
    gender: 'female'
    min_birth_year: 2000
    max_birth_year: 2020
  temporal_events:
  - operator: 'NOT'
    events:
    - event_type: 'condition_occurrence'
      event_concept_id: 37311061



PosixPath('study2_recreated.yaml')