In [6]:
from pathlib import Path
from omegaconf import OmegaConf
import pandas as pd

In [4]:
def load_config(file_name):
    """
    Read a YAML configuration file with OmegaConf, optionally layered on a base file.

    Args:
        file_name (str): Path to the YAML configuration file.

    Returns:
        The OmegaConf object loaded from ``file_name``. When the file defines a
        truthy ``base_config`` entry, the result is the merge of the base
        configuration with this file, with this file's values taking precedence.

    Note:
        ``base_config`` is interpreted relative to the directory that contains
        ``file_name``. Only a single level is resolved: a ``base_config`` entry
        inside the base file itself is not followed.
    """
    config_file = Path(file_name)
    loaded = OmegaConf.load(config_file)

    base_reference = loaded.get("base_config", None)
    if not base_reference:
        # No base configuration declared: the file stands on its own.
        return loaded

    # Merge order matters: later arguments override earlier ones, so the
    # current file wins over the base configuration.
    parent_conf = OmegaConf.load(config_file.parent / base_reference)
    return OmegaConf.merge(parent_conf, loaded)

In [78]:
# Check whether the `dict.custom` section of the loaded config defines a `lang` key.
# NOTE(review): `cfg` is assigned by the `load_config(...)` cell that appears
# further down; on a fresh kernel that cell has to run before this one.
"lang" in cfg.dict.custom

True

In [20]:
# Load the MUGIT configuration (the path is relative to the current working
# directory) and display it as the cell output.
cfg = load_config("../conf/mugit.yaml")
cfg

{'name': 'mugit', 'dict': {'custom': {'lang': ['de', 'en'], 'mugit_path': '${oc.env:HOME}/temp/SCT-GIT_de_large.dat'}}}

In [29]:
# Location of the MUGIT export, taken from the loaded configuration.
mugit_snomed_path = cfg.dict.custom.mugit_path

# The file is tab-separated and has no header row, so the four columns are
# named explicitly after loading.
# NOTE(review): the preview output further down shows sequences such as
# "ZwÃ¶lf…", which may indicate a double-encoded source file or an export
# artifact — confirm that utf8 is the right encoding for this file.
mugit = pd.read_csv(mugit_snomed_path, sep="\t", header=None, encoding="utf8")
mugit.columns = ["snomed_id", "term_id", "en", "de"]

# Keep only rows where every column is present. A new frame is assigned
# instead of mutating in place; the freshly read frame already carries a
# sorted default index, so no explicit index sort is needed.
mugit = mugit.dropna()

# Languages requested in the configuration.
lang = cfg.dict.custom.lang

In [56]:
# Preview the first 30 rows of the MUGIT table.
mugit.head(30)

Unnamed: 0,snomed_id,term_id,en,de
0,9999005,17461019_000001_20210511,Duodenal ampulla structure (body structure),Ampulla
1,9999005,17460018_000009_20210511,Duodenal ampulla structure (body structure),Duodenal-Ampulle
2,9999005,195252011_000002_20210511,Duodenal ampulla structure (body structure),Bulbus duodenalis
3,9999005,195252011_000001_20210511,Duodenal ampulla structure (body structure),Duodenalbulbus
4,9999005,513493019_000009_20210511,Duodenal ampulla structure (body structure),Duodenumampullastruktur
5,9999005,513493019_000003_20210511,Duodenal ampulla structure (body structure),ZwÃ¶lffingerdarmampullastruktur
6,9999005,513493019_000001_20210511,Duodenal ampulla structure (body structure),Duodenumampullenstruktur
7,9999005,17460018_000011_20210511,Duodenal ampulla structure (body structure),ZwÃ¶lffingerdarmampulle
8,9999005,17460018_000003_20210511,Duodenal ampulla structure (body structure),ZwÃ¶lffingerdarmampulla
9,9999005,17460018_000002_20210511,Duodenal ampulla structure (body structure),Duodenumampulle


In [69]:
# Override the language list taken from the config so that only the German
# terms are used when `concept_details` is built.
lang = ["de"]

In [70]:
def build_concept_details(terms, languages):
    """
    Group MUGIT term rows into one record per SNOMED concept.

    Args:
        terms: DataFrame with the columns ``snomed_id``, ``en`` and ``de``.
        languages: Iterable of language codes. Supported selections are
            ``{"de", "en"}`` (in any order), ``{"en"}`` and ``{"de"}``.

    Returns:
        dict mapping each ``snomed_id`` to a dict with the keys ``concept_id``,
        ``canonical_name``, ``types`` (always an empty list) and ``aliases``.

        * Both languages: the English term of the first row seen for a concept
          is the canonical name; every other distinct English term and every
          distinct German term becomes an alias.
        * One language: the first term seen is the canonical name and every
          other distinct term in that language becomes an alias.

        For an unsupported selection a message is printed and an empty dict
        is returned.
    """
    # Compare as a set so that the order of the configured languages
    # (e.g. ["en", "de"] vs ["de", "en"]) does not matter.
    selected = set(languages or ())
    if selected == {"de", "en"}:
        canonical_col, alias_cols = "en", ("en", "de")
    elif selected in ({"en"}, {"de"}):
        (canonical_col,) = selected
        alias_cols = (canonical_col,)
    else:
        print("Languages not supported by MUGIT")
        return {}

    details = {}
    # itertuples avoids building a Series per row, which iterrows does.
    for row in terms.itertuples(index=False):
        sid = row.snomed_id
        concept = details.get(sid)
        if concept is None:
            concept = details[sid] = {
                "concept_id": sid,
                "canonical_name": getattr(row, canonical_col),
                "types": [],
                "aliases": [],
            }
        for col in alias_cols:
            term = getattr(row, col)
            # Only terms from the canonical column are checked against the
            # canonical name; terms from the other language are kept as
            # aliases even when they spell the same string.
            if col == canonical_col and term == concept["canonical_name"]:
                continue
            if term not in concept["aliases"]:
                concept["aliases"].append(term)
    return details


concept_details = build_concept_details(mugit, lang)

In [71]:
# Display the resulting snomed_id -> concept record mapping.
# NOTE(review): this renders the whole dictionary; for the full MUGIT table
# the output can become very large.
concept_details

{9999005: {'concept_id': 9999005,
  'canonical_name': 'Ampulla',
  'types': [],
  'aliases': ['Duodenal-Ampulle',
   'Bulbus duodenalis',
   'Duodenalbulbus',
   'Duodenumampullastruktur',
   'ZwÃ¶lffingerdarmampullastruktur',
   'Duodenumampullenstruktur',
   'ZwÃ¶lffingerdarmampulle',
   'ZwÃ¶lffingerdarmampulla',
   'Duodenumampulle',
   'Duodenumampulla',
   'Duodenum-Ampulla',
   'Duodenum-Ampulle',
   'ZwÃ¶lffingerdarm-Ampulle',
   'duodenale Ampulle',
   'duodenale Ampulla',
   'Duodenal-Ampulla']},
 9998002: {'concept_id': 9998002,
  'canonical_name': 'Rumex venosus',
  'types': [],
  'aliases': ['Ampfer venosus']},
 9996003: {'concept_id': 9996003,
  'canonical_name': 'Arthrotomie mit Drainage des Tarsometatarsalgelenks',
  'types': [],
  'aliases': ['Arthrotomie mit Drainage eines Tarsometatarsalgelenkes',
   'Arthrotomie mit Drainage des Tarsometatarsalgelenkes',
   'Arthrotomie mit Drainage eines Tarsometatarsalgelenks']},
 9995004: {'concept_id': 9995004,
  'canonical_name