import logging
from typing import List
from xml.etree import ElementTree as ET

from dug import utils as utils
from ._base import DugElement, FileParser, Indexable, InputFile

logger = logging.getLogger('dug')

# NOTE: this parser is registered by define_parsers() in
# src/dug/core/parsers/__init__.py, which imports it with
# `from .radx_parser import RADxParser` and adds
# `parser_dict["radx"] = RADxParser()`.


class RADxParser(FileParser):
    """Parse a RADx XML data dictionary into a flat list of ``DugElement``s.

    The parser reads these parts of the document:

    * root element attributes ``id`` (study id), ``study_name`` and
      ``module`` (used as the element type of every variable);
    * every ``<variable>`` element found by ``root.iter('variable')``, each
      with an ``id`` attribute and optional ``<name>`` / ``<description>``
      child elements.
    """

    def __call__(self, input_file: InputFile) -> List[Indexable]:
        """Build one ``DugElement`` per ``<variable>`` in *input_file*.

        Args:
            input_file: Source handed straight to ``ElementTree.parse``.

        Returns:
            The elements in document order. Only elements are produced;
            this parser does not create concepts.

        Raises:
            KeyError: If the root lacks ``id`` or ``study_name``, if a
                variable lacks ``id``, or if the root lacks ``module`` while
                at least one variable is present.
            xml.etree.ElementTree.ParseError: If the file is not
                well-formed XML.
        """
        tree = ET.parse(input_file, ET.XMLParser(encoding='utf-8'))
        root = tree.getroot()

        # Study-level metadata lives on the root element; a missing attribute
        # surfaces as KeyError.
        study_id = root.attrib['id']
        study_name = root.attrib['study_name']

        elements = []
        for variable in root.iter('variable'):
            # findtext() returns the default when the child is absent and ''
            # when the child exists but has no text, so both fields are
            # always strings (the original crashed on a missing <name>).
            elem = DugElement(
                elem_id=variable.attrib['id'],
                name=variable.findtext('name', default=''),
                desc=variable.findtext('description', default=''),
                # Looked up per variable on purpose: a file without any
                # variables is accepted even when 'module' is absent.
                elem_type=root.attrib['module'],
                collection_id=study_id,
                collection_name=study_name,
            )

            # Attach the study-level link as the collection action
            # (presumably a dbGaP study URL, going by the helper's name).
            elem.collection_action = utils.get_dbgap_study_link(study_id=elem.collection_id)
            logger.debug(elem)
            elements.append(elem)

        return elements