In [13]:
import xml.etree.ElementTree as ET
from collections import defaultdict

In [2]:
# Parse the XML file; the path is relative to the notebook's working directory.
tree = ET.parse('icd10cm_tabular_2022.xml')
# Root element of the parsed document.
root = tree.getroot()

In [18]:
def extract_icd10_xml(root):
    """Flatten a chapter/section/diag XML tree into two lookup tables.

    Only direct ``chapter`` children of ``root`` are read, then the direct
    ``section`` children of each chapter, then the ``diag`` elements nested
    (to any depth) below each section.

    Keys are prefixed by level:
      * ``'ICD10CM'``          - the synthetic root
      * ``'chapter:<name>'``   - text of the chapter's ``name`` child
      * ``'section:<id>'``     - the section's ``id`` attribute
      * ``'dx:<name>'``        - text of the diag's ``name`` child

    Returns:
        A ``(pt2ch, desc)`` tuple. ``pt2ch`` is a ``defaultdict(list)``
        mapping a parent key to its child keys in document order; ``desc``
        is a dict mapping every key to the text of its ``desc`` child (the
        root maps to ``'ICD10CM'``).

    Raises:
        StopIteration: a chapter or diag has no ``name``/``desc`` child, or
            a section has no ``desc`` child.
        KeyError: a section has no ``id`` attribute.
    """
    children_of = defaultdict(list)
    descriptions = {'ICD10CM': 'ICD10CM'}

    def _child_text(element, tag):
        # Text of the first direct child carrying `tag`.
        return next(child for child in element if child.tag == tag).text

    def _collect_diags(section_key, section):
        # Pre-order walk with an explicit stack. Children are pushed in
        # reverse so they are popped (and recorded) in document order.
        pending = [(section_key, dx) for dx in reversed(section.findall('diag'))]
        while pending:
            parent_key, node = pending.pop()
            code = _child_text(node, 'name')
            label = _child_text(node, 'desc')
            node_key = f'dx:{code}'
            descriptions[node_key] = label
            children_of[parent_key].append(node_key)
            pending.extend(
                (node_key, sub) for sub in reversed(node.findall('diag'))
            )

    for chapter in root.findall('chapter'):
        number = _child_text(chapter, 'name')
        title = _child_text(chapter, 'desc')
        chapter_key = f'chapter:{number}'
        children_of['ICD10CM'].append(chapter_key)
        descriptions[chapter_key] = title

        for section in chapter.findall('section'):
            section_id = section.attrib['id']
            section_title = _child_text(section, 'desc')
            section_key = f'section:{section_id}'
            children_of[chapter_key].append(section_key)
            descriptions[section_key] = section_title

            _collect_diags(section_key, section)

    return children_of, descriptions

In [20]:
# Build the parent -> children map and the key -> description map from the parsed tree.
pt2ch, desc = extract_icd10_xml(root)

In [23]:
# Scratch check of string ordering: the uppercase letter sorts first.
sorted('Asem')

['A', 'e', 'm', 's']

In [22]:
# Display only the entries of `desc` whose key contains the 'chapter:' marker.
dict((key, text) for key, text in desc.items() if 'chapter:' in key)

{'chapter:1': 'Certain infectious and parasitic diseases (A00-B99)',
 'chapter:2': 'Neoplasms (C00-D49)',
 'chapter:3': 'Diseases of the blood and blood-forming organs and certain disorders involving the immune mechanism (D50-D89)',
 'chapter:4': 'Endocrine, nutritional and metabolic diseases (E00-E89)',
 'chapter:5': 'Mental, Behavioral and Neurodevelopmental disorders (F01-F99)',
 'chapter:6': 'Diseases of the nervous system (G00-G99)',
 'chapter:7': 'Diseases of the eye and adnexa (H00-H59)',
 'chapter:8': 'Diseases of the ear and mastoid process (H60-H95)',
 'chapter:9': 'Diseases of the circulatory system (I00-I99)',
 'chapter:10': 'Diseases of the respiratory system (J00-J99)',
 'chapter:11': 'Diseases of the digestive system (K00-K95)',
 'chapter:12': 'Diseases of the skin and subcutaneous tissue (L00-L99)',
 'chapter:13': 'Diseases of the musculoskeletal system and connective tissue (M00-M99)',
 'chapter:14': 'Diseases of the genitourinary system (N00-N99)',
 'chapter:15': 'Pre

In [3]:
# Direct <chapter> children of the root, in document order.
chapters = root.findall('chapter')
chapters

[<Element 'chapter' at 0x7f6bb836aae0>,
 <Element 'chapter' at 0x7f6bb814e360>,
 <Element 'chapter' at 0x7f6ba95fba90>,
 <Element 'chapter' at 0x7f6ba95badb0>,
 <Element 'chapter' at 0x7f6ba943a090>,
 <Element 'chapter' at 0x7f6ba9261950>,
 <Element 'chapter' at 0x7f6ba90d5770>,
 <Element 'chapter' at 0x7f6ba8d6d4f0>,
 <Element 'chapter' at 0x7f6ba8c74270>,
 <Element 'chapter' at 0x7f6ba8992590>,
 <Element 'chapter' at 0x7f6ba88892c0>,
 <Element 'chapter' at 0x7f6ba86ced10>,
 <Element 'chapter' at 0x7f6ba855bb80>,
 <Element 'chapter' at 0x7f6b8fd164f0>,
 <Element 'chapter' at 0x7f6b8fbfb540>,
 <Element 'chapter' at 0x7f6b8f96e6d0>,
 <Element 'chapter' at 0x7f6b8f84de00>,
 <Element 'chapter' at 0x7f6b8f6df090>,
 <Element 'chapter' at 0x7f6b8f546680>,
 <Element 'chapter' at 0x7f6b8e57dcc0>,
 <Element 'chapter' at 0x7f6b8dfce090>,
 <Element 'chapter' at 0x7f6b8dd406d0>]

In [4]:
# Take the first chapter element as a sample for inspecting its structure.
chapter = chapters[0]

In [6]:
# Print the tag of every direct child of the sample chapter, one per line.
for e in chapter:
    print(e.tag)

name
desc
includes
useAdditionalCode
excludes1
excludes2
sectionIndex
section
section
section
section
section
section
section
section
section
section
section
section
section
section
section
section
section
section
section
section
section
section
