In [2]:
import xml.etree.ElementTree as ET
import pandas as pd

In [5]:
df = pd.read_csv('../src/master_thesis/taxonomies/icd10pcs_order_20211.csv')
df2 = df.loc[df['Column3'] == 1]
col2 = df2[['Column2']]
col2
print(col2)
# df

       Column2
1      0016070
2      0016071
3      0016072
4      0016073
5      0016074
...        ...
79019  XW24376
79021  XXE5XM5
79022  XXE5XN6
79023  XXEBXQ6
79025  XY0VX83

[78136 rows x 1 columns]


In [7]:
class MedicalTaxonomy:
    def __init__(self):
        self.taxonomy = {}

    def add_node(self, parent_id, code):
        if parent_id not in self.taxonomy:
            self.taxonomy[parent_id] = []
        self.taxonomy[parent_id].append(code)

    def is_valid_item(self, code):
        for children in self.taxonomy.values():
            for child in children:
                if child == code:
                    return True
        return False
    
    def is_leaf(self, code):
        if self.is_valid_item(code) != True:
            raise ValueError("The code \""+code+"\" does not exist.")
        if self.get_children(code) == []:
            return True
        else:
            return False
        
    def get_all_codes(self):
        all_codes_list = []
        for children in self.taxonomy.values():
            for child in children:
                all_codes_list.append(child)
        return all_codes_list           

    def get_children(self, code):
        if self.is_valid_item(code) != True:
            raise ValueError("The code \""+code+"\" does not exist.")
        return self.taxonomy.get(code, [])
    
    def get_ancestors(self, code):
        ancestors = []
        if self.is_valid_item(code) != True:
            raise ValueError("The code \""+code+"\" does not exist.")
        while code is not None:
            for parent_id, children in self.taxonomy.items():
                for child in children:
                    if child == code and parent_id != None:
                        ancestors.append(parent_id)
                        code = parent_id
                        break
                else:
                    continue  # This line ensures we keep searching in other branches.
                break  # Exit the inner loop if the node is found.
            else:
                # If the node has no parent, break the loop
                break
        return ancestors
    
    def get_descendants(self, code):
        if self.is_valid_item(code) != True:
            raise ValueError("The code \""+code+"\" does not exist.")
        descendants = []

        def find_descendants(code):
            if code in self.taxonomy:
                for child in self.taxonomy[code]:
                    descendants.append(child)
                    find_descendants(child)

        find_descendants(code)
        return descendants
    
    

# Usage example:
taxonomy = MedicalTaxonomy()
taxonomy.add_node(None, "origin")
taxonomy.add_node("origin", "medicine")
taxonomy.add_node("origin", "prescription")
taxonomy.add_node("medicine", "surgery")
taxonomy.add_node("medicine", "internal_medicine")
taxonomy.add_node("surgery", "orthopedics")

# print(taxonomy.get_children("origin"))
# print(taxonomy.get_children("medicine"))
# print(taxonomy.get_ancestors("orthopedics"))
# print(taxonomy.is_valid("medicine"))

def is_valid_item(code):
    return taxonomy.is_valid_item(code)

def is_leaf(code):
    return taxonomy.is_leaf(code)

def get_all_codes():
    return taxonomy.get_all_codes()

def get_children(code):
    # print(taxonomy.get_children(code))
    return taxonomy.get_children(code)

def get_ancestors(code):
    # print(taxonomy.get_ancestors(code))
    return taxonomy.get_ancestors(code)

def get_descendants(code):
    return taxonomy.get_descendants(code)

def get_nearest_common_ancestor(a:str,b:str,prioritize_blocks_a=False,prioritize_blocks_b=False) -> str:
    anc_a = [a] + get_ancestors(a)
    anc_b = [b] + get_ancestors(b)
    if len(anc_b) > len(anc_a):
        temp = anc_a
        anc_a = anc_b
        anc_b = temp
    for anc in anc_a:
        if anc in anc_b:
            return anc
    return ""

get_children('origin')
get_ancestors('origin')
is_valid_item('orthopedics')
is_leaf('orthopedics')
get_all_codes()
get_nearest_common_ancestor('orthopedics','prescription')
get_descendants("medicine")


['surgery', 'orthopedics', 'internal_medicine']

In [8]:
# levels_1 = set()

# for index, row in col2.iterrows():
#     section = row['Column2'][0]
#     level_1.add(section)
# print(level_1)

# all = []

# for e in level_1:
#     all.append([])

# print(all)

# for idx, e in enumerate(level_1):
#     exec("list_" + str(e) + " = []")

# ============
taxonomy = MedicalTaxonomy()
taxonomy.add_node(None, "origin")

sections_1 = set()
level_1 = []
i=-1

for index, row in col2.iterrows():
    section = row['Column2'][0]
    if section not in sections_1:
        sections_1.add(section)
        taxonomy.add_node("origin", f"Section {section}")
        level_1.append([])
        i = i + 1
        # exec("list_" + str(section) + " = []")
        # print(section)
    level_1[i].append(row['Column2'])

# level_1
print(taxonomy.get_children("origin"))
get_ancestors('Section 2')

# ===========

# for a in level_1:
#     # cat00.append(row['Column2'])
#     print(f"Section {a}")
#     # cat f'{a}' = []
#     a.append('yes')
#     print(a)
#     taxonomy.add_node("origin", f"Section {a}")

# print(taxonomy.get_children("origin"))
    

['Section 0', 'Section 1', 'Section 2', 'Section 3', 'Section 4', 'Section 5', 'Section 6', 'Section 7', 'Section 8', 'Section 9', 'Section B', 'Section C', 'Section D', 'Section F', 'Section G', 'Section H', 'Section X']


['origin']

In [9]:
level_1

[['0016070',
  '0016071',
  '0016072',
  '0016073',
  '0016074',
  '0016075',
  '0016076',
  '0016077',
  '0016078',
  '001607A',
  '001607B',
  '00160J0',
  '00160J1',
  '00160J2',
  '00160J3',
  '00160J4',
  '00160J5',
  '00160J6',
  '00160J7',
  '00160J8',
  '00160JA',
  '00160JB',
  '00160K0',
  '00160K1',
  '00160K2',
  '00160K3',
  '00160K4',
  '00160K5',
  '00160K6',
  '00160K7',
  '00160K8',
  '00160KA',
  '00160KB',
  '00160ZB',
  '0016370',
  '0016371',
  '0016372',
  '0016373',
  '0016374',
  '0016375',
  '0016376',
  '0016377',
  '0016378',
  '001637A',
  '001637B',
  '00163J0',
  '00163J1',
  '00163J2',
  '00163J3',
  '00163J4',
  '00163J5',
  '00163J6',
  '00163J7',
  '00163J8',
  '00163JA',
  '00163JB',
  '00163K0',
  '00163K1',
  '00163K2',
  '00163K3',
  '00163K4',
  '00163K5',
  '00163K6',
  '00163K7',
  '00163K8',
  '00163KA',
  '00163KB',
  '00163ZB',
  '0016470',
  '0016471',
  '0016472',
  '0016473',
  '0016474',
  '0016475',
  '0016476',
  '0016477',
  '0016478',

In [10]:
# sections_2 = set()
level_2_0 = []

for a in level_1:
    i=-1
    sections_2 = set()
    level_2 = []
    for b in a:
        section_1 = b[0]
        section_2 = b[1]
        if section_2 not in sections_2:
            sections_2.add(section_2)
            taxonomy.add_node(f"Section {section_1}", f"Section {section_1 + section_2}")
            level_2.append([])
            i = i + 1
            # exec("list_" + str(section) + " = []")
            # print(section)
        level_2[i].append(b)
    level_2_0.append(level_2)
    # print(level_2)

for n in sections_1:
    print(taxonomy.get_children(f"Section {n}"))

['Section 2W', 'Section 2Y']
['Section 5A']
['Section 00', 'Section 01', 'Section 02', 'Section 03', 'Section 04', 'Section 05', 'Section 06', 'Section 07', 'Section 08', 'Section 09', 'Section 0B', 'Section 0C', 'Section 0D', 'Section 0F', 'Section 0G', 'Section 0H', 'Section 0J', 'Section 0K', 'Section 0L', 'Section 0M', 'Section 0N', 'Section 0P', 'Section 0Q', 'Section 0R', 'Section 0S', 'Section 0T', 'Section 0U', 'Section 0V', 'Section 0W', 'Section 0X', 'Section 0Y']
['Section 30', 'Section 3C', 'Section 3E']
['Section B0', 'Section B2', 'Section B3', 'Section B4', 'Section B5', 'Section B7', 'Section B8', 'Section B9', 'Section BB', 'Section BD', 'Section BF', 'Section BG', 'Section BH', 'Section BL', 'Section BN', 'Section BP', 'Section BQ', 'Section BR', 'Section BT', 'Section BU', 'Section BV', 'Section BW', 'Section BY']
['Section GZ']
['Section C0', 'Section C2', 'Section C5', 'Section C7', 'Section C8', 'Section C9', 'Section CB', 'Section CD', 'Section CF', 'Section CG',

In [11]:
level_2_0
# get_ancestors('Section 8E')

[[['0016070',
   '0016071',
   '0016072',
   '0016073',
   '0016074',
   '0016075',
   '0016076',
   '0016077',
   '0016078',
   '001607A',
   '001607B',
   '00160J0',
   '00160J1',
   '00160J2',
   '00160J3',
   '00160J4',
   '00160J5',
   '00160J6',
   '00160J7',
   '00160J8',
   '00160JA',
   '00160JB',
   '00160K0',
   '00160K1',
   '00160K2',
   '00160K3',
   '00160K4',
   '00160K5',
   '00160K6',
   '00160K7',
   '00160K8',
   '00160KA',
   '00160KB',
   '00160ZB',
   '0016370',
   '0016371',
   '0016372',
   '0016373',
   '0016374',
   '0016375',
   '0016376',
   '0016377',
   '0016378',
   '001637A',
   '001637B',
   '00163J0',
   '00163J1',
   '00163J2',
   '00163J3',
   '00163J4',
   '00163J5',
   '00163J6',
   '00163J7',
   '00163J8',
   '00163JA',
   '00163JB',
   '00163K0',
   '00163K1',
   '00163K2',
   '00163K3',
   '00163K4',
   '00163K5',
   '00163K6',
   '00163K7',
   '00163K8',
   '00163KA',
   '00163KB',
   '00163ZB',
   '0016470',
   '0016471',
   '0016472',
   '00

In [12]:
level_3_0 = []

for a in level_2_0:
    for c in a:
        i=-1
        sections_3 = set()
        level_3 = []
        for b in c:
            section_1 = b[0]
            section_2 = b[1]
            section_3 = b[2]
            if section_3 not in sections_3:
                sections_3.add(section_3)
                taxonomy.add_node(f"Section {section_1 + section_2}", f"Section {section_1 + section_2 + section_3}")
                level_3.append([])
                i = i + 1
                # exec("list_" + str(section) + " = []")
                # print(section)
            level_3[i].append(b)
        level_3_0.append(level_3)


In [13]:
# get_children('Section X')
# get_children('Section XH')
level_3_0

[[['0016070',
   '0016071',
   '0016072',
   '0016073',
   '0016074',
   '0016075',
   '0016076',
   '0016077',
   '0016078',
   '001607A',
   '001607B',
   '00160J0',
   '00160J1',
   '00160J2',
   '00160J3',
   '00160J4',
   '00160J5',
   '00160J6',
   '00160J7',
   '00160J8',
   '00160JA',
   '00160JB',
   '00160K0',
   '00160K1',
   '00160K2',
   '00160K3',
   '00160K4',
   '00160K5',
   '00160K6',
   '00160K7',
   '00160K8',
   '00160KA',
   '00160KB',
   '00160ZB',
   '0016370',
   '0016371',
   '0016372',
   '0016373',
   '0016374',
   '0016375',
   '0016376',
   '0016377',
   '0016378',
   '001637A',
   '001637B',
   '00163J0',
   '00163J1',
   '00163J2',
   '00163J3',
   '00163J4',
   '00163J5',
   '00163J6',
   '00163J7',
   '00163J8',
   '00163JA',
   '00163JB',
   '00163K0',
   '00163K1',
   '00163K2',
   '00163K3',
   '00163K4',
   '00163K5',
   '00163K6',
   '00163K7',
   '00163K8',
   '00163KA',
   '00163KB',
   '00163ZB',
   '0016470',
   '0016471',
   '0016472',
   '00

In [14]:
level_4_0 = []

for a in level_3_0:
    for c in a:
        i=-1
        sections_4 = set()
        level_4 = []
        for b in c:
            section_1 = b[0]
            section_2 = b[1]
            section_3 = b[2]
            section_4 = b[3]
            if section_4 not in sections_4:
                sections_4.add(section_4)
                taxonomy.add_node(f"Section {section_1 + section_2 + section_3}", f"Section {section_1 + section_2 + section_3 + section_4}")
                level_4.append([])
                i = i + 1
                # exec("list_" + str(section) + " = []")
                # print(section)
            level_4[i].append(b)
        level_4_0.append(level_4)

In [15]:
get_children('Section 021')
# is_valid_item('0271')
# get_nearest_common_ancestor('Section 0270','Section 027X')

['Section 0210',
 'Section 0211',
 'Section 0212',
 'Section 0213',
 'Section 0216',
 'Section 0217',
 'Section 021K',
 'Section 021L',
 'Section 021P',
 'Section 021Q',
 'Section 021R',
 'Section 021V',
 'Section 021W',
 'Section 021X']

In [16]:
level_5_0 = []

for a in level_4_0:
    for c in a:
        i=-1
        sections_5 = set()
        level_5 = []
        for b in c:
            section_1 = b[0]
            section_2 = b[1]
            section_3 = b[2]
            section_4 = b[3]
            section_5 = b[4]
            if section_5 not in sections_5:
                sections_5.add(section_5)
                taxonomy.add_node(f"Section {section_1 + section_2 + section_3 + section_4}", f"Section {section_1 + section_2 + section_3 + section_4 + section_5}")
                level_5.append([])
                i = i + 1
                # exec("list_" + str(section) + " = []")
                # print(section)
            level_5[i].append(b)
        level_5_0.append(level_5)

In [17]:
get_children('Section 021W')

['Section 021W0', 'Section 021W4']

In [18]:
level_6_0 = []

for a in level_5_0:
    for c in a:
        i=-1
        sections_6 = set()
        level_6 = []
        for b in c:
            section_1 = b[0]
            section_2 = b[1]
            section_3 = b[2]
            section_4 = b[3]
            section_5 = b[4]
            section_6 = b[5]
            if section_6 not in sections_6:
                sections_6.add(section_6)
                taxonomy.add_node(f"Section {section_1 + section_2 + section_3 + section_4 + section_5}", f"Section {section_1 + section_2 + section_3 + section_4 + section_5 + section_6}")
                level_6.append([])
                i = i + 1
                # exec("list_" + str(section) + " = []")
                # print(section)
            level_6[i].append(b)
        level_6_0.append(level_6)

In [19]:
get_children('Section 021W4')

['Section 021W48',
 'Section 021W49',
 'Section 021W4A',
 'Section 021W4J',
 'Section 021W4K',
 'Section 021W4Z']

In [32]:
level_7_0 = []

for a in level_6_0:
    # a is list of lists containing the same digits from 1 to 5
    for c in a:
        #c is a list of codes containing the same digit in position 6
        i=-1
        sections_7 = set()
        level_7 = []
        for b in c:
            section_1 = b[0]
            section_2 = b[1]
            section_3 = b[2]
            section_4 = b[3]
            section_5 = b[4]
            section_6 = b[5]
            section_7 = b[6]
            if section_7 not in sections_7:
                sections_7.add(section_7)
                taxonomy.add_node(f"Section {section_1 + section_2 + section_3 + section_4 + section_5 + section_6}", b)
                level_7.append([])
                i = i + 1
                # exec("list_" + str(section) + " = []")
                # print(section)
            level_7[i].append(b)
        level_7_0.append(level_7)

In [135]:
get_children('Section 021W48')
get_nearest_common_ancestor('0QS734Z','0TTB4ZZ')

'Section 0'