In [1]:
import xml.etree.ElementTree as ET
import pandas as pd

In [2]:
df = pd.read_csv('./data/icd10pcs_order_20211.csv')
col2 = df[['Column2']]
col2
print(col2)
# df

        Column2
0      001     
1      0016070 
2      0016071 
3      0016072 
4      0016073 
...         ...
79021  XXE5XM5 
79022  XXE5XN6 
79023  XXEBXQ6 
79024  XY0     
79025  XY0VX83 

[79026 rows x 1 columns]


In [65]:
class MedicalTaxonomy:
    def __init__(self):
        self.taxonomy = {}

    def add_node(self, parent_id, code):
        if parent_id not in self.taxonomy:
            self.taxonomy[parent_id] = []
        self.taxonomy[parent_id].append(code)

    def is_valid_item(self, code):
        for children in self.taxonomy.values():
            for child in children:
                if child == code:
                    return True
        return False
    
    def is_leaf(self, code):
        if self.is_valid_item(code) != True:
            raise ValueError("The code \""+code+"\" does not exist.")
        if self.get_children(code) == []:
            return True
        else:
            return False
        
    def get_all_codes(self):
        all_codes_list = []
        for children in self.taxonomy.values():
            for child in children:
                all_codes_list.append(child)
        return all_codes_list           

    def get_children(self, code):
        if self.is_valid_item(code) != True:
            raise ValueError("The code \""+code+"\" does not exist.")
        return self.taxonomy.get(code, [])
    
    def get_ancestors(self, code):
        ancestors = []
        if self.is_valid_item(code) != True:
            raise ValueError("The code \""+code+"\" does not exist.")
        while code is not None:
            for parent_id, children in self.taxonomy.items():
                for child in children:
                    if child == code and parent_id != None:
                        ancestors.append(parent_id)
                        code = parent_id
                        break
                else:
                    continue  # This line ensures we keep searching in other branches.
                break  # Exit the inner loop if the node is found.
            else:
                # If the node has no parent, break the loop
                break
        return ancestors
    
    

# Usage example:
taxonomy = MedicalTaxonomy()
taxonomy.add_node(None, "origin")
taxonomy.add_node("origin", "medicine")
taxonomy.add_node("origin", "prescription")
taxonomy.add_node("medicine", "surgery")
taxonomy.add_node("medicine", "internal_medicine")
taxonomy.add_node("surgery", "orthopedics")

# print(taxonomy.get_children("origin"))
# print(taxonomy.get_children("medicine"))
# print(taxonomy.get_ancestors("orthopedics"))
# print(taxonomy.is_valid("medicine"))

def is_valid_item(code):
    return taxonomy.is_valid_item(code)

def is_leaf(code):
    return taxonomy.is_leaf(code)

def get_all_codes():
    return taxonomy.get_all_codes()

def get_children(code):
    # print(taxonomy.get_children(code))
    return taxonomy.get_children(code)

def get_ancestors(code):
    # print(taxonomy.get_ancestors(code))
    return taxonomy.get_ancestors(code)

get_children('origin')
get_ancestors('origin')
is_valid_item('orthopedics')
is_leaf('orthopedics')
get_all_codes()


['origin',
 'medicine',
 'prescription',
 'surgery',
 'internal_medicine',
 'orthopedics']

In [67]:
# levels_1 = set()

# for index, row in col2.iterrows():
#     section = row['Column2'][0]
#     level_1.add(section)
# print(level_1)

# all = []

# for e in level_1:
#     all.append([])

# print(all)

# for idx, e in enumerate(level_1):
#     exec("list_" + str(e) + " = []")

# ============
taxonomy = MedicalTaxonomy()
taxonomy.add_node(None, "origin")

sections_1 = set()
level_1 = []
i=-1

for index, row in col2.iterrows():
    section = row['Column2'][0]
    if section not in sections_1:
        sections_1.add(section)
        taxonomy.add_node("origin", f"Section {section}")
        level_1.append([])
        i = i + 1
        # exec("list_" + str(section) + " = []")
        # print(section)
    level_1[i].append(row['Column2'])

# level_1
print(taxonomy.get_children("origin"))
get_ancestors('Section 2')

# ===========

# for a in level_1:
#     # cat00.append(row['Column2'])
#     print(f"Section {a}")
#     # cat f'{a}' = []
#     a.append('yes')
#     print(a)
#     taxonomy.add_node("origin", f"Section {a}")

# print(taxonomy.get_children("origin"))
    

['Section 0', 'Section 1', 'Section 2', 'Section 3', 'Section 4', 'Section 5', 'Section 6', 'Section 7', 'Section 8', 'Section 9', 'Section B', 'Section C', 'Section D', 'Section F', 'Section G', 'Section H', 'Section X']


['origin']

In [None]:
level_1

In [68]:
# sections_2 = set()
level_2_0 = []

for a in level_1:
    i=-1
    sections_2 = set()
    level_2 = []
    for b in a:
        section_1 = b[0]
        section_2 = b[1]
        if section_2 not in sections_2:
            sections_2.add(section_2)
            taxonomy.add_node(f"Section {section_1}", f"Section {section_1 + section_2}")
            level_2.append([])
            i = i + 1
            # exec("list_" + str(section) + " = []")
            # print(section)
        level_2[i].append(b)
    level_2_0.append(level_2)
    # print(level_2)

for n in sections_1:
    print(taxonomy.get_children(f"Section {n}"))

['Section X2', 'Section XH', 'Section XK', 'Section XN', 'Section XR', 'Section XT', 'Section XV', 'Section XW', 'Section XX', 'Section XY']
['Section 2W', 'Section 2Y']
['Section B0', 'Section B2', 'Section B3', 'Section B4', 'Section B5', 'Section B7', 'Section B8', 'Section B9', 'Section BB', 'Section BD', 'Section BF', 'Section BG', 'Section BH', 'Section BL', 'Section BN', 'Section BP', 'Section BQ', 'Section BR', 'Section BT', 'Section BU', 'Section BV', 'Section BW', 'Section BY']
['Section 30', 'Section 3C', 'Section 3E']
['Section F0', 'Section F1']
['Section HZ']
['Section 7W']
['Section 9W']
['Section 4A', 'Section 4B']
['Section GZ']
['Section 6A']
['Section C0', 'Section C2', 'Section C5', 'Section C7', 'Section C8', 'Section C9', 'Section CB', 'Section CD', 'Section CF', 'Section CG', 'Section CH', 'Section CP', 'Section CT', 'Section CV', 'Section CW']
['Section D0', 'Section D7', 'Section D8', 'Section D9', 'Section DB', 'Section DD', 'Section DF', 'Section DG', 'Sectio

In [69]:
# level_2_0
get_ancestors('Section 8E')

['Section 8', 'origin']

In [195]:

for a in level_2:
    i=-1
    sections_3 = set()
    level_3 = []
    for b in a:
        section_1 = b[0]
        section_2 = b[1]
        section_3 = b[2]
        if section_3 not in sections_3:
            sections_3.add(section_3)
            taxonomy.add_node(f"Section {section_1 + section_2}", f"Section {section_1 + section_2 + section_3}")
            level_3.append([])
            i = i + 1
            # exec("list_" + str(section) + " = []")
            # print(section)
        level_3[i].append(b)
    # print(level_2)

    print(taxonomy.get_children(f"Section 00"))

[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
