In [3]:
import pandas as pd
import re

# Path to SNOMED codes (all codes with code history)
snomed_path = "C:/Users/chris/OneDrive/Dokumenter/SDU/Master's Thesis Project/SNOMED/patoSnoMed_2025-04.xlsx"

# Load the Excel file
xls_snomed = pd.read_excel(snomed_path)

# Check column names
print(f"Columns: {xls_snomed.columns.tolist()}")

Columns: ['Art', 'SKSkode', 'DatoFra', 'DatoÆndring', 'DatoTil', 'Kodetekst', 'AGrp', 'BGrp', 'CGrp', 'DGrp', 'EGrp', 'AVal', 'BVal', 'CVal', 'DVal', 'Inklusion', 'EkstraReg', 'Tags', 'Farvekode', 'Fuldtekst']


In [8]:
# Data Frame with relevant columns
df_snomed = pd.DataFrame(xls_snomed, columns=['SKSkode', 'DatoFra', 'DatoÆndring', 'DatoTil', 'Kodetekst', 'Fuldtekst'])

print("Shape of data frame: ", df_snomed.shape)
print("\nHead of data frame: \n", df_snomed.head())


Shape of data frame:  (20019, 6)

Head of data frame: 
   SKSkode   DatoFra  DatoÆndring   DatoTil  \
0  EYYY00  19400101     19400101  20000101   
1  F00150  19400101     19400101  25000101   
2  F01050  19400101     19400101  25000101   
3  F01051  19400101     20091218  20091231   
4  F01051  20100101     20150921  20150930   

                              Kodetekst                             Fuldtekst  
0  udgået (forkert oprettet, se ÆYYY00)  udgået (forkert oprettet, se ÆYYY00)  
1                               kakeksi                               kakeksi  
2                 postoperativ tilstand                 postoperativ tilstand  
3             komplet mesorektal fascie             komplet mesorektal fascie  
4                      mesorektalt plan                      mesorektalt plan  


In [9]:
# Get unique code prefixes
prefixes = df_snomed['SKSkode'].str[0].value_counts()
print("Unique code prefixes in SKSkode: \n", prefixes)


Unique code prefixes in SKSkode: 
 SKSkode
M    7834
Æ    5169
F    3719
T    2289
S     605
P     366
J      36
E       1
Name: count, dtype: int64


In [10]:
# Filter for T and M codes
t_codes = df_snomed[df_snomed['SKSkode'].str.startswith('T')]
m_codes = df_snomed[df_snomed['SKSkode'].str.startswith('M')]

print(f"\nNumber of T (topography) codes: {len(t_codes)}")
print(f"Number of M (morphology) codes: {len(m_codes)}")


Number of T (topography) codes: 2289
Number of M (morphology) codes: 7834


In [11]:
print("\nSample T codes:")
print(t_codes[['SKSkode', 'Kodetekst']].head())

print("\nSample M codes:")
print(m_codes[['SKSkode', 'Kodetekst']].head())


Sample T codes:
      SKSkode                    Kodetekst
12561  T00001             Topografi ukendt
12562  T00002  Topografi kan ikke anvendes
12563  T00003       Topografi ikke anvendt
12564  T00010           Legemet som helhed
12565  T00020      Multiple lokalisationer

Sample M codes:
     SKSkode                                    Kodetekst
3756  M00010                             ukendt morfologi
3757  M00015                          uafklaret morfologi
3758  M00020                morfologiakse ikke anvendelig
3759  M00030  forandring ikke tildelt morfologikodenummer
3760  M00040                uopklaret, 'mystisk' tilfælde


In [15]:
# Identify hierarchical categories
# M codes
print("M codes - analyzing structure for hierarchy:")
print("\nFirst 50 M codes to identify patterns:")
m_sample = m_codes[['SKSkode', 'Kodetekst']].head(50)
for idx, row in m_sample.iterrows():
    print(f"{row['SKSkode']}: {row['Kodetekst']}")


M codes - analyzing structure for hierarchy:

First 50 M codes to identify patterns:
M00010: ukendt morfologi
M00015: uafklaret morfologi
M00020: morfologiakse ikke anvendelig
M00030: forandring ikke tildelt morfologikodenummer
M00040: uopklaret, 'mystisk' tilfælde
M00050: M-akse ikke anvendt ved konvertering 1978
M00100: normalt væv
M00120: normale celler
M00121: normale celler, ingen endocervikale el. metaplastiske celler
M00122: normale celler, 50-75% af epitelcellerne kan ikke typebestem
M00140: normal struktur
M00150: normal karyotype
M00410: normal cellularitet
M01000: abnormt væv
M01030: abnorm morfologi
M01040: abnorm struktur
M01050: abnormt epitel
M01060: abnorm modning
M01090: atypisk histologisk forandring
M01111: uspecifik reaktiv forandring
M01500: forsnævring
M01520: obliteration
M02010: abnorm form
M02500: abnorm størrelse
M02510: abnormt stor størrelse
M02520: abnormt lille størrelse
M02553: øget diameter
M02554: nedsat diameter
M02561: abnorm forekomst af normale cell

In [16]:
print("Analyzing M code ranges:")
print("\n1. Normal tissue codes (M001xx range):")
normal_codes = m_codes[m_codes['SKSkode'].str.match(r'M001\d{2}')][['SKSkode', 'Kodetekst']].head(20)
for idx, row in normal_codes.iterrows():
    print(f"{row['SKSkode']}: {row['Kodetekst']}")

print("\n2. Abnormal/pathological codes (M01xxx range):")
abnormal_codes = m_codes[m_codes['SKSkode'].str.match(r'M01\d{3}')][['SKSkode', 'Kodetekst']].head(20)
for idx, row in abnormal_codes.iterrows():
    print(f"{row['SKSkode']}: {row['Kodetekst']}")

print("\n3. Looking for malignant codes (higher ranges):")
# Check higher number ranges that typically contain malignant codes
high_range_sample = m_codes[m_codes['SKSkode'].str.match(r'M8\d{4}|M9\d{4}')][['SKSkode', 'Kodetekst']].head(20)
print(f"Found {len(high_range_sample)} codes in M8xxxx-M9xxxx range (typical malignant range)")
for idx, row in high_range_sample.iterrows():
    print(f"{row['SKSkode']}: {row['Kodetekst']}")


Analyzing M code ranges:

1. Normal tissue codes (M001xx range):
M00100: normalt væv
M00120: normale celler
M00121: normale celler, ingen endocervikale el. metaplastiske celler
M00122: normale celler, 50-75% af epitelcellerne kan ikke typebestem
M00140: normal struktur
M00150: normal karyotype

2. Abnormal/pathological codes (M01xxx range):
M01000: abnormt væv
M01030: abnorm morfologi
M01040: abnorm struktur
M01050: abnormt epitel
M01060: abnorm modning
M01090: atypisk histologisk forandring
M01111: uspecifik reaktiv forandring
M01500: forsnævring
M01520: obliteration

3. Looking for malignant codes (higher ranges):
Found 20 codes in M8xxxx-M9xxxx range (typical malignant range)
M80000: benign tumor
M80001: tumor
M80001: tumor, usikkert om benign eller malign
M80003: malign tumor
M80004: tumor, direkte spredning
M80005: mikroinvasiv tumor
M80006: metastase, malign tumor
M80009: malign tumor, usikkert om primær eller metastase
M80010: benigne tumorceller
M80011: tumorceller
M80013: mali

In [17]:
# Search for key hierarchical categories based on SNOMED structure
# Search for benign, malignant, and other key categories

print("Searching for key morphological categories:")

print("\n1. BENIGN TUMORS:")
benign_codes = m_codes[m_codes['Kodetekst'].str.contains('benign', case=False, na=False)][['SKSkode', 'Kodetekst']].head(15)
for idx, row in benign_codes.iterrows():
    print(f"{row['SKSkode']}: {row['Kodetekst']}")

print("\n2. MALIGNANT TUMORS:")
malignant_codes = m_codes[m_codes['Kodetekst'].str.contains('malign', case=False, na=False)][['SKSkode', 'Kodetekst']].head(15)
for idx, row in malignant_codes.iterrows():
    print(f"{row['SKSkode']}: {row['Kodetekst']}")

print("\n3. CARCINOMA:")
carcinoma_codes = m_codes[m_codes['Kodetekst'].str.contains('karcinom|carcinoma', case=False, na=False)][['SKSkode', 'Kodetekst']].head(10)
for idx, row in carcinoma_codes.iterrows():
    print(f"{row['SKSkode']}: {row['Kodetekst']}")

print("\n4. IN SITU:")
insitu_codes = m_codes[m_codes['Kodetekst'].str.contains('in situ', case=False, na=False)][['SKSkode', 'Kodetekst']].head(10)
for idx, row in insitu_codes.iterrows():
    print(f"{row['SKSkode']}: {row['Kodetekst']}")




Searching for key morphological categories:

1. BENIGN TUMORS:
M47250: Jessners benigne lymfocytinfiltration
M72210: Spiegler-Fendts benigne lymfocytom
M72240: benign lymfoepitelial læsion
M72260: benign angiofollikulær hyperplasi
M74480: hereditær benign intraepitelial dyskeratose
M80000: benign tumor
M80001: tumor, usikkert om benign eller malign
M8000S: spredning fra benign tumor
M80010: benigne tumorceller
M80100: benign epitelial tumor
M81401: adenom, usikkert om benignt eller malignt
M83301: follikulær neoplasi, uvist om benign eller malign
M83701: adrenokortikal neoplasi, uvist om benign eller malign
M83900: benign adnextumor
M83900: adnekstumor, benign

2. MALIGNANT TUMORS:
M09450: ingen tegn på malignitet
M09462: ingen malignitetssuspekte celler
M09463: ingen maligne celler
M67102: urotelceller uden høj malignitetsgrad
M67104: urotelceller suspekt for høj malignitetsgrad
M67105: urotelceller med høj malignitetsgrad
M67106: ikke-uroteliale maligne tumorceller
M69760: malignitet

In [None]:
# T codes - topographical hierarchy
print("Analyzing T (Topography) codes for hierarchical structure:")

print("\n1. First 30 T codes to see the general structure:")
t_sample = t_codes[['SKSkode', 'Kodetekst']].head(30)
for idx, row in t_sample.iterrows():
    print(f"{row['SKSkode']}: {row['Kodetekst']}")

print("\n2. Looking for major anatomical categories:")
# Look for main organ systems
major_organs = t_codes[t_codes['SKSkode'].str.match(r'T[0-4]\d{4}')][['SKSkode', 'Kodetekst']].head(30)
print(f"\nMajor organ system codes (T0xxxx-T4xxxx range):")
for idx, row in major_organs.iterrows():
    print(f"{row['SKSkode']}: {row['Kodetekst']}")

Analyzing T (Topography) codes for hierarchical structure:

1. First 30 T codes to see the general structure:
T00001: Topografi ukendt
T00002: Topografi kan ikke anvendes
T00003: Topografi ikke anvendt
T00010: Legemet som helhed
T00020: Multiple lokalisationer
T00050: T-akse ikke anvendt ved konvertering 1978
T00100: Resektionslinie
T00100: Resektionsrand
T00101: Resektionsrand, oral
T00102: Resektionsrand, anal
T00103: Resektionsrand, distal
T00104: Resektionsrand, proksimal
T00105: Resektionsrand, kraniel
T00106: Resektionsrand, kaudal
T00107: Resektionsrand, ventral
T00108: Resektionsrand, dorsal
T00109: Resektionsrand, posterior
T00110: Resektionsrand, anterior
T00111: Sideresektionsrand
T00112: Resektionsrand, profund
T00260: Pladeepitel
T00460: Cylinderepitel
T00600: Ukendt primærtumorfokus
T00XX0: Hunligt individ
T00XY0: Hanligt individ
T01000: Hud
T01100: Epidermis
T01110: Stratum corneum
T01151: Melanocyt
T01152: Keratinocyt

2. Looking for major anatomical categories:

Major 

In [22]:
import re

def analyze_hierarchy():
    print("="*60)
    print("DANISH SNOMED HIERARCHY ANALYSIS - DENMARK 2012-2013")
    print("="*60)
    
    # Morphology (M) code hierarchy
    print("\nMORPHOLOGY (M) CODE HIERARCHY:")
    print("-" * 40)
    
    # Major categories based on code ranges
    categories = {
        "Unknown/Unspecified": r"M000\d{2}",
        "Normal Tissue": r"M001\d{2}",
        "Abnormal/Pathological Changes": r"M01\d{3}",
        "Material Quality Issues": r"M09\d{3}",
        "Inflammatory/Reactive": r"M[4-6]\d{4}",
        "Neoplastic/Tumorous": r"M8\d{4}|M9\d{4}"
    }
    
    for category, pattern in categories.items():
        matches = m_codes[m_codes['SKSkode'].str.match(pattern)]
        print(f"\n{category} ({len(matches)} codes):")
        
        # Show sample codes for each category
        sample = matches[['SKSkode', 'Kodetekst']].head(8)
        for idx, row in sample.iterrows():
            print(f"  {row['SKSkode']}: {row['Kodetekst']}")
        if len(matches) > 8:
            print(f"  ... and {len(matches) - 8} more codes")
    
    print("\n" + "="*60)
    print("TOPOGRAPHY (T) CODE HIERARCHY:")
    print("-" * 40)
    
    # Topography major anatomical systems
    anatomical_systems = {}
    
    # Group T codes by first 2-3 digits to identify major anatomical regions
    for idx, row in t_codes.iterrows():
        code = row['SKSkode']
        text = row['Kodetekst']
        
        if len(code) >= 4:
            prefix = code[:4]  # First 4 characters like T010, T020, etc.
            
            if prefix not in anatomical_systems:
                anatomical_systems[prefix] = []
            anatomical_systems[prefix].append((code, text))
    
    # Sort and display major anatomical regions
    major_regions = {}
    for prefix in sorted(anatomical_systems.keys()):
        region_codes = anatomical_systems[prefix]
        if len(region_codes) > 3:  # Only show regions with multiple codes
            # Try to identify the main region name from first code
            first_code_text = region_codes[0][1]
            major_regions[prefix] = (first_code_text, len(region_codes))
    
    print("\nMajor Anatomical Regions:")
    for prefix in sorted(major_regions.keys())[:15]:  # Show first 15 major regions
        region_name, count = major_regions[prefix]
        print(f"  {prefix}xxx: {region_name} ({count} codes)")
    
    return categories

# Run the analysis
hierarchy_categories = analyze_hierarchy()



DANISH SNOMED HIERARCHY ANALYSIS - DENMARK 2012-2013

MORPHOLOGY (M) CODE HIERARCHY:
----------------------------------------

Unknown/Unspecified (6 codes):
  M00010: ukendt morfologi
  M00015: uafklaret morfologi
  M00020: morfologiakse ikke anvendelig
  M00030: forandring ikke tildelt morfologikodenummer
  M00040: uopklaret, 'mystisk' tilfælde
  M00050: M-akse ikke anvendt ved konvertering 1978

Normal Tissue (6 codes):
  M00100: normalt væv
  M00120: normale celler
  M00121: normale celler, ingen endocervikale el. metaplastiske celler
  M00122: normale celler, 50-75% af epitelcellerne kan ikke typebestem
  M00140: normal struktur
  M00150: normal karyotype

Abnormal/Pathological Changes (9 codes):
  M01000: abnormt væv
  M01030: abnorm morfologi
  M01040: abnorm struktur
  M01050: abnormt epitel
  M01060: abnorm modning
  M01090: atypisk histologisk forandring
  M01111: uspecifik reaktiv forandring
  M01500: forsnævring
  ... and 1 more codes

Material Quality Issues (84 codes):
  

In [23]:
# Let's create a more detailed hierarchical structure focusing on the main categories
# particularly looking at malignant vs benign classifications

def create_detailed_hierarchy():
    print("DETAILED MORPHOLOGY HIERARCHY - DANISH SNOMED 2012-2013")
    print("=" * 65)
    
    # Define the main hierarchical structure based on SNOMED conventions
    hierarchy = {
        "1. NORMAL AND REACTIVE": {
            "1.1 Normal Tissue": [],
            "1.2 Unknown/Unspecified": [],
            "1.3 Abnormal but Non-neoplastic": [],
            "1.4 Material Issues": []
        },
        "2. INFLAMMATORY AND REACTIVE": {
            "2.1 Inflammatory Conditions": [],
            "2.2 Reactive Changes": [],
            "2.3 Infectious Conditions": []
        },
        "3. NEOPLASTIC CONDITIONS": {
            "3.1 Benign Tumors": [],
            "3.2 Malignant Tumors": [],
            "3.3 In Situ Carcinomas": [],
            "3.4 Uncertain Behavior": []
        }
    }
    
    # Categorize M codes based on content analysis
    for idx, row in m_codes.iterrows():
        code = row['SKSkode']
        text = row['Kodetekst'].lower()
        
        # Normal tissue categories
        if any(word in text for word in ['normalt væv', 'normale celler', 'normal struktur', 'normal karyotype']):
            hierarchy["1. NORMAL AND REACTIVE"]["1.1 Normal Tissue"].append((code, row['Kodetekst']))
        elif any(word in text for word in ['ukendt', 'uafklaret', 'ikke anvendelig']):
            hierarchy["1. NORMAL AND REACTIVE"]["1.2 Unknown/Unspecified"].append((code, row['Kodetekst']))
        elif any(word in text for word in ['abnormt væv', 'abnorm', 'atypisk']) and 'tumor' not in text:
            hierarchy["1. NORMAL AND REACTIVE"]["1.3 Abnormal but Non-neoplastic"].append((code, row['Kodetekst']))
        elif any(word in text for word in ['materiale', 'uegnet', 'for lidt']):
            hierarchy["1. NORMAL AND REACTIVE"]["1.4 Material Issues"].append((code, row['Kodetekst']))
        
        # Inflammatory and reactive
        elif any(word in text for word in ['inflammation', 'inflammatorisk', 'reaktiv']):
            hierarchy["2. INFLAMMATORY AND REACTIVE"]["2.1 Inflammatory Conditions"].append((code, row['Kodetekst']))
        elif any(word in text for word in ['hyperplasi', 'metaplasi', 'dysplasi']):
            hierarchy["2. INFLAMMATORY AND REACTIVE"]["2.2 Reactive Changes"].append((code, row['Kodetekst']))
        
        # Neoplastic conditions
        elif any(word in text for word in ['benign tumor', 'benignt', 'benigne']):
            hierarchy["3. NEOPLASTIC CONDITIONS"]["3.1 Benign Tumors"].append((code, row['Kodetekst']))
        elif any(word in text for word in ['malign tumor', 'maligne', 'karcinom', 'carcinoma', 'sarkom']) and 'in situ' not in text:
            hierarchy["3. NEOPLASTIC CONDITIONS"]["3.2 Malignant Tumors"].append((code, row['Kodetekst']))
        elif 'in situ' in text:
            hierarchy["3. NEOPLASTIC CONDITIONS"]["3.3 In Situ Carcinomas"].append((code, row['Kodetekst']))
        elif any(word in text for word in ['usikkert', 'uvist', 'tvivlsom']) and 'tumor' in text:
            hierarchy["3. NEOPLASTIC CONDITIONS"]["3.4 Uncertain Behavior"].append((code, row['Kodetekst']))
    
    # Display the hierarchy
    for main_category, subcategories in hierarchy.items():
        print(f"\n{main_category}")
        print("-" * (len(main_category) + 5))
        
        for subcategory, codes in subcategories.items():
            if codes:  # Only show categories that have codes
                print(f"\n  {subcategory} ({len(codes)} codes):")
                # Show first 5 examples of each subcategory
                for code, text in codes[:5]:
                    print(f"    {code}: {text}")
                if len(codes) > 5:
                    print(f"    ... and {len(codes) - 5} more codes")
    
    return hierarchy

# Create the detailed hierarchy
detailed_hierarchy = create_detailed_hierarchy()


DETAILED MORPHOLOGY HIERARCHY - DANISH SNOMED 2012-2013

1. NORMAL AND REACTIVE
---------------------------

  1.1 Normal Tissue (9 codes):
    M00100: normalt væv
    M00120: normale celler
    M00121: normale celler, ingen endocervikale el. metaplastiske celler
    M00122: normale celler, 50-75% af epitelcellerne kan ikke typebestem
    M00140: normal struktur
    ... and 4 more codes

  1.2 Unknown/Unspecified (6 codes):
    M00010: ukendt morfologi
    M00015: uafklaret morfologi
    M00020: morfologiakse ikke anvendelig
    M67014: ASCUS - atypiske pladeepitelceller af ukendt betydning
    M98600: klonal hæmatopoiese med ukendt potentiale
    ... and 1 more codes

  1.3 Abnormal but Non-neoplastic (94 codes):
    M01000: abnormt væv
    M01030: abnorm morfologi
    M01040: abnorm struktur
    M01050: abnormt epitel
    M01060: abnorm modning
    ... and 89 more codes

  1.4 Material Issues (42 codes):
    M09000: for lidt materiale til diagnostisk vurdering
    M09001: for lidt ma

In [24]:
# Now let's create a comprehensive topography hierarchy
def create_topography_hierarchy():
    print("\nTOPOGRAPHY (T) CODE HIERARCHY - DANISH SNOMED 2012-2013")
    print("=" * 65)
    
    # Analyze T codes to create anatomical hierarchy
    anatomical_hierarchy = {}
    
    # Group by major anatomical regions
    for idx, row in t_codes.iterrows():
        code = row['SKSkode']
        text = row['Kodetekst'].lower()
        
        # Extract the major region from the code structure
        if len(code) >= 4:
            major_region = code[:3]  # First 3 characters like T01, T02, etc.
            
            if major_region not in anatomical_hierarchy:
                anatomical_hierarchy[major_region] = []
            
            anatomical_hierarchy[major_region].append((code, row['Kodetekst']))
    
    # Create meaningful region names based on common patterns
    region_names = {
        'T00': "General/Unknown Topography",
        'T01': "Skin and Subcutaneous Tissue", 
        'T02': "Skin by Location",
        'T04': "Breast",
        'T06': "Bone Marrow and Hematopoietic",
        'T07': "Lymph Nodes",
        'T08': "Bone and Joints",
        'T09': "Central Nervous System",
        'T11': "Nose and Sinuses",
        'T12': "Larynx",
        'T13': "Trachea and Bronchi",
        'T14': "Lung",
        'T15': "Esophagus",
        'T16': "Stomach",
        'T17': "Small Intestine",
        'T18': "Colon",
        'T19': "Rectum",
        'T20': "Liver",
        'T22': "Pancreas",
        'T23': "Kidney",
        'T25': "Bladder",
        'T28': "Male Genital Organs",
        'T29': "Prostate",
        'T30': "Testis",
        'T31': "Female Genital Organs",
        'T33': "Ovary",
        'T34': "Uterus",
        'T35': "Cervix",
        'T36': "Vagina and Vulva",
        'T40': "Thyroid",
        'T41': "Adrenal Gland",
        'T42': "Pituitary",
        'T45': "Eye",
        'T46': "Ear"
    }
    
    # Display the topography hierarchy
    print("\nMAJOR ANATOMICAL REGIONS:")
    print("-" * 30)
    
    sorted_regions = sorted(anatomical_hierarchy.keys())
    
    for region in sorted_regions[:25]:  # Show first 25 regions
        if region in region_names:
            region_name = region_names[region]
        else:
            # Try to infer name from first code in the region
            first_code_text = anatomical_hierarchy[region][0][1] if anatomical_hierarchy[region] else "Unknown"
            region_name = first_code_text
        
        count = len(anatomical_hierarchy[region])
        print(f"\n{region}xxx: {region_name} ({count} codes)")
        
        # Show a few examples
        examples = anatomical_hierarchy[region][:4]
        for code, text in examples:
            print(f"    {code}: {text}")
        
        if count > 4:
            print(f"    ... and {count - 4} more codes")

create_topography_hierarchy()




TOPOGRAPHY (T) CODE HIERARCHY - DANISH SNOMED 2012-2013

MAJOR ANATOMICAL REGIONS:
------------------------------

T00xxx: General/Unknown Topography (25 codes)
    T00001: Topografi ukendt
    T00002: Topografi kan ikke anvendes
    T00003: Topografi ikke anvendt
    T00010: Legemet som helhed
    ... and 21 more codes

T01xxx: Skin and Subcutaneous Tissue (25 codes)
    T01000: Hud
    T01100: Epidermis
    T01110: Stratum corneum
    T01151: Melanocyt
    ... and 21 more codes

T02xxx: Skin by Location (172 codes)
    T02010: Hud inkl. subcutis
    T02020: Hud ekskl. subcutis
    T02100: Hud på hoved
    T02102: Hud på skalp
    ... and 168 more codes

T03xxx: Subcutis (13 codes)
    T03000: Subcutis
    T03010: Retinaculum cutis
    T03020: Panniculus adiposus
    T03100: Subcutis på hoved
    ... and 9 more codes

T04xxx: Breast (26 codes)
    T04000: Mamma
    T04001: Højre mamma
    T04002: Venstre mamma
    T04010: Mamma feminina
    ... and 22 more codes

T05xxx: Hæmatopoieti

In [None]:
# Create a final comprehensive summary report as CSV
def create_hierarchy_summary():
    
    # Create morphology hierarchy summary
    morphology_data = []
    
    # Main categories with their code ranges and descriptions
    morphology_categories = [
        {
            "Level": 1,
            "Category": "Normal and Unspecified",
            "Code_Range": "M000xx-M001xx", 
            "Description": "Normal tissue, unknown morphology, material issues",
            "Examples": "M00100 (normalt væv), M00010 (ukendt morfologi)"
        },
        {
            "Level": 1,
            "Category": "Abnormal Non-neoplastic", 
            "Code_Range": "M01xxx-M03xxx",
            "Description": "Abnormal tissue changes, not neoplastic",
            "Examples": "M01000 (abnormt væv), M01090 (atypisk histologisk forandring)"
        },
        {
            "Level": 1,
            "Category": "Inflammatory/Reactive",
            "Code_Range": "M4xxxx-M6xxxx", 
            "Description": "Inflammatory conditions, reactive changes, hyperplasia",
            "Examples": "M40000 (inflammation), M22430 (tanatofor dysplasi)"
        },
        {
            "Level": 1,
            "Category": "Neoplastic Conditions",
            "Code_Range": "M8xxxx-M9xxxx",
            "Description": "All tumor-related morphologies",
            "Examples": "M80000 (benign tumor), M80003 (malign tumor)"
        },
        {
            "Level": 2,
            "Category": "Benign Tumors",
            "Code_Range": "M8xxxx",
            "Description": "Non-malignant neoplasms",
            "Examples": "M80000 (benign tumor), M80010 (benigne tumorceller)"
        },
        {
            "Level": 2,
            "Category": "Malignant Tumors", 
            "Code_Range": "M8xxxx-M9xxxx",
            "Description": "Malignant neoplasms, carcinomas, sarcomas",
            "Examples": "M80003 (malign tumor), M80103 (karcinom)"
        },
        {
            "Level": 2,
            "Category": "In Situ Carcinomas",
            "Code_Range": "M8xxxx", 
            "Description": "Non-invasive malignant epithelial tumors",
            "Examples": "M80102 (karcinom in situ), M81402 (adenokarcinom in situ)"
        },
        {
            "Level": 2,
            "Category": "Uncertain Behavior",
            "Code_Range": "M8xxxx",
            "Description": "Tumors of uncertain malignant potential",
            "Examples": "M80001 (tumor, usikkert om benign eller malign)"
        }
    ]
    
    # Create topography hierarchy summary  
    topography_categories = [
        {
            "Level": 1,
            "Category": "General/Unknown",
            "Code_Range": "T00xxx",
            "Description": "Unknown topography, general body regions",
            "Examples": "T00001 (Topografi ukendt), T00010 (Legemet som helhed)"
        },
        {
            "Level": 1, 
            "Category": "Skin and Integument",
            "Code_Range": "T01xxx-T04xxx",
            "Description": "Skin, subcutaneous tissue, breast",
            "Examples": "T01000 (Hud), T04000 (Mamma)"
        },
        {
            "Level": 1,
            "Category": "Hematopoietic System", 
            "Code_Range": "T05xxx-T09xxx",
            "Description": "Blood, bone marrow, lymph nodes, spleen",
            "Examples": "T06000 (Knoglemarv), T08000 (Lymfeknude)"
        },
        {
            "Level": 1,
            "Category": "Musculoskeletal System",
            "Code_Range": "T10xxx-T18xxx", 
            "Description": "Bones, joints, muscles, soft tissue",
            "Examples": "T10020 (Knogle), T13000 (Skeletmuskulatur)"
        },
        {
            "Level": 1,
            "Category": "Respiratory System",
            "Code_Range": "T20xxx-T24xxx",
            "Description": "Nose, sinuses, larynx, trachea, lungs", 
            "Examples": "T21000 (Næse), T22000 (Bihule)"
        },
        {
            "Level": 1,
            "Category": "Digestive System",
            "Code_Range": "T25xxx-T32xxx",
            "Description": "Oral cavity, esophagus, stomach, intestines, liver",
            "Examples": "T25000 (Mundslimhinde), T30000 (Spiserør)"
        },
        {
            "Level": 1, 
            "Category": "Urogenital System",
            "Code_Range": "T60xxx-T75xxx",
            "Description": "Kidneys, bladder, male/female genital organs",
            "Examples": "T62000 (Nyre), T71000 (Penis)"
        },
        {
            "Level": 1,
            "Category": "Endocrine System", 
            "Code_Range": "T76xxx-T79xxx",
            "Description": "Thyroid, adrenals, pituitary, endocrine glands",
            "Examples": "T77000 (Thyreoidea), T79000 (Binyre)"
        },
        {
            "Level": 1,
            "Category": "Nervous System",
            "Code_Range": "T80xxx-T83xxx", 
            "Description": "Brain, spinal cord, peripheral nerves",
            "Examples": "T80000 (Hjerne), T82000 (Rygmarv)"
        },
        {
            "Level": 1,
            "Category": "Special Senses",
            "Code_Range": "T84xxx-T87xxx",
            "Description": "Eye, ear, organs of special senses", 
            "Examples": "T84000 (Øje), T86000 (Øre)"
        }
    ]
    
    # Convert to DataFrames
    morphology_df = pd.DataFrame(morphology_categories)
    topography_df = pd.DataFrame(topography_categories)
    
    print("DANISH SNOMED HIERARCHY SUMMARY - 2012-2013")
    print("=" * 60)
    print("\nMORPHOLOGY (M) CODE HIERARCHY:")
    print(morphology_df.to_string(index=False))
    
    print("\n" + "=" * 60)
    print("\nTOPOGRAPHY (T) CODE HIERARCHY:")  
    print(topography_df.to_string(index=False))
    
    return morphology_df, topography_df

# Create the summary
morph_df, topo_df = create_hierarchy_summary()



