In [None]:
import os
import pandas as pd
from lxml import etree

# Read one sample patient document from disk and build its element tree.
# The file is opened in binary mode so the parser receives the raw bytes.
sample_path = './resources/Patient-502.xml'
with open(sample_path, 'rb') as xml_file:
    tree = etree.parse(xml_file)

# Prefix -> namespace URI mapping passed to the namespaced lookups on `tree`
ns = {'hl7': 'urn:hl7-org:v3'}

In [None]:
# Pull the three demographic labels out of the document parsed into `tree`.
# Each lookup takes the first matching element anywhere in the tree and reads
# its displayName attribute. `.get` returns None when the element exists but
# has no displayName, so the trailing `or 'Unknown'` covers both a missing
# element and a missing/empty attribute.

# Extract administrative gender
gender_code = tree.find('.//hl7:administrativeGenderCode', namespaces=ns)
gender = (gender_code.get('displayName') if gender_code is not None else None) or 'Unknown'

# Extract race
race_code = tree.find('.//hl7:raceCode', namespaces=ns)
race = (race_code.get('displayName') if race_code is not None else None) or 'Unknown'

# Extract ethnicity
ethnicity_code = tree.find('.//hl7:ethnicGroupCode', namespaces=ns)
ethnicity = (ethnicity_code.get('displayName') if ethnicity_code is not None else None) or 'Unknown'

# Print the results
print(f'Gender: {gender}')
print(f'Race: {race}')
print(f'Ethnicity: {ethnicity}')

In [None]:
import os
import pandas as pd
from lxml import etree
from collections import defaultdict

# Tally the gender / race / ethnicity labels across every XML file in a directory.

# Directory containing the XML files
directory_path = './resources/'

# Define namespaces to use when searching
ns = {'hl7': 'urn:hl7-org:v3'}

# Initialize counters for each category (label -> number of files)
gender_count = defaultdict(int)
race_count = defaultdict(int)
ethnicity_count = defaultdict(int)

# Element path -> counter that tallies that element's displayName
counters_by_path = {
    './/hl7:administrativeGenderCode': gender_count,
    './/hl7:raceCode': race_count,
    './/hl7:ethnicGroupCode': ethnicity_count,
}

# os.listdir returns entries in arbitrary order; sorting keeps the counters'
# insertion order (and therefore any printed summary) reproducible between runs.
for filename in sorted(os.listdir(directory_path)):
    if not filename.endswith('.xml'):
        continue
    file_path = os.path.join(directory_path, filename)

    # Load and parse the XML file. The catch is deliberately broad so that one
    # unreadable or malformed file is reported and skipped instead of aborting
    # the whole batch.
    try:
        with open(file_path, 'rb') as file:
            tree = etree.parse(file)
    except Exception as e:
        print(f"Failed to parse {filename}: {e}")
        continue

    # Counting happens only after a successful parse, so every parsed file
    # contributes exactly one entry to each of the three counters.
    for path, counter in counters_by_path.items():
        element = tree.find(path, namespaces=ns)
        # `.get` returns None when the attribute is absent; treat that (and an
        # empty value) the same as a missing element.
        label = element.get('displayName') if element is not None else None
        counter[label or 'Unknown'] += 1

In [None]:
# Report each tally under its own heading, one "label: count" line per entry.
# Headings after the first start with a newline so sections are separated by
# a blank line.
summary_sections = (
    ("Gender Counts:", gender_count),
    ("\nRace Counts:", race_count),
    ("\nEthnicity Counts:", ethnicity_count),
)

for heading, tally in summary_sections:
    print(heading)
    for label, total in tally.items():
        print(f"{label}: {total}")

In [None]:
import os
import pandas as pd
from lxml import etree

# Build one row per XML file (filename + gender / race / ethnicity labels),
# collect the rows into a DataFrame, and write it to CSV.

# Directory containing the XML files
directory_path = './resources/'

# Define namespaces to use when searching
ns = {'hl7': 'urn:hl7-org:v3'}

# Output column -> path of the element whose displayName fills that column
column_paths = {
    'Gender': './/hl7:administrativeGenderCode',
    'Race': './/hl7:raceCode',
    'Ethnicity': './/hl7:ethnicGroupCode',
}

# Initialize a list to store records for DataFrame
data_records = []

# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the DataFrame and the CSV reproducible between runs.
for filename in sorted(os.listdir(directory_path)):
    if not filename.endswith('.xml'):
        continue
    file_path = os.path.join(directory_path, filename)

    # Load and parse the XML file. The catch is deliberately broad so that one
    # unreadable or malformed file is reported and skipped instead of aborting
    # the whole batch.
    try:
        with open(file_path, 'rb') as file:
            tree = etree.parse(file)
    except Exception as e:
        print(f"Failed to parse {filename}: {e}")
        continue

    record = {'Filename': filename}
    for column, path in column_paths.items():
        element = tree.find(path, namespaces=ns)
        # `.get` returns None when the attribute is absent; treat that (and an
        # empty value) the same as a missing element.
        label = element.get('displayName') if element is not None else None
        record[column] = label or 'Unknown'
    data_records.append(record)

# Create DataFrame from the records. Columns are named explicitly so that an
# empty result still has the expected header instead of no columns at all.
df = pd.DataFrame(data_records, columns=['Filename', *column_paths])

# Print the DataFrame
print(df)

# Save the DataFrame to a CSV file
df.to_csv('output_summary_pivoted.csv', index=False)
