In [1]:
import collections.abc
import os

import pydicom

## Load the dicom file

In [2]:
# Read the DICOM file; the path is relative to the notebook's working directory.
data = pydicom.dcmread("slice0.dcm")

## Define a function to print all DataElements (tags) of the dataset (including those nested in sequences)

In [3]:
def print_callback(dataset: "Dataset", elem: pydicom.DataElement) -> None:
    """Print one data element and flag it when its tag is private.

    The ``(dataset, elem)`` signature lets this function be passed as the
    callback of a dataset's ``walk()`` method.

    Args:
        dataset: Dataset that holds ``elem``.
        elem: Data element currently being visited.
    """
    tag = elem.tag
    # The element is looked up through its dataset rather than printed directly.
    print(dataset[tag])
    if not tag.is_private:
        return
    print('Is private')

def print_tags(dataset) -> None:
    """Print every data element of ``dataset`` via ``print_callback``.

    Args:
        dataset: Object exposing a ``walk(callback)`` method (a pydicom Dataset
            in this notebook).
    """
    # ``recursive=True`` is walk()'s default; spelled out so it is visible that
    # elements nested in sequences are visited too.
    dataset.walk(print_callback, recursive=True)

## Print the initial tags of the dicom file

In [4]:
# Show every element of the freshly loaded dataset, before any replacement.
print_tags(data)

(0008, 0008) Image Type                          CS: ['ORIGINAL', 'PRIMARY', 'AXIAL']
(0008, 0016) SOP Class UID                       UI: MR Image Storage
(0008, 0018) SOP Instance UID                    UI: 1.2.826.0.1.3680043.8.498.11647531947692001889139142101174006376
(0008, 0020) Study Date                          DA: '20231016'
(0008, 0060) Modality                            CS: 'MR'
(0008, 103e) Series Description                  LO: 'pacsman_testing_dicom'
(0010, 0010) Patient's Name                      PN: 'PACSMAN'
(0010, 0020) Patient ID                          LO: 'PACSMAN1'
(0010, 0026) Source Patient Group Identification SQ: <Sequence, length 1>
(0010, 0020) Patient ID                          LO: 'PACSMAN1'
(0020, 000d) Study Instance UID                  UI: 1.2.826.0.1.3680043.8.498.53705520178064824913661855181940860223
(0020, 000e) Series Instance UID                 UI: 1.2.826.0.1.3680043.8.498.51477978481347690572891822947821352950
(0020, 0013) Instance Numb

## Define functions to recurse through all tags and replace a string where necessary (anonymization)

In [5]:
def replace_str_in_number(elem_value, initial_str, new_str):
    """Replace a substring inside the textual form of a number.

    The value is rendered with ``str()``, the substitution is applied to that
    text, and the result is parsed back with the value's own type, so an
    ``int`` stays an ``int`` and a ``float`` stays a ``float``.

    Args:
        elem_value : Data element value in which to replace the string
        initial_str : Substring to look for in ``str(elem_value)``
        new_str : Text that replaces every occurrence of ``initial_str``

    Returns:
        number: Value rebuilt from the edited text, with the same type as
        ``elem_value``. When ``initial_str`` does not occur, ``elem_value``
        itself is returned unchanged.

    Raises:
        ValueError: If the edited text can no longer be parsed by the value's
            type (for example when ``new_str`` is not numeric).
    """
    elem_value_str = str(elem_value)
    # Nothing to replace: hand back the original object instead of rebuilding
    # it from text, which is wasted work and not lossless for every type
    # (e.g. ``bool("False")`` evaluates to ``True``).
    if initial_str not in elem_value_str:
        return elem_value
    # Rebuild with the original type so callers get back what they passed in.
    return type(elem_value)(elem_value_str.replace(initial_str, new_str))


def anonymize_tag_recurse(ds: pydicom.Dataset, initial_str, new_str):
    """Function to anonymize / replace first level and nested tags in a pydicom Dataset recursively.

    The dataset is modified in place. Sequence (``SQ``) elements are descended
    into; for every other element the value is handled when it is one of:

    * a single string (backslash-separated multi-value strings are processed
      part by part),
    * a single ``int`` or ``float`` (only when ``initial_str`` is numeric),
    * a list, tuple or other mutable sequence (such as pydicom's
      ``MultiValue``) whose items are strings and/or numbers.

    Values of any other type are left untouched.

    Args:
        ds : pydicom Dataset to anonymize
        initial_str : Initial string to be replaced
        new_str : New string to replace the initial string

    Returns:
        ds : The same Dataset object that was passed in, with the replaced tag values
    """
    # Numbers are only rewritten when the searched text is itself numeric.
    replace_in_numbers = initial_str.isnumeric()

    for elem in ds:
        if elem.VR == "SQ":
            # Each sequence item is itself a dataset: recurse into it.
            for item in elem.value:
                anonymize_tag_recurse(item, initial_str, new_str)
            continue

        value = elem.value
        if isinstance(value, str):
            # Splitting on the backslash handles multi-value strings part by
            # part; a string without a backslash is just a single part.
            elem.value = "\\".join(
                part.replace(initial_str, new_str) for part in value.split("\\")
            )
        elif isinstance(value, (int, float)):
            if replace_in_numbers:
                elem.value = replace_str_in_number(value, initial_str, new_str)
        elif isinstance(
            value, (list, tuple, collections.abc.MutableSequence)
        ) and not isinstance(value, bytearray):
            # pydicom keeps multi-valued elements in a MultiValue, which is a
            # MutableSequence but neither a list nor a tuple. bytearray is
            # excluded because it holds raw bytes, not separate values.
            is_tuple = isinstance(value, tuple)
            # Tuples are immutable: edit a list copy and write it back below.
            items = list(value) if is_tuple else value
            for index, item in enumerate(items):
                if isinstance(item, str):
                    items[index] = item.replace(initial_str, new_str)
                elif replace_in_numbers and isinstance(item, (int, float)):
                    items[index] = replace_str_in_number(item, initial_str, new_str)
            if is_tuple:
                elem.value = tuple(items)
        # NOTE(review): values that are not str/int/float (pydicom PersonName
        # objects for VR "PN" appear to be such a case) are skipped here, so
        # patient names may stay unchanged -- confirm and extend if needed.
    return ds

## Apply the function and print the new tags of the modified dicom dataset

In [6]:
# anonymize_tag_recurse edits the dataset it receives and returns that same
# object, so `data_anom` and `data` refer to one (now modified) dataset.
data_anom = anonymize_tag_recurse(data, 'PACSMAN1', 'PACSMAN2')
# Print every element again to check the replacement.
print_tags(data_anom)

(0008, 0008) Image Type                          CS: ['ORIGINAL', 'PRIMARY', 'AXIAL']
(0008, 0016) SOP Class UID                       UI: MR Image Storage
(0008, 0018) SOP Instance UID                    UI: 1.2.826.0.1.3680043.8.498.11647531947692001889139142101174006376
(0008, 0020) Study Date                          DA: '20231016'
(0008, 0060) Modality                            CS: 'MR'
(0008, 103e) Series Description                  LO: 'pacsman_testing_dicom'
(0010, 0010) Patient's Name                      PN: 'PACSMAN'
(0010, 0020) Patient ID                          LO: 'PACSMAN2'
(0010, 0026) Source Patient Group Identification SQ: <Sequence, length 1>
(0010, 0020) Patient ID                          LO: 'PACSMAN2'
(0020, 000d) Study Instance UID                  UI: 1.2.826.0.1.3680043.8.498.53705520178064824913661855181940860223
(0020, 000e) Series Instance UID                 UI: 1.2.826.0.1.3680043.8.498.51477978481347690572891822947821352950
(0020, 0013) Instance Numb