In [26]:
import pydicom
import pandas as pd
import hashlib
import uuid

In [27]:
def find_element_by_tag(dicom_file, desired_tag):
    """Look up one data element and print a short report about it.

    Parameters
    ----------
    dicom_file : dataset-like
        Object exposing ``get(tag)``; in this notebook it is the dataset
        returned by ``pydicom.dcmread``.
    desired_tag : tuple
        ``(group, element)`` pair identifying the element to look up.

    Returns
    -------
    bool
        ``True`` when ``get`` returned a truthy element (its tag, description
        and value are printed); ``False`` otherwise (a "not found" line is
        printed instead).
    """
    element = dicom_file.get(desired_tag)

    # Guard clause: nothing usable came back for this tag.
    if not element:
        print(f"Tag {desired_tag} not found in the DICOM file.")
        return False

    # Read all three attributes first, then print them one per line.
    report = (
        ("Tag", element.tag),
        ("Description", element.name),
        ("Value", element.value),
    )
    for label, field in report:
        print(f"{label}: {field}")

    return True

In [28]:
def hash(original_value):
    """Return the SHA-256 hex digest of a value.

    NOTE: this function intentionally keeps its original name, which shadows
    the built-in ``hash`` for the rest of the notebook; other cells call it
    under this name.

    Parameters
    ----------
    original_value : str, bytes, bytearray or any object
        Value to hash. Strings are UTF-8 encoded (unchanged behaviour).
        Bytes-like values are hashed as-is. Anything else is converted with
        ``str()`` first, so element values that are not plain strings (for
        example numbers) no longer raise when they are hashed.

    Returns
    -------
    str
        64-character lowercase hexadecimal SHA-256 digest.
    """
    if isinstance(original_value, (bytes, bytearray)):
        payload = bytes(original_value)
    elif isinstance(original_value, str):
        payload = original_value.encode('utf-8')
    else:
        # Non-string values have no usable ``encode``; hash their text form.
        payload = str(original_value).encode('utf-8')

    # SHA-256 (swap the algorithm here if requirements change).
    return hashlib.sha256(payload).hexdigest()

In [29]:
def hash_element_by_tag(dicom_file, desired_tag):
    """Overwrite an element's value with a truncated hash of that value.

    Does nothing when ``desired_tag`` is not present in ``dicom_file``.
    The digest comes from this notebook's own ``hash`` helper, and only its
    first 16 characters are stored.
    """
    if desired_tag not in dicom_file:
        return

    digest = hash(dicom_file[desired_tag].value)

    # Keep 16 characters so the value does not exceed the maximum length
    # of 16 allowed for VR SH.
    dicom_file[desired_tag].value = digest[:16]

In [30]:
def delete_element_by_tag(dicom_file, desired_tag):
    """Remove the element stored under ``desired_tag``, if there is one.

    A tag that is not present is silently ignored. Returns ``None``.
    """
    if desired_tag not in dicom_file:
        return

    del dicom_file[desired_tag]

In [31]:
def autofill_element_by_tag(dicom_file, desired_tag):
    """Replace an element's value with the fixed placeholder ``"Anonymized"``.

    A tag that is not present is silently ignored. Returns ``None``.
    """
    if desired_tag not in dicom_file:
        return

    placeholder = "Anonymized"
    dicom_file[desired_tag].value = placeholder

In [32]:
def sha512_hash_uid(original_uid, num_digits=39):
    """Derive a fixed-length decimal string from the SHA-512 hash of a UID.

    The first ``num_digits`` hexadecimal characters of the digest are read as
    an integer, written in decimal, cut to ``num_digits`` characters and, if
    shorter, left-padded with zeros.

    Because the result is used as a single UID component, it must not start
    with ``0`` when it is longer than one digit (DICOM UID encoding rule).
    Zero padding can produce such a value, so a leading ``0`` is replaced by
    ``1``. Results that were already valid are unchanged.

    Parameters
    ----------
    original_uid : str
        UID text to hash (UTF-8 encoded before hashing).
    num_digits : int, optional
        Exact length of the returned string. Must be at least 1.

    Returns
    -------
    str
        String of exactly ``num_digits`` decimal digits with no leading zero
        (a single ``"0"`` is possible when ``num_digits`` is 1).

    Raises
    ------
    ValueError
        If ``num_digits`` is smaller than 1.
    """
    if num_digits < 1:
        raise ValueError("num_digits must be a positive integer.")

    hash_hex = hashlib.sha512(original_uid.encode('utf-8')).hexdigest()

    # Only a prefix of the digest is used, not the entire hash.
    hash_numeric = int(hash_hex[:num_digits], 16)

    # Truncate or pad so the string is exactly `num_digits` long.
    hash_str = str(hash_numeric)[:num_digits].zfill(num_digits)

    # A multi-digit UID component may not begin with zero.
    if num_digits > 1 and hash_str.startswith('0'):
        hash_str = '1' + hash_str[1:]

    return hash_str

In [33]:
def transformUID_by_tag(dicom_file, desired_tag, chorus_id, institution_id, dataset_id, tag_id):
    """Replace a UID element with a prefixed, hash-derived UID.

    The new value is ``chorus_id.institution_id.dataset_id.tag_id.<digits>``,
    where ``<digits>`` is produced by ``sha512_hash_uid`` from the element's
    current value. A tag that is not present is silently ignored.

    Raises
    ------
    ValueError
        If the assembled UID is longer than 64 characters; the element is
        left untouched in that case.
    """
    if desired_tag not in dicom_file:
        return

    new_uid = sha512_hash_uid(dicom_file[desired_tag].value)
    custom_uid = f"{chorus_id}.{institution_id}.{dataset_id}.{tag_id}.{new_uid}"

    # Only store the UID when it fits within the 64-character limit.
    if len(custom_uid) <= 64:
        dicom_file[desired_tag].value = custom_uid
        return

    raise ValueError("The generated UID exceeds the maximum allowed length of 64 characters.")

In [34]:
def text_tag_2_tuple(text_tag):
    """Convert a textual ``GGGG,EEEE`` tag into a ``(group, element)`` tuple.

    Both parts are parsed as hexadecimal numbers, e.g. ``"0020,000D"``
    becomes ``(32, 13)``. Anything after a second comma is ignored.

    Raises
    ------
    IndexError
        If ``text_tag`` contains no comma.
    ValueError
        If either part is not valid hexadecimal text.
    """
    pieces = text_tag.split(',')
    group_text, element_text = pieces[0], pieces[1]

    # The explicit '0x' prefix keeps parsing strict: text that already has a
    # prefix, a sign or surrounding whitespace is rejected by int().
    return tuple(int('0x' + hex_text, 16) for hex_text in (group_text, element_text))

In [35]:
# Load the DICOM file to de-identify and the checklist that says what to do per tag.
dicom_file = pydicom.dcmread('sample_data/0_ORIGINAL.dcm')

select_attr_path = 'MIDAS_checklist/MIDAS DICOM Tag De-identification Checklist_EA_Selected(1).xlsx'
select_attr_df = pd.read_excel(select_attr_path, dtype=str)  # Openpyxl library == v3.0.10
# print(select_attr_df.loc[:, "Attribute Name": "Operation"])

# Prefix components handed to transformUID_by_tag for rebuilt UIDs.
chorus_id = "1.2.840.113554"
institution_id = "6"
dataset_id = "1"
tag_id = "104"

# Operations that only need the dataset and the tag.
tag_operations = {
    'hash': hash_element_by_tag,
    'delete': delete_element_by_tag,
    'autofill': autofill_element_by_tag,
}

for _, row in select_attr_df.iterrows():
    tag_text = row['Tag']
    if pd.isna(tag_text):
        continue

    # Only rows shaped like "GGGG,EEEE" (two 4-character parts) are processed.
    tag_parts = tag_text.split(',')
    if len(tag_parts) != 2 or not (len(tag_parts[0]) == len(tag_parts[1]) == 4):
        continue

    try:
        desired_tag = text_tag_2_tuple(tag_text)
    except ValueError:
        print('invalid literal for int() with base 16: ' + tag_text)
        continue

    # Reports the element (or its absence); absent tags need no further work.
    if not find_element_by_tag(dicom_file, desired_tag):
        continue

    operation = row['Operation']
    if operation == 'TransformUID':
        transformUID_by_tag(dicom_file, desired_tag, chorus_id, institution_id, dataset_id, tag_id)
    elif operation in tag_operations:
        tag_operations[operation](dicom_file, desired_tag)
    # Any other operation value leaves the element unchanged.

Tag (8, 42) not found in the DICOM file.
Tag (12298, 195) not found in the DICOM file.
Tag (12298, 221) not found in the DICOM file.
Tag (22, 75) not found in the DICOM file.
Tag (64, 41362) not found in the DICOM file.
Tag (64, 41986) not found in the DICOM file.
Tag (64, 41363) not found in the DICOM file.
Tag (64, 41329) not found in the DICOM file.
Tag: (0020, 000e)
Description: Series Instance UID
Value: 2.25.22373700284337223907674770690654453238
Tag (12298, 434) not found in the DICOM file.
Tag (12298, 422) not found in the DICOM file.
Tag (64, 1786) not found in the DICOM file.
Tag (12298, 534) not found in the DICOM file.
Tag (64, 1290) not found in the DICOM file.
Tag: (0020, 000d)
Description: Study Instance UID
Value: 2.25.106461954783291641048254423668956446198


In [36]:
# Write the modified dataset to disk.
# NOTE(review): presumably the 'output_dicom' directory must already exist — confirm.
output_path = 'output_dicom/output.dcm'
dicom_file.save_as(output_path)

In [37]:
# dicom_file = pydicom.dcmread('sample_data/0_ORIGINAL.dcm') # sample_data/1.2.840.113619.2.80.2342848408.76634.1509736050.26.dcm

# desired_tag = (0x0020,0x000D)
# # desired_tag = (0x0008,0x0050)

# print('Before: ')
# find_element_by_tag(dicom_file, desired_tag)
# print('')

# print('After: ')
# chorus_id = "1.2.840.113554"
# institution_id = "6"
# dataset_id = "1"
# tag_id = "104"
# transformUID_by_tag(dicom_file, desired_tag, chorus_id, institution_id, dataset_id, tag_id)
# find_element_by_tag(dicom_file, desired_tag)

# print('After: ')
# hash_element_by_tag(dicom_file, desired_tag)
# find_element_by_tag(dicom_file, desired_tag)

# print('After: ')
# autofill_element_by_tag(dicom_file, desired_tag)
# find_element_by_tag(dicom_file, desired_tag)

# print('After: ')
# delete_element_by_tag(dicom_file, desired_tag)
# find_element_by_tag(dicom_file, desired_tag)
