In [3]:
import os

###################################################################################
# Base paths -- verify that they match the machine you are running on (edit if needed)

# Finn's Laptop
dicombasefolder = "/mnt/d/Finn/KPUM_NODDI/DICOM_KPUM_NODDI"

# KPUM Workstation
#dicombasefolder = '/mnt/e/Finn/KPUM_NODDI/DICOM_KPUM_NODDI'

# Name of the sourcedata folder that lives inside dicombasefolder
sourcedatabasefolder = "sourcedata"

# Tracker tsv-file, stored in dicombasefolder/sourcedatabasefolder
subjecttrackerfile = "Subject_Tracker_for_dicomdir2sourcedata.tsv"
subjecttrackerpath = os.path.join(dicombasefolder, sourcedatabasefolder)

###################################################################################
# User input

# Participant details
subject = "035"     # Study ID of the subject
session = "MR1"     # Session ID of the current scan

# DICOM folder/s to process, given relative to dicombasefolder.
# Example with several folders:
#in_dicomdatafolder = ['019_MR1_8279558_20220208/DIOCM_fromPACS',
#                      '019_MR1_8279558_20220208/DICOM_NODDI']

# With a single folder, still use a list (one entry) as below
in_dicomdatafolder = [
    "035_8453727_20230329/DICOM",
]


###################################################################################

In [4]:
##########################################################################
# Define local functions
def reorganize_dicom(inputdicomdir, outputdicomdir):
    """Re-organise the DICOM files found in one input folder using dcm2niix.

    dcm2niix is called with ``-r y`` and the filename pattern
    ``s%2s_%p/%p_%5r.dcm``, writing into ``outputdicomdir``. Per the dcm2niix
    help text this renames/sorts the DICOMs into one sub-folder per series
    rather than converting them (see ``dcm2niix -h`` for the exact meaning of
    the ``-d``, ``-b``, ``-r``, ``-w`` and ``-f`` options).

    Parameters
    ----------
    inputdicomdir : str
        Folder that is searched for DICOM files.
    outputdicomdir : str
        Folder that receives the re-organised files.

    Raises
    ------
    RuntimeError
        If dcm2niix exits with a non-zero status. The captured dcm2niix output
        is included in the message.
    FileNotFoundError
        Raised by ``subprocess`` if the ``dcm2niix`` executable is not on PATH.
    """
    import subprocess
    from datetime import datetime

    print("Start re-organising DCMs")
    starttime = datetime.now()
    filebase = os.path.join("s%2s_%p", "%p_%5r.dcm")

    # Pass the arguments as a list and do not go through a shell: paths that
    # contain spaces or shell metacharacters then reach dcm2niix unmodified.
    command = ["dcm2niix",
               "-d", "8",
               "-b", "o",
               "-r", "y",
               "-w", "1",
               "-o", str(outputdicomdir),
               "-f", filebase,
               str(inputdicomdir)]
    # run() waits for the process to finish; stderr is merged into stdout so
    # that everything dcm2niix printed is available for the error message.
    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

    # Do not report success (and let the caller carry on) after a failed run.
    if result.returncode != 0:
        captured = result.stdout or b""
        if isinstance(captured, bytes):
            captured = captured.decode(errors="replace")
        raise RuntimeError(
            f"dcm2niix exited with status {result.returncode} while re-organising "
            f"{inputdicomdir}:\n{captured}"
        )

    endtime = datetime.now()
    print("Done re-organising DCMs. Elapsed time:", endtime-starttime)

def anonymize_dicom(infile, outfile):
    """Anonymize a single DICOM file.

    The file ``infile`` is read with pydicom, the patient-identifying elements
    listed below are overwritten with fixed placeholder values, and the result
    is saved to ``outfile``. Elements that are absent from the file are left
    alone. Passing the same path for both arguments overwrites the file.
    """
    import pydicom

    # DICOM keyword -> placeholder written in its place
    # (the text-valued elements first, then the date-valued one)
    replacements = {
        'PatientName': 'Anonymous',
        'PatientID': 'Anonymous',
        'PatientBirthDate': '19010101',
    }

    dataset = pydicom.dcmread(infile)
    for keyword, placeholder in replacements.items():
        if keyword not in dataset:
            continue
        dataset.data_element(keyword).value = placeholder

    # Write the anonymized dataset
    dataset.save_as(outfile)


import os, subprocess
import pandas as pd
from datetime import datetime
from glob import glob


# Convert and anonymize every folder listed in in_dicomdatafolder, keeping the
# subject tracker tsv-file up to date. The 'converted' column of the tracker is
# used as follows:
#   'Pending' - written just before the conversion starts
#   'Done'    - written once re-organisation and anonymization have finished

def _read_tracker(tsvpath):
    """Load the subject tracker tsv-file with empty cells as '' instead of NaN."""
    return pd.read_csv(tsvpath, sep="\t").fillna('')


def _write_tracker(tracker, tsvpath):
    """Write the subject tracker back to its tsv-file (without the index)."""
    tracker.to_csv(tsvpath, sep="\t", index=False)


# Output folder of this subject/session: relative to dicombasefolder (this is
# what is stored in the tracker) and as full path. Create it if not present.
outputrelpath = os.path.join(sourcedatabasefolder, f'sub-{subject}', f'ses-{session}')
outputdicomdir = os.path.join(dicombasefolder, outputrelpath)
os.makedirs(outputdicomdir, exist_ok=True)

# Full path to the tracker. It already contains dicombasefolder, so it is used
# as is for both reading and writing.
subjecttrackertsv = os.path.join(subjecttrackerpath, subjecttrackerfile)

# Loop over entries in in_dicomdatafolder
for dicomdatafolder in in_dicomdatafolder:

    # Read the tracker fresh for every folder (it can have been updated)
    df = _read_tracker(subjecttrackertsv)

    # Check if dicomdatafolder has an entry in df, and if not add it to the file
    newly_added = not df['input'].isin([dicomdatafolder]).any()
    if newly_added:
        # Add as new entry at the bottom
        df.loc[len(df)] = {'input': dicomdatafolder,
                           'output': outputrelpath,
                           'converted': 'Pending',
                           'comments': ''}
        print(f"{dicomdatafolder} is not in {subjecttrackerfile}. Adding a it as new entry")
        # Sort according to the 'input' column, write, and read back so that
        # df has a clean index again
        df = df.sort_values(by='input')
        _write_tracker(df, subjecttrackertsv)
        df = _read_tracker(subjecttrackertsv)
    else:
        print(f"{dicomdatafolder} is already in {subjecttrackerfile} ")

    # Current status of this folder as a plain string. Comparing the string
    # (rather than testing a list that wraps a DataFrame comparison, which is
    # always truthy) is what makes the skip branches below reachable.
    is_current = df['input'] == dicomdatafolder
    status = str(df.loc[is_current, 'converted'].iloc[0])

    if status == 'Done':
        print("Already converted")
        continue
    if status == 'Pending' and not newly_added:
        # 'Pending' that was not written by this run: another run is working
        # on this folder, or an earlier run was interrupted.
        print(f"{dicomdatafolder} is marked 'Pending' in {subjecttrackerfile} "
              "(conversion running elsewhere or interrupted). Skipping; "
              "clear the 'converted' cell in the tracker to process it again")
        continue

    # Now do the conversion and anonymization
    startprocess = datetime.now()

    # Write that we are taking care of this folder right now
    df.loc[is_current, 'converted'] = 'Pending'
    df.loc[is_current, 'output'] = outputrelpath
    _write_tracker(df, subjecttrackertsv)

    print(f'Processing {dicomdatafolder}')
    # Re-organize DCMs using local function
    inputdicomdir = os.path.join(dicombasefolder, dicomdatafolder)
    reorganize_dicom(inputdicomdir, outputdicomdir)

    # Anonymize DCMs using local function. Every *.dcm file in every
    # sub-folder of outputdicomdir is overwritten with its anonymized version.
    starttime = datetime.now()
    print("Start anonymizing DCMs.")
    for folder in os.listdir(outputdicomdir):
        for in_slice_ in glob(os.path.join(outputdicomdir, folder, '*.dcm')):
            anonymize_dicom(in_slice_, in_slice_)
    endtime = datetime.now()
    print("Done anonymizing DCMs. Elapsed time:", endtime-starttime)

    # Update the tracker. It is read again first so that changes made to the
    # file during the (long) processing are not overwritten with stale data.
    df = _read_tracker(subjecttrackertsv)
    is_current = df['input'] == dicomdatafolder
    df.loc[is_current, 'converted'] = 'Done'
    df.loc[is_current, 'output'] = outputrelpath
    _write_tracker(df, subjecttrackertsv)

    # Ready with everything
    endprocess = datetime.now()
    print("Done re-organising and anonymizing DCMs")
    print("Total Elapsed time:", endprocess-startprocess)
    print()

035_8453727_20230329/DICOM is not in Subject_Tracker_for_dicomdir2sourcedata.tsv. Adding a it as new entry
Processing 035_8453727_20230329/DICOM
Start re-organising DCMs
Done re-organising DCMs. Elapsed time: 0:22:19.798811
Start anonymizing DCMs.
Done anonymizing DCMs. Elapsed time: 0:06:18.755066
Done re-organising and anonymizing DCMs
Total Elapsed time: 0:28:38.657827

