In [8]:
import os, subprocess
import pandas as pd
from glob import glob
from datetime import datetime

# Root folder holding the DICOM data. Keep exactly one assignment active,
# matching the machine the notebook is run on.
# Finn's Laptop
dicombasefolder = "/mnt/d/Finn/KPUM_NODDI/DICOM_KPUM_NODDI"
# KPUM Workstation
#dicombasefolder = "/mnt/e/Finn/KPUM_NODDI/DICOM_KPUM_NODDI"

# Name of the sourcedata folder that lives inside the DICOM root folder
sourcedatabasefolder = "sourcedata"

# The tsv-file that keeps track of the conversion, and the folder holding it
# (dicombasefolder/sourcedatabasefolder)
subjecttrackerfile = "Subject_Tracker_for_dicomdir2sourcedata.tsv"
subjecttrackerpath = os.path.join(dicombasefolder, sourcedatabasefolder)



##########################################################################
# Define local functions
def reorganize_dicom(inputdicomdir, outputdicomdir):
    """Re-organize the DICOM files in a folder by running dcm2niix.

    dcm2niix is called with ``-r y`` and the file name pattern
    ``s%2s_%p/%p_%5r.dcm``, writing its result into outputdicomdir.
    The captured standard output of the command and the elapsed time are
    printed.

    Args:
        inputdicomdir: Folder with the DICOM files to re-organize.
        outputdicomdir: Folder handed to dcm2niix as output folder (``-o``).

    Raises:
        FileNotFoundError: If the dcm2niix executable cannot be found.
    """
    import subprocess
    from datetime import datetime

    print("Start re-organising DCMs")
    starttime = datetime.now()
    filebase = os.path.join("s%2s_%p", "%p_%5r.dcm")
    # Pass the arguments as a list (no shell) so that folders containing
    # spaces or shell metacharacters reach dcm2niix unaltered.
    command = [
        "dcm2niix",
        "-d", "8",
        "-b", "o",
        "-r", "y",
        "-w", "1",
        "-o", outputdicomdir,
        "-f", filebase,
        inputdicomdir,
    ]
    # run() waits for the command to finish; only stdout is captured
    result = subprocess.run(command, stdout=subprocess.PIPE)
    # This will give you the output of the command being executed
    print(f"Command output: {result.stdout}")
    if result.returncode != 0:
        # Report the failure instead of silently ignoring the exit status
        print(f"Warning: dcm2niix exited with status {result.returncode}")
    endtime = datetime.now()
    print("Done re-organising DCMs. Elapsed time:", endtime - starttime)

def anonymize_dicom(infile, outfile):
    """Anonymize one DCM-file and save the result.

    The file infile is read with pydicom, every tag listed below that is
    present in it gets its value replaced by a fixed placeholder, and the
    dataset is then saved as outfile (the same path as infile overwrites it).
    """
    import pydicom as pm

    # DCM tags to anonymize, each with the value written in its place
    placeholders = {
        'PatientName': 'Anonymous',
        'PatientID': 'Anonymous',
        'PatientBirthDate': '19010101',
    }

    dataset = pm.dcmread(infile)
    for keyword, placeholder in placeholders.items():
        if keyword not in dataset:
            # tag absent in this file, nothing to replace
            continue
        dataset.data_element(keyword).value = placeholder

    # save file
    dataset.save_as(outfile)


##########################################################################
# START MAIN

# Read the subject tracker tsv-file (subjecttrackerfile in subjecttrackerpath).
# Empty cells are turned into '' so that they compare cleanly against 'Done'.
subjecttrackertsv = os.path.join(subjecttrackerpath, subjecttrackerfile)
df = pd.read_csv(subjecttrackertsv, sep="\t")
df.fillna('', inplace=True)

# Loop over the entries in the tracker which have not yet been converted.
# The loop runs over a snapshot of the rows taken here, whereas df itself is
# updated (and written to disk) after every converted entry.
for _, row in df.loc[df['converted'] != 'Done'].iterrows():

    dicomdatafolder = row['input']
    print(f'Processing {dicomdatafolder}')

    # Check if not converted. An 'input' that is listed more than once is
    # marked 'Done' on all its rows by the first pass, so test the live df.
    if (df.loc[df['input'] == dicomdatafolder, 'converted'] == 'Done').all():
        print("Already converted")
        continue

    # Parse dicomdatafolder to get variables (<subject>_<session>_<rest>)
    parts = dicomdatafolder.split("_", 2)
    if len(parts) < 3:
        # Skip a malformed entry instead of aborting the whole batch
        print(f"Skipping {dicomdatafolder}: expected <subject>_<session>_<rest>")
        continue
    subject, session, _ = parts

    # Define paths; the output path relative to dicombasefolder is what gets
    # stored in the tracker.
    inputdicomdir = os.path.join(dicombasefolder, dicomdatafolder)
    outputrelpath = os.path.join(sourcedatabasefolder, f'sub-{subject}', f'ses-{session}')
    outputdicomdir = os.path.join(dicombasefolder, outputrelpath)

    if not os.path.isdir(inputdicomdir):
        # Without input nothing can be converted, so do not mark it as 'Done'
        print(f"Skipping {dicomdatafolder}: {inputdicomdir} is not a folder")
        continue

    # Create outputdicomdir if not present
    os.makedirs(outputdicomdir, exist_ok=True)

    startprocess = datetime.now()

    # Re-organize DCMs using local function
    reorganize_dicom(inputdicomdir, outputdicomdir)

    # Anonymize DCMs using local function
    starttime = datetime.now()
    print("Start anonymizing DCMs.")
    for folder in os.listdir(outputdicomdir):
        for in_slice_ in glob(os.path.join(outputdicomdir, folder, '*.dcm')):
            # write to same file = overwrite with anonymized version
            anonymize_dicom(in_slice_, in_slice_)
    endtime = datetime.now()
    print("Done anonymizing DCMs. Elapsed time:", endtime - starttime)

    # Update the tracker and write it back to the file it was read from
    processed = df['input'] == dicomdatafolder
    df.loc[processed, 'converted'] = 'Done'
    df.loc[processed, 'output'] = outputrelpath
    df.to_csv(subjecttrackertsv, sep="\t", index=False)

    # Ready with everything
    endprocess = datetime.now()
    print("Done re-organising and anonymizing DCMs")
    print("Total Elapsed time:", endprocess - startprocess)
    print()


Processing 010_MR1_8253422_20211011/DICOM_fromPACS
Start re-organising and anonymizing DCMs
Command output:  b"Chris Rorden's dcm2niiX version v1.0.20220720  GCC10.4.0 x86-64 (64-bit Linux)\nIllegal/Obsolete DICOM (/mnt/d/Finn/KPUM_NODDI/DICOM_KPUM_NODDI/010_MR1_8253422_20211011/DICOM_fromPACS/DICOM/0000E004/AA6DB18C/AAC3763E/00001589/EE01D45F): Overlay Bits Allocated must be 1, not 16\nIllegal/Obsolete DICOM (/mnt/d/Finn/KPUM_NODDI/DICOM_KPUM_NODDI/010_MR1_8253422_20211011/DICOM_fromPACS/DICOM/0000E004/AA6DB18C/AAC3763E/00001589/EE01D45F): Overlay Bit Position shall be 0, not 12\nIllegal/Obsolete DICOM (/mnt/d/Finn/KPUM_NODDI/DICOM_KPUM_NODDI/010_MR1_8253422_20211011/DICOM_fromPACS/DICOM/0000E004/AA6DB18C/AAC3763E/00001589/EE086643): Overlay Bits Allocated must be 1, not 16\nIllegal/Obsolete DICOM (/mnt/d/Finn/KPUM_NODDI/DICOM_KPUM_NODDI/010_MR1_8253422_20211011/DICOM_fromPACS/DICOM/0000E004/AA6DB18C/AAC3763E/00001589/EE086643): Overlay Bit Position shall be 0, not 12\nIllegal/Obsolet