In [1]:
import os
import glob
import shutil
import pydicom

In [2]:
# Resolve the user's home directory portably: os.getenv('HOME') returns None
# when HOME is not set (e.g. on Windows), which would make os.path.join fail.
# os.path.expanduser('~') gives the same value whenever HOME is set.
HOMEPATH = os.path.expanduser('~')
# Root folder of the project data: <home>/Data/fMRIBreastData
DATAPATH = os.path.join(HOMEPATH, 'Data', 'fMRIBreastData')

In [3]:
# Source folder (files to be sorted) and destination folder (sorted copies),
# both located under DATAPATH:
SRCFLDR, DSTFLDR = (os.path.join(DATAPATH, sub) for sub in ('rawS3', 'StudyData'))
# File extension (without the dot) of the files to look for:
FEXT = 'dcm'
# Create the destination folder if it does not exist yet:
os.makedirs(DSTFLDR, exist_ok=True)

print(f'Working directory is {SRCFLDR}',
      f'Saving directory is {DSTFLDR}',
      sep='\n')

Working directory is /Users/joseulloa/Data/fMRIBreastData/rawS3
Saving directory is /Users/joseulloa/Data/fMRIBreastData/StudyData


In [4]:
# List every file with the FEXT extension located directly inside SRCFLDR
# (no recursion into sub-folders):
dcm_pattern = os.path.join(SRCFLDR, f'*.{FEXT}')
dcmlist = glob.glob(dcm_pattern)
print(f'There are {len(dcmlist)} files in {SRCFLDR}')

There are 12600 files in /Users/joseulloa/Data/fMRIBreastData/rawS3


In [6]:
# Copy every DICOM file into a folder tree built from its header fields:
#   DSTFLDR/<prefix>-<PatientID>/<PatientName>-<StudyDate>/<SeriesNumber>/<TemporalPositionIdentifier>/
# where <PatientName> is the cleaned, dash-joined name and <prefix> is the
# part of it that precedes the first dash.
# Notes on the DICOM fields:
# PatientID --> Only used as part of the top-level folder name
# PatientName --> Use this as it is also used by Nasib, so to keep consistency
# StudyID --> Not needed
# StudyDate --> This defines the pre- and post-treatment (just the earliest is pre- and the latest is post-)
# SeriesNro --> This is relevant to later eliminate any possible in-scanner post-processed data
# SeriesDescription --> Not used in this cell
os.makedirs(DSTFLDR, exist_ok=True)
for dcm in dcmlist:
    # Only header fields are needed to decide the destination, so the pixel
    # data is not read:
    ds = pydicom.dcmread(dcm, stop_before_pixels=True)
    PatientID = ds.PatientID
    PatientName = str(ds.PatientName)
    StudyDate = ds.StudyDate
    SeriesNro = str(ds.SeriesNumber)
    TempPos = str(ds.TemporalPositionIdentifier)

    # Split on single spaces and drop the empty pieces left by repeated spaces:
    name_no_space = [i for i in PatientName.split(' ') if i != '']
    # Keep the first element as it is; title-case the rest and replace
    # 'Treatmensst' with 'Treatment' (presumably a typo in some headers):
    name_camel_case = [i.title().replace('Treatmensst', 'Treatment') if idx > 0 else i
                       for idx, i in enumerate(name_no_space)]
    # Re-join the name with a dash instead of (multiple) spaces:
    PatientName = '-'.join(name_camel_case)

    # Text before the first dash. partition() yields the whole name when it
    # has no dash, whereas slicing with find() would use -1 in that case and
    # silently drop the last character of the name.
    name_prefix = PatientName.partition('-')[0]

    folderStruct = os.path.join(DSTFLDR,
                                '-'.join([name_prefix, PatientID]),
                                '-'.join([PatientName.replace(' ', '_'),
                                          StudyDate.replace(' ', '_')]),
                                SeriesNro.replace(' ', '_'),
                                TempPos.replace(' ', '_'))
    os.makedirs(folderStruct, exist_ok=True)

    # Keep the original file name; never overwrite a file that was already
    # copied, so the cell can safely be re-run:
    dstFile = os.path.join(folderStruct, os.path.split(dcm)[-1])
    if not os.path.isfile(dstFile):
        shutil.copy2(dcm, dstFile)
    else:
        print(f'File {dstFile}, already exists. Nothing done')

print('All done. Check the folders to see the results. Bye!')

File /Users/joseulloa/Data/fMRIBreastData/StudyData/CR-ANON68760/CR-Post-Treatment-20230120/301/3/bf498590-aac6-46fc-ad18-0de871ca1925.dcm, already exists. Nothing done
File /Users/joseulloa/Data/fMRIBreastData/StudyData/CR-ANON68760/CR-Post-Treatment-20230120/301/1/a35dd1a1-1c31-4a50-a961-907745c72232.dcm, already exists. Nothing done
File /Users/joseulloa/Data/fMRIBreastData/StudyData/NE-ANON89073/NE-Post-Treatment-Motion-Corrected-20221215/67799/3/ee3bbc5a-74a5-4e14-ba11-e4e511e25a0a.dcm, already exists. Nothing done
File /Users/joseulloa/Data/fMRIBreastData/StudyData/DC-ANON97378/DC-Post-Treatment-20230726/301/1/a05d357a-9682-40b2-a736-e7a39395a3c9.dcm, already exists. Nothing done
File /Users/joseulloa/Data/fMRIBreastData/StudyData/JB-ANON18218/JB-Pre-Treatment-20230405/301/5/c4ec9c39-c92c-479d-9f77-eb9d3006c370.dcm, already exists. Nothing done
File /Users/joseulloa/Data/fMRIBreastData/StudyData/JB-ANON18218/JB-Post-Treatment-20230511/301/6/5c426468-1f07-4127-b2d7-3ddbab7dc518.dc

In [68]:
# NOTE(review): leftover scratch cell (its execution count is out of sequence
# with the rest of the notebook). Sample string used to probe str.find slicing.
a = 'a-b'

In [72]:
# NOTE(review): leftover scratch check. str.find returns -1 when the substring
# is absent, so this slice drops the last character of `a`.
a[0:a.find('c')]

'a-'