In [28]:
import os
import sys
import math
import glob
from time import perf_counter
import shutil
import pydicom

In [19]:
def getenv():
    """
    Requires sys and os modules:
    import sys
    import os
    """
    if sys.platform == 'win32':
        env_home = 'HOMEPATH'
    elif (sys.platform == 'darwin') | (sys.platform == 'linux'):
        env_home = 'HOME'
    HOMEPATH = os.getenv(env_home)
    
    return HOMEPATH

def check_path_exist(path, file=False):
    """
    Flag FILE indicates the path contains a file name (FLAG=TRUE) or the path only points to a folder (FLAG=FALSE (Default))
    """
    if file:
        is_path = os.path.isfile(path)
    else:
        is_path = os.path.isdir(path)

    print(f'{"OK:" if is_path else "ERROR:"} Path to {"file" if file else "folder"} {path} does{"" if is_path else " NOT"} exist')

    return is_path

In [20]:
HOMEPATH = getenv()
DATAPATH = os.path.join(HOMEPATH, 'Data', 'fMRIBreastData')
# Check whether the DATAPATH route exists or not:
_ = check_path_exist(DATAPATH)

OK: Path to folder \Users\joseu\Data\fMRIBreastData does exist


In [21]:
SRCFLDR = os.path.join(DATAPATH, 'rawS3')
# Check path exists or not:
check_path_exist(SRCFLDR)

DSTFLDR = os.path.join(DATAPATH, 'StudyData')
# if DSTFLDR does not exist, creates one (otherwise, it doesn't do anything):
os.makedirs(DSTFLDR, exist_ok=True)
FEXT = 'dcm'

print(f'Working directory is {SRCFLDR}')
print(f'Saving directory is {DSTFLDR}')

OK: Path to folder \Users\joseu\Data\fMRIBreastData\rawS3 does exist
Working directory is \Users\joseu\Data\fMRIBreastData\rawS3
Saving directory is \Users\joseu\Data\fMRIBreastData\StudyData


In [22]:
dcmlist = glob.glob(os.path.join(SRCFLDR,f'*.{FEXT}'))
print(f'There are {len(dcmlist)} files in {SRCFLDR}')

There are 12600 files in \Users\joseu\Data\fMRIBreastData\rawS3


In [38]:
# Sort out dicom files following this order:
# PatientID () --> Not used
# PatientName () --> Use this as it is also used by Nasib, so to keep consistency
# StudyID () --> Not needed
# StudyDate () --> This defines the pre- and post-treatment (just the earliest is pre- and the latest is post-)
# SeriesNro () --> This is relevant to later eliminate any possible in-scanner post-processed data
# SeriesDescription ()
print(f'Processing DICOM list at {SRCFLDR}. Please wait...')
nels = len(dcmlist)
nsteps = 10**math.floor(math.log10(0.01*nels)+1)
start_time = perf_counter()
for nimg, dcm in enumerate(dcmlist):
    if (nimg % nsteps)==0:
        print(f'\t{nels-nimg} files to process...')

    ds = pydicom.dcmread(dcm,stop_before_pixels=True)    
    [PatientID, 
     PatientName, 
     StudyDate, 
     SeriesNro, 
     TempPos] = [ds.PatientID, str(ds.PatientName), ds.StudyDate, 
                 str(ds.SeriesNumber), str(ds.TemporalPositionIdentifier)]
    
    name_as_list = PatientName.split(' ')
    # remove multiple spaces:
    name_no_space = [i for i in name_as_list if i != '']
    # From the second element onward, use camel-case:
    name_camel_case = [i.title().replace('Treatmensst','Treatment') if idx>0 else i for idx, i in enumerate(name_no_space)]
    # Re-Join the name with a dash instead of (multiple) spaces:
    PatientName = '-'.join(name_camel_case)
    folderStruct = os.path.join(DSTFLDR,
                                '-'.join([PatientName[:PatientName.find('-')], 
                                          PatientID]),
                                '-'.join([PatientName.replace(' ','_'),
                                          StudyDate.replace(' ','_')]), 
                                SeriesNro.replace(' ','_'), 
                                TempPos.replace(' ','_'))
    os.makedirs(folderStruct, exist_ok=True)
    dstFile = os.path.join(folderStruct, os.path.split(dcm)[-1])
    if not os.path.isfile(dstFile):
        shutil.copy2(dcm, dstFile)
    else:
        print(f'File {dstFile}, already exists. Nothing done')
end_time = perf_counter()
elp_time = end_time - start_time        
print(f'All done (Elapsed time was {elp_time:.1f}[s]). Check the folders to see the results. Bye!')

Processing DICOM list at \Users\joseu\Data\fMRIBreastData\rawS3. Please wait...
	12600 files to process...
	11600 files to process...
	10600 files to process...
	9600 files to process...
	8600 files to process...
	7600 files to process...
	6600 files to process...
	5600 files to process...
	4600 files to process...
	3600 files to process...
	2600 files to process...
	1600 files to process...
	600 files to process...
All done (Elapsed time was 279.9[s]). Check the folders to see the results. Bye!
