In [None]:
import pydicom as dcm

import numpy as np

import glob

import os

import re

from shutil import copyfile

import pickle

In [None]:
def get_patient_ID(DICOMDIR):
    '''takes a path to a DICOMDIR registry file and returns the PatientID of its first directory record

    DICOMDIR: path to the DICOMDIR registry file.

    Returns whatever is stored in the PatientID attribute of the first entry of
    the registry's DirectoryRecordSequence.
    Raises IndexError if the registry holds no records and AttributeError if the
    first record has no PatientID.
    '''
    # dcmread is the supported reader; read_file is a deprecated alias that
    # newer pydicom releases no longer provide.
    registry = dcm.dcmread(DICOMDIR)

    # NOTE(review): this presumes the first record is the patient record - confirm for multi-patient registries
    return registry.DirectoryRecordSequence[0].PatientID

def get_matching_files(DICOMDIR,SeriesDescription):

    '''takes a path to a DICOMDIR registry file, a series description and return file paths for all DICOMs from the corresponding series

    DICOMDIR: path to the DICOMDIR registry file.
    SeriesDescription: regular expression, applied with re.match (i.e. anchored
        at the start) to the SeriesDescription of every SERIES record.

    Only the FIRST matching series is used: the IMAGE records that follow it,
    up to the next SERIES record (or the end of the registry), are collected.

    Returns a list of file paths (possibly empty), or None when no series
    description matches.
    '''

    # dcmread is the supported reader; read_file is a deprecated alias that
    # newer pydicom releases no longer provide.
    registry = dcm.dcmread(DICOMDIR)
    #the directory containing the referenced file - which will be the top of a file tree containing all files referenced by the registry
    containerDirectory = os.path.dirname(DICOMDIR)

    # stays None until a matching SERIES record has been seen
    matchingFiles = None

    for item in registry.DirectoryRecordSequence:
        recordType = item.DirectoryRecordType

        if recordType == 'SERIES':
            if matchingFiles is not None:
                # the next series starts here, so the matched one is complete
                break
            # a SERIES record may lack a description; treat that as "no match" instead of crashing
            description = getattr(item,'SeriesDescription','') or ''
            if re.match(SeriesDescription,description):
                matchingFiles = []

        elif recordType == 'IMAGE' and matchingFiles is not None:
            fileID = item.ReferencedFileID
            # a single-component file ID is a plain string; unpacking it with *
            # would split it into individual characters
            if isinstance(fileID,str):
                fileID = [fileID]
            matchingFiles.append(os.path.join(containerDirectory,*fileID))

    return matchingFiles

def first_image_in_series(listOfDicomFiles):
    '''takes a list of DICOM file paths and returns the first one (in sorted order) whose TriggerTime is 0.0

    listOfDicomFiles: iterable of file paths, or None.

    Returns the matching path, or None when the input is None, empty, or
    contains no file with a TriggerTime of exactly 0.0. Files without a
    TriggerTime attribute are skipped.
    '''
    if listOfDicomFiles is None:
        return None

    #first sort the list, as usually the one with the lowest trigger time is also the first one after sorting
    for dicom in sorted(listOfDicomFiles):
        # only a header field is inspected, so skip reading the pixel data;
        # dcmread replaces the deprecated read_file alias
        header = dcm.dcmread(dicom,stop_before_pixels=True)
        # not every DICOM carries a TriggerTime; a missing one simply cannot match
        if getattr(header,'TriggerTime',None) == 0.0:
            return dicom

    return None

In [None]:
# glob returns matches in arbitrary order; sort so the subject order is reproducible between runs
DICOMDIRS = sorted(glob.glob(os.path.join('data','DICOMS','*','DICOMDIR')))
# Derive the subject folders from the registries themselves so that DICOMDIRS,
# subjects and patientIDs stay index-aligned (a separate glob could list folders
# without a DICOMDIR, or list them in a different order).
subjects = [os.path.dirname(d) for d in DICOMDIRS]

patientIDs = [get_patient_ID(d) for d in DICOMDIRS]

In [None]:
#regex-able patterns for the different sequences present
BTFE = 'B-TFE_4CH'
alternateBTFE = '4ch_cine'
CaSc = '.*CaSc'

# one entry per DICOMDIR: path of the B-TFE frame with TriggerTime 0.0, or None if none was found
dicomBTFEs = []

for d in DICOMDIRS:

    dBTFE = first_image_in_series(get_matching_files(d,BTFE))
    # test the search result, not the (always truthy) pattern constant,
    # otherwise the fallback below can never run
    if dBTFE is None: #try the alternate string
        dBTFE = first_image_in_series(get_matching_files(d,alternateBTFE))
    dicomBTFEs.append(dBTFE)


In [None]:
# One entry per DICOMDIR, in the same order: the file paths of the series whose
# description matches the CaSc pattern (None where no series matched).
dicomCTs = []
for registryPath in DICOMDIRS:
    dicomCTs.append(get_matching_files(registryPath,CaSc))

In [None]:
#THIS JUST USED FOR COPYING CT FILES SO QFAT IS EASIER TO USE
CTFOLDER = os.path.join('data','CT')

for s,ctList in zip(subjects,dicomCTs):
    # mirror the subject's folder name under CTFOLDER
    subjectFolder = os.path.join(CTFOLDER,os.path.basename(s))
    if not os.path.isdir(subjectFolder):
        os.makedirs(subjectFolder)
    # get_matching_files returns None when no series matched; copy nothing in that case
    for d in ctList or []:
        copyfile(d,os.path.join(subjectFolder,os.path.basename(d)))
    

Now use QFAT to analyse each of the subject folders created under 'data/CT' (this step cannot currently be done programmatically), then put the results file in 'data'.

Now, load the details for image preprocessing, and create the anonymized files:


In [None]:
# NOTE: pickle.load can execute arbitrary code - only load pickles that were produced locally.
# Context managers make sure the file handles are closed again.
with open(os.path.join('data','PADSIZE.pickle'),'rb') as f:
    PADSIZE = pickle.load(f)
with open(os.path.join('data','PXSPACING.pickle'),'rb') as f:
    PXSPACING = pickle.load(f)
# np.prod replaces np.product, which is deprecated and removed in NumPy 2.0
PXAREA = np.prod(PXSPACING)

In [None]:
def splitall(path):
    '''takes a path and returns the list of all its components, leftmost first

    The path is peeled apart from the right with os.path.split until nothing
    more can be split off; the remainder (the root of an absolute path, or the
    first name of a relative one) becomes the first element.
    '''
    components = []
    remaining = path
    while True:
        head, tail = os.path.split(remaining)
        if head == remaining:
            # absolute path: only the root is left
            components.append(head)
            break
        if tail == remaining:
            # relative path: only a single name is left
            components.append(tail)
            break
        components.append(tail)
        remaining = head
    # components were gathered right-to-left
    components.reverse()
    return components

# keep only the subjects for which a B-TFE frame was actually found
BTFEs = []
for candidate in dicomBTFEs:
    if candidate is not None:
        BTFEs.append(candidate)

# the third path component is converted to an integer subject number
# NOTE: from here on `subjects` holds these integers, not the folder paths used earlier
subjects = [int(splitall(framePath)[2]) for framePath in BTFEs]


In [None]:
anondir = os.path.join('data','anon')

if not os.path.isdir(anondir):
    os.makedirs(anondir)

# paths of the anonymized files that were written successfully
anonFiles = []

# NOTE(review): load_image is not defined in the visible part of this notebook -
# it has to be defined or imported before this cell runs.
for patientNumber,imagePath in zip(subjects,BTFEs):
    # output files are named after the zero-padded subject number, e.g. 007.pickle
    outFile = os.path.join(anondir,str(patientNumber).zfill(3) + '.pickle')
    try:
        pixelArray,pxSpacing = load_image(imagePath,desiredPxSpacing=PXSPACING, padSize=PADSIZE)
        # only the resampled pixel data and its spacing are stored
        result = {'pxSpacing':pxSpacing,'pxArray':pixelArray}
        with open(outFile,'wb') as f:
            pickle.dump(result,f)
    except Exception as err:
        # best effort: report the failing subject together with the reason and carry on
        print('{} failed: {!r}'.format(outFile,err))
    else:
        anonFiles.append(outFile)