This notebook will only run if the `data` folder contains paired CT and CMR data downloaded from the Barts PACS, i.e. from patients who were enrolled in the local EVINCI substudy (ask Francesca Pugliese about this).

In [None]:
import pydicom as dcm

import numpy as np

import glob

import os

import re

from shutil import copyfile

import matplotlib.pyplot as plt

from network_utils import predict_stochastic
from MultiResUNet.MultiResUNet import MultiResUnet

from mask_utils import load_image,show_image_with_masks

from tensorflow.keras.models import model_from_json

import pickle

import pandas as pd

from scipy.stats import pearsonr

In [None]:
def get_patient_ID(DICOMDIR):
    '''takes a path to a DICOMDIR registry file and returns the PatientID of its first directory record

    Only the first entry of DirectoryRecordSequence is inspected, so this presumably relies on
    every DICOMDIR starting with its PATIENT record - TODO confirm.
    '''

    #dcmread replaces read_file, which is deprecated in pydicom 2.x and removed in pydicom 3.0
    registry = dcm.dcmread(DICOMDIR)

    return registry.DirectoryRecordSequence[0].PatientID

def get_matching_files(DICOMDIR,SeriesDescription):

    '''takes a path to a DICOMDIR registry file, a series description and return file paths for all DICOMs from the corresponding series

    DICOMDIR: path to the DICOMDIR registry file.
    SeriesDescription: regular expression, applied with re.match (i.e. anchored at the start)
        to the SeriesDescription of every SERIES record.

    Returns a list of file paths for the IMAGE records of the FIRST matching series,
    or None if no series matches.
    '''

    #dcmread replaces read_file, which is deprecated in pydicom 2.x and removed in pydicom 3.0
    registry = dcm.dcmread(DICOMDIR)
    #the directory containing the referenced file - which will be the top of a file tree containing all files referenced by the registry
    containerDirectory = os.path.split(DICOMDIR)[0]

    startIndex = None
    endIndex = None

    for index,item in enumerate(registry.DirectoryRecordSequence):

        if item.DirectoryRecordType != 'SERIES':
            continue

        if startIndex is None:
            #SERIES records are not guaranteed to carry a description, so treat a missing/empty one as ''
            description = getattr(item,'SeriesDescription','') or ''
            if re.match(SeriesDescription,description):
                #records belonging to the series start right after its SERIES record
                startIndex = index+1
        else:
            #the next SERIES record marks the end of the matched series
            endIndex = index
            break

    if startIndex is None:
        return None

    matchingFiles = []
    #endIndex stays None when the matched series is the last one, in which case the slice runs to the end
    for item in registry.DirectoryRecordSequence[startIndex:endIndex]:
        if item.DirectoryRecordType == 'IMAGE':
            fileID = item.ReferencedFileID
            #a single-component ReferencedFileID comes back as a plain string; unpacking it
            #with * would split it into characters, so wrap it in a list first
            components = [fileID] if isinstance(fileID,str) else list(fileID)
            matchingFiles.append(os.path.join(containerDirectory,*components))

    return matchingFiles

def first_image_in_series(listOfDicomFiles):
    '''returns the path of the first file (in sorted order) whose TriggerTime is exactly 0, or None

    listOfDicomFiles: list of DICOM file paths, or None.

    None is returned when the input is None or empty, or when no file has a TriggerTime of 0.
    '''

    if not listOfDicomFiles:
        return None

    #first sort the list, as usually the one with the lowest trigger time is also the first one after sorting
    for dicom in sorted(listOfDicomFiles):
        #only the header is needed here, so skip the pixel data to keep the scan cheap
        header = dcm.dcmread(dicom,stop_before_pixels=True)
        #files without a TriggerTime tag are skipped rather than raising AttributeError
        triggerTime = getattr(header,'TriggerTime',None)
        if triggerTime is not None and triggerTime == 0.0:
            return dicom

    return None

In [None]:
#glob returns paths in arbitrary order, so sort to make positional indices into these lists reproducible
DICOMDIRS = sorted(glob.glob(os.path.join('data','DICOMS','*','DICOMDIR')))
#derive the subject folders from DICOMDIRS so that both lists stay aligned element by element
#(globbing 'data/DICOMS/*' separately would also pick up entries that contain no DICOMDIR)
subjects = [os.path.dirname(d) for d in DICOMDIRS]

patientIDs = [get_patient_ID(d) for d in DICOMDIRS]

In [None]:
#regex-able patterns for the different sequences present
BTFE = 'B-TFE_4CH'
alternateBTFE = '4ch_cine'
CaSc = '.*CaSc'

#one entry per DICOMDIR: path of the first frame of the 4-chamber cine, or None if none was found
dicomBTFEs = []

for d in DICOMDIRS:

    dBTFE = first_image_in_series(get_matching_files(d,BTFE))
    #test the lookup result, not the pattern constant (which is always truthy)
    if dBTFE is None: #try the alternate string
        dBTFE = first_image_in_series(get_matching_files(d,alternateBTFE))
    dicomBTFEs.append(dBTFE)


In [None]:
#gather the file paths of the series matching the CaSc pattern, one entry per DICOMDIR
#(an entry is None when get_matching_files finds no matching series)
dicomCTs = []
for d in DICOMDIRS:
    dicomCTs.append(get_matching_files(d,CaSc))

In [None]:
#THIS JUST USED FOR COPYING CT FILES SO QFAT IS EASIER TO USE
CTFOLDER = os.path.join('data','CT')

#pair each CT file list with the DICOMDIR it was read from, so the destination folder
#always belongs to the right subject
for dicomdir,ctList in zip(DICOMDIRS,dicomCTs):
    #get_matching_files returns None when a subject has no matching CT series
    if not ctList:
        continue
    #the subject folder is the one that directly contains the DICOMDIR file
    subjectName = os.path.basename(os.path.dirname(dicomdir))
    subjectFolder = os.path.join(CTFOLDER,subjectName)
    os.makedirs(subjectFolder,exist_ok=True)
    for d in ctList:
        copyfile(d,os.path.join(subjectFolder,os.path.basename(d)))
    

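Now, use QFAT to analyse each of the subject folders that were just created under `data/CT` (this cannot currently be done programmatically). Save the results file as `QFAT_Results.txt` in the `data` folder, which is where the cells below expect to find it.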

Now, load the model which will be used to make predictions...

In [None]:
#load the model
modelName = 'mrunet_bayesian_2020-07-13_13-40'

#location of the actual saved model
modelBaseName = os.path.join('data','models',modelName)

modelParamFile = modelBaseName + '.h5'
modelArchitecture = modelBaseName + '.json'

#the architecture is stored as JSON and the weights separately in the .h5 file
with open( modelArchitecture , 'r') as json_file:
    MODEL = model_from_json( json_file.read() )

MODEL.load_weights(modelParamFile)

#hyperparameter N, defined according to quantify_model_performance.ipynb
N = 15

accuracyModelPath = modelBaseName + '_prediction_conversion.pickle'
#NOTE: pickle can execute arbitrary code on load - only use pickle files from a trusted source
#the context manager closes the file handle, which pickle.load(open(...)) would leave open
with open(accuracyModelPath,'rb') as pickle_file:
    ACCURACYMODEL = pickle.load(pickle_file)


Now, load the details for image preprocessing:


In [None]:
#NOTE: pickle can execute arbitrary code on load - only use pickle files from a trusted source
#context managers close the file handles, which pickle.load(open(...)) would leave open
with open(os.path.join('data','PADSIZE.pickle'),'rb') as pickle_file:
    PADSIZE = pickle.load(pickle_file)
with open(os.path.join('data','PXSPACING.pickle'),'rb') as pickle_file:
    PXSPACING = pickle.load(pickle_file)
#area of one pixel; np.prod replaces np.product, which was removed in NumPy 2.0
PXAREA = np.prod(PXSPACING)

In [None]:
#names given, in order, to the outputs of predict_stochastic that follow its first two
RESNAMES = [
    'meanArea (cm2)',
    'stdArea (cm2)',
    'predicted DSC',
]

def get_image(imagePath):
    '''loads the image at imagePath with load_image, using the PXSPACING and PADSIZE settings

    Only the first element of what load_image returns is kept.
    '''

    loaded = load_image(imagePath,desiredPxSpacing=PXSPACING, padSize=PADSIZE)

    return loaded[0]

def quantify_fat(im):
    '''runs predict_stochastic on one image and returns (resultDict, prediction)

    im: image array, as returned by get_image.

    resultDict maps each name in RESNAMES to the matching model output, with both area
    entries rescaled by PXAREA/100. prediction is the first output of predict_stochastic.
    '''

    #add a leading and a trailing singleton axis before handing the image to the network
    batch = im.reshape((1,) + tuple(im.shape) + (1,))
    outputs = predict_stochastic(MODEL,N,ACCURACYMODEL,batch)

    prediction = outputs[0]

    #the first 2 outputs are the actual segmentation and the uncertainty map, which do not
    #belong in a results table, so only the remaining outputs are named
    resultDict = {name: value for name,value in zip(RESNAMES,outputs[2:])}

    #convert the areas to cm2 - presumably from pixel counts, with PXSPACING in mm (TODO confirm)
    scale = (PXAREA/100)
    for key in ('meanArea (cm2)','stdArea (cm2)'):
        resultDict[key] *= scale

    return resultDict,prediction

In [None]:
#NOTE(review): exploratory cell - `bad` is only defined in the correlation plotting cells
#further down, so this raises NameError on a fresh "Restart & Run All"
np.argwhere(bad)

In [None]:
#NOTE(review): exploratory cell - `badQC` is only defined in the correlation plotting cells
#further down, so this raises NameError on a fresh "Restart & Run All"
badQC.index[badQC]

In [None]:
#positions within patientIDs of the patients flagged by badQC
#NOTE(review): exploratory cell - `badQC` is only defined in the correlation plotting cells
#further down, so this raises NameError on a fresh "Restart & Run All"
[i for i,n in enumerate(patientIDs) if n in badQC.index[badQC]]

In [None]:
#NOTE(review): exploratory cell - `QFAT` and its 'BTFE predicted DSC' column are only created
#in later cells, so this fails on a fresh "Restart & Run All"; 34 is a hard-coded row position
QFAT.iloc[34]['BTFE predicted DSC']

In [None]:
#position within dicomBTFEs of the subject to use as an example figure
eg = 107

egPath = dicomBTFEs[eg]
#entries of dicomBTFEs are None when no suitable image was found for that subject
if egPath is None:
    raise ValueError('no BTFE image was found for subject index %d' % eg)

#savefig does not create missing folders, so make sure the output folder exists
outputFolder = os.path.join('graphs','CTComparison')
os.makedirs(outputFolder,exist_ok=True)

plt.figure(figsize = (5,5))

im = get_image(egPath)
result,mask = quantify_fat(im)
show_image_with_masks(im,mask.squeeze(),{'c':'r','linewidth':0.5})
plt.title('predicted DSC = %.02f'  %result['predicted DSC'])

#save the same figure as both a raster and a vector file
for extension in ('.png','.svg'):
    plt.savefig(os.path.join(outputFolder,'example' + str(eg) + extension))

Now, we read the results table from QFAT so we can match up the CT fat measurements with the CMR fat areas for each patient.

In [None]:
#results table exported by QFAT, one row per patient
QFATresultsFile = os.path.join('data','QFAT_Results.txt')

#rows are indexed by patient ID so they can be looked up by the IDs read from the DICOMDIRs
QFAT = pd.read_csv(
    QFATresultsFile,
    index_col = 'Patient_ID',
)

In [None]:
#run the model on the BTFE image of every patient that also has a row in the QFAT table
#the loop variable is deliberately NOT called BTFE: that name holds the series-description
#pattern used by the lookup cell, and overwriting it here would break re-running that cell
for patient,btfePath in zip(patientIDs,dicomBTFEs):

    #skip patients that are absent from the QFAT table or for whom no BTFE image was found
    if patient in QFAT.index and btfePath is not None:
        im = get_image(btfePath)
        result,mask = quantify_fat(im)
        QFAT.loc[patient, 'BTFE fat area'] = result['meanArea (cm2)']
        QFAT.loc[patient, 'BTFE predicted DSC'] = result['predicted DSC']

In [None]:
#write the combined table next to the QFAT results file, with '_with_CMR' inserted before the extension
name,ext = os.path.splitext(QFATresultsFile)
name = '_'.join((name,'with_CMR'))
outputFile = ''.join((name,ext))
QFAT.to_csv(outputFile)

In [None]:
#correlation between the thoracic fat volume from QFAT and the fat area from the BTFE image
plt.figure(figsize = (10,5))

x,y = QFAT[' Thoracic_Fat_volume'],QFAT['BTFE fat area']
#rows where both measurements are present
notna = ~np.logical_or(np.isnan(x),np.isnan(y))
#quality control: split the rows on the predicted DSC, using 0.6 as the threshold
goodQC = QFAT['BTFE predicted DSC'] > 0.6
badQC = QFAT['BTFE predicted DSC'] < 0.6
good = np.logical_and(goodQC,notna)
bad = np.logical_and(badQC,notna)
#the correlation is computed on the rows that pass quality control only
r,p = pearsonr(x[good],y[good])
#the counts in the legend are the numbers of points actually plotted in each group
plt.scatter(x[good],y[good],label = 'predicted DSC > 0.6, n = '+ str(good.sum()))
plt.scatter(x[bad],y[bad],label = 'predicted DSC < 0.6, n = '+ str(bad.sum()),c='k',alpha = 0.5)
plt.title(' '.join(('pearson r =','%.02f' %r,', p = %.02g' %p) ))
plt.xlabel('Pericardial fat volume from CT/QFAT (cm$^{3}$)')
plt.ylabel('Pericardial fat area from CMR (cm$^{2}$)')
plt.legend()

#savefig does not create missing folders, so make sure the output folder exists
outputFolder = os.path.join('graphs','CTComparison')
os.makedirs(outputFolder,exist_ok=True)

plt.savefig(os.path.join(outputFolder,'ThoracicFatVolume_BTFE_correlation.png'))
plt.savefig(os.path.join(outputFolder,'ThoracicFatVolume_BTFE_correlation.svg'))

In [None]:

#correlation between the epicardial fat volume from QFAT and the fat area from the BTFE image
plt.figure(figsize = (10,5))

x,y = QFAT[' Epicardial_Fat_volume'],QFAT['BTFE fat area']
#rows where both measurements are present
notna = ~np.logical_or(np.isnan(x),np.isnan(y))
#quality control: split the rows on the predicted DSC, using 0.6 as the threshold
goodQC = QFAT['BTFE predicted DSC'] > 0.6
badQC = QFAT['BTFE predicted DSC'] < 0.6
good = np.logical_and(goodQC,notna)
bad = np.logical_and(badQC,notna)
#the correlation is computed on the rows that pass quality control only
r,p = pearsonr(x[good],y[good])
#the counts in the legend are the numbers of points actually plotted in each group
plt.scatter(x[good],y[good],label = 'predicted DSC > 0.6, n = '+ str(good.sum()))
plt.scatter(x[bad],y[bad],label = 'predicted DSC < 0.6, n = '+ str(bad.sum()),c='k',alpha = 0.5)
plt.title(' '.join(('pearson r =','%.02f' %r,', p = %.02g' %p) ))
#NOTE(review): the x axis is labelled 'Pericardial' although the epicardial volume is plotted - confirm the intended wording
plt.xlabel('Pericardial fat volume from CT/QFAT (cm$^{3}$)')
plt.ylabel('Pericardial fat area from CMR (cm$^{2}$)')
plt.legend()

#savefig does not create missing folders, so make sure the output folder exists
outputFolder = os.path.join('graphs','CTComparison')
os.makedirs(outputFolder,exist_ok=True)

plt.savefig(os.path.join(outputFolder,'EpicardialFatVolume_BTFE_correlation.png'))
plt.savefig(os.path.join(outputFolder,'EpicardialFatVolume_BTFE_correlation.svg'))