In [1]:
import os.path
import scipy.io
import scipy.stats
import numpy as np
from sklearn.metrics import confusion_matrix

## getPatientData()

In [2]:
def getPatientData():
    """
    Read the data from the stray .mat files and store it in a
    list of dictionaries, with each list element containing a patient
    e.g.
        patients = 
    
        1x104 list with each entry being a dict with fields:
        
            adc       | (144x144x26) ndarray uint16
            cdi       | (144x144x26) ndarray float64
            pMask     | (144x144x26) ndarray bool
            numTumor  | int
            patientID | str
            tumors    | dict (only present when a Lesion_<id>.mat file exists)
            
        patients[2]['tumors'] = 
        
        a dict with fields:
        
            gleasonScore | list of int
            lesion       | list of (144x144x26) ndarray bool

    NOTE(review): the shapes/dtypes above are carried over from the original
    notes and depend on the .mat files on disk -- confirm against the data.

    @output: a list [patients, numPatients, numPatientsWithTumor] where
             numPatients is the length of '../patientID.mat'['patientID'] and
             numPatientsWithTumor is the length of '../posID.mat'['caseID']
    """
    patientID = scipy.io.loadmat('../patientID.mat')['patientID']
    numPatients = len(patientID)

    patientIDWithTumor = scipy.io.loadmat('../posID.mat')['caseID']
    numPatientsWithTumor = len(patientIDWithTumor)

    patients = []
    for pid in patientID:
        path = "../100_original_anonymized_images_ExportedMatlab/" + pid + '/'
        patient = {
            "adc": scipy.io.loadmat(path + 'ADC_' + pid + '.mat')['ADC'],
            "cdi": scipy.io.loadmat(path + 'CDI_matlab_' + pid + '.mat')['CDI_matlab'],
            "pMask": np.array(scipy.io.loadmat(path + 'PMask0_' + pid + '.mat')['Mask0'],dtype=bool),
            "numTumor": 0,
            "patientID": pid
        }

        # Lesion files only exist for patients with annotated tumors; everyone
        # else keeps numTumor == 0 and gets no "tumors" entry.
        lesionFilePath = path + 'Lesion_' + pid + '.mat'
        if os.path.exists(lesionFilePath):
            lesionData = scipy.io.loadmat(lesionFilePath)['Lesion']
            lesionInfo = scipy.io.loadmat(path + 'Lesion_info_' + pid + '.mat')['Lesion_info']
            numTumor = len(lesionInfo[0])
            patient["numTumor"] = numTumor
            patient["tumors"] = {
                "gleasonScore": [lesionInfo[0][j] for j in range(numTumor)],
                "lesion": [np.array(lesionData[0][j],dtype=bool) for j in range(numTumor)]
            }
        patients.append(patient)

    return [patients, numPatients, numPatientsWithTumor]

## getCancerPixels()

In [3]:
def getCancerPixels(patient,modality,cancer):
    """
    Takes a patient dictionary as input and return an array that contains 
    all the pixels identified as either cancerous or non-cancerous
    @input: patient: a dictionary containing all relevant info about a patient
    @input modality: a string indicating adc or cdi
    @input: cancer: a bool indicating whether the pixels should be cancerous or not
    @output: an array containing all the identified pixels of the given modality;
             if the modality is not a key of the patient dictionary, a message is
             printed and the integer 0 is returned instead
    """
    cancerMask = getCombinedCancerMask(patient)

    # Only the modality lookup is treated as "unrecognized modality". Any other
    # failure (e.g. mismatched array shapes) now propagates instead of being
    # silently reported under the wrong message.
    try:
        image = patient[modality]
    except (KeyError, TypeError):
        print("Unrecognized modality.")
        return 0

    # Keep voxels inside the prostate mask whose cancer label matches `cancer`.
    selected = np.logical_and(patient["pMask"]==1, cancerMask==int(cancer))
    return image[selected]
    

## getCombinedCancerMask()

In [4]:
def getCombinedCancerMask(patient):
    """
    Build a single boolean mask that is the union of all of a patient's lesion
    masks whose Gleason score is at least 7.
    @input: patient: a dictionary with keys 'pMask', 'numTumor' and, when
            numTumor > 0, 'tumors' (with 'gleasonScore' and 'lesion' lists)
    @output: a bool ndarray with the same shape as patient['pMask']; all False
             when the patient has no tumors or none with Gleason score >= 7
    """
    # Always start from a bool array so the result has the same dtype whether
    # or not the patient has tumors.
    cancerMask = np.zeros(np.shape(patient['pMask']), dtype=bool)
    for i in range(patient["numTumor"]):
        # Lesions with a Gleason score below 7 are not counted as cancer here.
        if patient["tumors"]["gleasonScore"][i] < 7:
            continue
        cancerMask |= (np.asarray(patient["tumors"]["lesion"][i]) == 1)
    return cancerMask
    

## getDistsWBoundary()

In [5]:
def getDistsWBoundary(cancerPixels,nonCancerPixels):
    """
    Fit a normal distribution to each pixel population and locate the grid
    point where the two fitted densities are closest to equal.
    @input: cancerPixels: array of pixel values labelled cancerous
    @input: nonCancerPixels: array of pixel values labelled non-cancerous
    @output: a list [x, y_cancer, y_non_cancer, boundary] where x is a 200-point
             grid spanning mean +/- 4 std of both fits, y_* are the fitted pdfs
             evaluated on x, and boundary is the element of x at which the
             ratio y_cancer / y_non_cancer is closest to 1
    """
    dist = scipy.stats.norm

    mu_cancer,     std_cancer     = dist.fit(cancerPixels)
    mu_non_cancer, std_non_cancer = dist.fit(nonCancerPixels)

    # Cover +/- 4 standard deviations of whichever distribution reaches further.
    x_lb = min(mu_cancer - 4*std_cancer, mu_non_cancer - 4*std_non_cancer)
    x_ub = max(mu_cancer + 4*std_cancer, mu_non_cancer + 4*std_non_cancer)

    x = np.linspace(x_lb,x_ub,200)

    y_cancer     = dist.pdf(x, mu_cancer,    std_cancer)
    y_non_cancer = dist.pdf(x, mu_non_cancer,std_non_cancer)

    # Form the density ratio from log-pdfs: dividing the raw pdfs gives 0/0 = NaN
    # wherever both underflow, and np.argmin would then return the first NaN.
    log_ratio = (dist.logpdf(x, mu_cancer,     std_cancer)
                 - dist.logpdf(x, mu_non_cancer, std_non_cancer))
    with np.errstate(over="ignore"):
        ratio = np.exp(log_ratio)  # may overflow to inf far from the crossing

    boundary = x[np.argmin(np.abs(ratio - 1))]

    return [x,y_cancer,y_non_cancer,boundary]

## splitModalityPixel()

In [6]:
def splitModalityPixel(training_set=None):
    """
    Collect the ADC and CDI pixel values of a set of patients, split into
    cancerous and non-cancerous populations.
    @input: training_set: a non-empty numpy array of patient dictionaries
    @output: a tuple (adcCancerPixels, adcNonCancerPixels,
             cdiCancerPixels, cdiNonCancerPixels) of flat 1-D arrays
    @raises AssertionError: if training_set is None, is not a numpy array, or is empty
    """
    assert training_set is not None, "empty training set"
    assert isinstance(training_set, np.ndarray), "The input parameter must be a numpy array"
    assert len(training_set) != 0, "empty training set"

    # Gather per-patient chunks and join once at the end; repeated np.append
    # re-copies the whole accumulated array on every call.
    chunks = {
        ("adc", True): [], ("adc", False): [],
        ("cdi", True): [], ("cdi", False): [],
    }

    for patient in training_set:
        hasTumor = patient["numTumor"] != 0
        for modality in ("adc", "cdi"):
            # Cancer pixels are only requested for patients that have tumors.
            if hasTumor:
                chunks[(modality, True)].append(getCancerPixels(patient, modality, True))
            chunks[(modality, False)].append(getCancerPixels(patient, modality, False))

    def _join(parts):
        # Seeding with an empty float array mirrors np.append(np.array([]), ...):
        # the result is flat and an empty input yields an empty array.
        return np.concatenate([np.array([])] + [np.ravel(part) for part in parts])

    adcCancerPixels    = _join(chunks[("adc", True)])
    adcNonCancerPixels = _join(chunks[("adc", False)])
    cdiCancerPixels    = _join(chunks[("cdi", True)])
    cdiNonCancerPixels = _join(chunks[("cdi", False)])

    return adcCancerPixels, adcNonCancerPixels, cdiCancerPixels, cdiNonCancerPixels

## evaluate()

In [7]:
def evaluate(test_set, adc_threshold, cdi_threshold):
    """
    Accumulate voxel-wise confusion matrices for threshold classifiers on the
    ADC and CDI images over all patients in test_set.

    A voxel inside the prostate mask is predicted cancerous when its CDI value
    is above cdi_threshold, respectively when its ADC value is below
    adc_threshold. Ground truth comes from getCombinedCancerMask().
    @input: test_set: an iterable of patient dictionaries
    @input: adc_threshold: ADC values strictly below this are predicted cancerous
    @input: cdi_threshold: CDI values strictly above this are predicted cancerous
    @output: (adc_cm, cdi_cm), two 2x2 integer arrays summed over patients, in
             the layout returned by sklearn's confusion_matrix with
             labels=[False, True]
    """
    cdi_cm = np.zeros(dtype=int,shape=(2,2))
    adc_cm = np.zeros(dtype=int,shape=(2,2))

    # Pin the label set: without it confusion_matrix returns a 1x1 matrix when a
    # patient has only one class present, and += would broadcast that count into
    # all four cells.
    class_labels = [False, True]

    for patient in test_set:
        in_prostate = patient["pMask"] == True
        if not np.any(in_prostate):
            continue  # no voxels to score for this patient

        # Compute the ground truth once per patient and force a bool dtype so it
        # matches the bool predictions.
        truth = np.asarray(getCombinedCancerMask(patient), dtype=bool)[in_prostate].flatten()

        cdi_prediction = (patient["cdi"][in_prostate] > cdi_threshold).flatten()
        cdi_cm += confusion_matrix(truth, cdi_prediction, labels=class_labels)

        adc_prediction = (patient["adc"][in_prostate] < adc_threshold).flatten()
        adc_cm += confusion_matrix(truth, adc_prediction, labels=class_labels)

    return adc_cm, cdi_cm