In [42]:
import os.path
import scipy.io
import numpy as np

### Folder Structure

All analyses are done in Jupyter Notebooks which can be found in the folder `analysis`

This folder should be in the `UW` folder, and have the sibling directory `100_original_anonymized_images_ExportedMatlab`. The following three (3) files should also be found in at this level:
* `patientID.mat`
* `posID.mat`
* `!ReadMe.txt`

In [143]:
def getPatientData():
    """
    Read the data from the stray .mat files and store it in a
    list of dictionaries, with each list element containing a patient
    e.g.
        patients = 
    
        1x104 list with each entry being a dict with fields:
        
            adc       | (144x144x26) ndarray uint16
            cdi       | (144x144x26) ndarray float64
            pMask     | (144x144x26) ndarray bool
            numTumor  | int
            patientID | str
            tumors    | dict
            
        patients[2]['tumors'] = 
        
        a dict with fields:
        
            gleasonScore | list of int
            lesion       | list of (144x144x26) ndarray bool
    
    """
    patientID = scipy.io.loadmat('../patientID.mat')['patientID']
    numPatients = len(patientID)

    patientIDWithTumor = scipy.io.loadmat('../posID.mat')['caseID']
    numPatientsWithTumor = len(patientIDWithTumor)
    
    patients = [dict() for x in range(numPatients)]
    
    for i in range(numPatients):
        path = "../100_original_anonymized_images_ExportedMatlab/" + patientID[i] + '/'
        patients[i] = {
            "adc": scipy.io.loadmat(path + 'ADC_' + patientID[i] + '.mat')['ADC'],
            "cdi": scipy.io.loadmat(path + 'CDI_matlab_' + patientID[i] + '.mat')['CDI_matlab'],
            "pMask": np.array(scipy.io.loadmat(path + 'PMask0_' + patientID[i] + '.mat')['Mask0'],dtype=bool),
            "numTumor": 0,
            "patientID": patientID[i]
        }
        lesionFilePath = path + 'Lesion_' + patientID[i] + '.mat'
        if os.path.exists(lesionFilePath):
            lesionData = scipy.io.loadmat(lesionFilePath)['Lesion']
            lesionInfo = scipy.io.loadmat(path + 'Lesion_info_' + patientID[i] + '.mat')['Lesion_info']
            patients[i]["numTumor"] = len(lesionInfo[0])
            patients[i]["tumors"] = {
                "gleasonScore": [lesionInfo[0][j] for j in range(len(lesionInfo[0]))],
                "lesion": [np.array(lesionData[0][j],dtype=bool) for j in range(len(lesionInfo[0]))]
            }
    return [patients, numPatients, numPatientsWithTumor]

In [126]:
[patients, numPatients, numPatientsWithTumor] = getPatientData()

In [144]:
help(getPatientData)

Help on function getPatientData in module __main__:

getPatientData()
    Read the data from the stray .mat files and store it in a
    list of dictionaries, with each list element containing a patient
    e.g.
        patients = 
    
        1x104 list with each entry being a dict with fields:
        
            adc       | (144x144x26) uint16
            cdi       | (144x144x26) float64
            pMask     | (144x144x26) bool
            numTumor  | int
            patientID | str
            tumors    | dict
            
        patients[2]['tumors'] = 
        
        a dict with fields:
        
            gleasonScore | list of int
            lesion       | list of (144x144x26) bool

