In [2]:
import os
import pandas

# Settings for the nodule-detection run, grouped by purpose.
# The two absolute paths are specific to the machine this notebook was run on.
config = {
    # Input locations
    'metadatapath': '/Users/john/Projects/SOTAEvaluationNoduleDetection/output/metadata',
    'datapath': '/Users/john/Projects/SOTAEvaluationNoduleDetection/scans/lung50',

    # Output locations
    'preprocess_result_path': './prep_result/',
    'outputfile': 'prediction.csv',

    # Model names and checkpoint files
    'detector_model': 'net_detector',
    'detector_param': './model/detector.ckpt',
    'classifier_model': 'net_classifier',
    'classifier_param': './model/classifier.ckpt',

    # Execution options
    'n_gpu': 1,
    'n_worker_preprocessing': None,
    # NOTE(review): the spelling below is part of the key itself; renaming it
    # would require updating whatever reads it.
    'use_exsiting_preprocessing': False,
    'skip_preprocessing': False,
    'skip_detect': False,
}

# Expose selected settings as top-level names.
metadatapath, datapath = config['metadatapath'], config['datapath']
prep_result_path = config['preprocess_result_path']
skip_prep, skip_detect = config['skip_preprocessing'], config['skip_detect']

[('/Users/john/Projects/SOTAEvaluationNoduleDetection/scans/lung50/summit-9769-yta',
  [],
  ['summit-9769-yta_Y0_BASELINE_A.zraw', 'summit-9769-yta_Y0_BASELINE_A.mhd'])]

In [27]:
import numpy as np
import os
import pydicom
import SimpleITK as sitk

def load_scan(path):
    """Read every file in a directory as a DICOM slice, ordered by z position.

    Parameters
    ----------
    path : str
        Directory whose entries are all read with ``pydicom.dcmread``. Every
        entry returned by ``os.listdir`` is read, so the directory must
        contain only DICOM files.

    Returns
    -------
    list
        The datasets sorted by ``ImagePositionPatient[2]``. Each dataset's
        ``SliceThickness`` is overwritten with the absolute z distance
        between the first two returned slices.

    Raises
    ------
    IndexError
        If the directory yields fewer than two slices.

    Notes
    -----
    If the first two sorted slices share a z position, the number of leading
    slices at that position (``sec_num``) is counted and only the
    ``len(slices) / sec_num`` slices with the lowest ``InstanceNumber`` are
    kept (presumably to drop repeated acquisitions -- TODO confirm).
    """
    # dcmread replaces the deprecated pydicom.read_file alias.
    slices = [pydicom.dcmread(os.path.join(path, name)) for name in os.listdir(path)]
    slices.sort(key=lambda ds: float(ds.ImagePositionPatient[2]))

    if slices[0].ImagePositionPatient[2] == slices[1].ImagePositionPatient[2]:
        # Count how many leading slices sit at the same z position.
        sec_num = 2
        while slices[0].ImagePositionPatient[2] == slices[sec_num].ImagePositionPatient[2]:
            sec_num += 1
        slice_num = int(len(slices) / sec_num)
        # Keep the first `slice_num` slices by instance number, then restore z order.
        slices.sort(key=lambda ds: float(ds.InstanceNumber))
        slices = slices[0:slice_num]
        slices.sort(key=lambda ds: float(ds.ImagePositionPatient[2]))

    try:
        slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2])
    except Exception:
        # Fall back to SliceLocation; unlike a bare `except`, this does not
        # swallow KeyboardInterrupt or SystemExit.
        slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation)

    for ds in slices:
        ds.SliceThickness = slice_thickness

    return slices

def get_pixels_hu(slices):
    """Stack slice pixel data into an int16 volume and apply each slice's rescale.

    Parameters
    ----------
    slices : sequence
        Objects exposing ``pixel_array``, ``RescaleSlope`` and
        ``RescaleIntercept``; the first one must also expose
        ``SliceThickness`` and ``PixelSpacing``.

    Returns
    -------
    tuple
        ``(volume, spacing)`` where ``volume`` is an int16 array with one
        entry per slice along axis 0, and ``spacing`` is a float32 array of
        ``[SliceThickness, *PixelSpacing]`` taken from the first slice.
    """
    # All arithmetic below happens in an int16 volume.
    volume = np.stack([ds.pixel_array for ds in slices]).astype(np.int16)

    # Apply value * slope + intercept per slice (the Hounsfield conversion).
    for idx, ds in enumerate(slices):
        intercept, slope = ds.RescaleIntercept, ds.RescaleSlope

        if slope != 1:
            # Multiply in float64; storing back into the int16 volume
            # drops the fractional part.
            volume[idx] = slope * volume[idx].astype(np.float64)

        volume[idx] += np.int16(intercept)

    first = slices[0]
    spacing = [first.SliceThickness, *first.PixelSpacing]
    return np.array(volume, dtype=np.int16), np.array(spacing, dtype=np.float32)


def load_metaio_scan(path):
    """Load a MetaImage scan with SimpleITK and return its voxels and geometry.

    Parameters
    ----------
    path : str
        Path to the scan *without* the ``.mhd`` extension; the extension is
        appended before reading.

    Returns
    -------
    image : numpy.ndarray
        Voxel data from ``sitk.GetArrayFromImage``, converted to int16.
    voxel_size : numpy.ndarray
        ``GetSpacing()`` as float32, reversed.
    origin : numpy.ndarray
        ``GetOrigin()`` as float32, reversed.
    orientation : numpy.ndarray
        ``GetDirection()`` as float32, in the order SimpleITK returns it
        (flat and not reversed).
    """
    itk_image = sitk.ReadImage(path + '.mhd')
    image = np.array(sitk.GetArrayFromImage(itk_image), dtype=np.int16)

    # SimpleITK reports origin/spacing as (x, y, z) while the array from
    # GetArrayFromImage is indexed (z, y, x); reversing aligns them with
    # the array axes.
    origin = np.array(itk_image.GetOrigin(), dtype=np.float32)[::-1]
    voxel_size = np.array(itk_image.GetSpacing(), dtype=np.float32)[::-1]
    # NOTE(review): the direction cosines are left in SimpleITK's own order,
    # unlike origin and spacing -- confirm that is what consumers expect.
    orientation = np.array(itk_image.GetDirection(), dtype=np.float32)

    return image, voxel_size, origin, orientation


In [30]:
"""
experiment to validate that loading a metaio scan results in the 
same output as the existing code for reading a dicom file
"""
# Reference route: read the DICOM directory slice by slice, then convert to
# an int16 volume plus a [thickness, row spacing, column spacing] array.
case_path = '/Users/john/Projects/SOTAEvaluationNoduleDetection/scans/lung50/summit-2264-sze/summit-2264-sze_Y0_BASELINE_A/scans/3-SUMMIT_Lung_ASiR_V_50_/resources/DICOM/files'
case = load_scan(case_path)
case_pixels_1, spacing_1 = get_pixels_hu(case)

# Candidate route: read the MetaImage file; load_metaio_scan appends '.mhd'.
# NOTE(review): presumably the same scan as above, judging by the shared
# 'summit-2264-sze_Y0_BASELINE_A' path component -- confirm.
case_path_2 = '/Users/john/Projects/SOTAEvaluationNoduleDetection/scans/lung50/summit-2264-sze/summit-2264-sze_Y0_BASELINE_A'
case_pixels_2, spacing_2, origin_2, orientation_2 = load_metaio_scan(case_path_2)

# Cell result is a (pixels_equal, spacing_equal) tuple. np.array_equal is an
# exact element-wise comparison, so the float32 spacings must match bit for bit.
np.array_equal(case_pixels_1, case_pixels_2), np.array_equal(spacing_1, spacing_2)


numpy.ndarray

(475, 512, 512)

dtype('int16')

numpy.ndarray

array([0.625   , 0.748047, 0.748047], dtype=float32)

dtype('float32')

numpy.ndarray

(475, 512, 512)

dtype('int16')

numpy.ndarray

array([0.625   , 0.748047, 0.748047], dtype=float32)

dtype('float32')

(True, True)

In [6]:
"""

review the nodule prediction files

"""
import numpy as np

# Load the two .npy arrays saved for this scan in the grt123 bbox_result folder.
# NOTE(review): by file suffix, presumably `lbb` holds label boxes and `pbb`
# predicted boxes -- confirm against the code that writes these files.
lbb = np.load('/Users/john/Projects/SOTAEvaluationNoduleDetection/models/grt123/bbox_result/summit-2264-sze_Y0_BASELINE_A_lbb.npy')
pbb = np.load('/Users/john/Projects/SOTAEvaluationNoduleDetection/models/grt123/bbox_result/summit-2264-sze_Y0_BASELINE_A_pbb.npy')

# Display the loaded array; the recorded output shows rows of five floats.
# NOTE(review): the meaning of each of the five columns is not established
# here -- TODO confirm.
pbb


array([[ -1.0058006 ,   9.34499563,  61.63537601,  25.57677225,
          9.47987399],
       [ -1.60796082,   9.50708356,  65.0438125 ,  25.25533536,
          9.49339826],
       [ -2.14207435,   9.29750488,  93.51342486, 297.32727012,
          8.85282339],
       ...,
       [ -1.76541555, 161.40352019, 121.06568811, 214.43193464,
          8.25465564],
       [  2.08258772, 161.6329742 , 121.0517908 , 217.18792478,
          8.36213585],
       [ -1.09013808, 165.1447904 , 121.34341612, 217.28486838,
          8.5587367 ]])

In [6]:
# Scratch values used to exercise the assertion below.
i = 0
input_size = [10, 10, 10]
stride = 4
filename = 'harry'

# 10 % 4 == 2, so with these values the assertion fails and the formatted
# message is raised as an AssertionError (see the recorded output).
# NOTE(review): this looks like a deliberate check of the message format
# rather than a real validation step -- confirm before keeping the cell.
assert input_size[i] % stride == 0, f'Error: index: {i}, input_size: {input_size[i]} is not divisiable by stride:{stride}, file:{filename}'


AssertionError: Error: index: 0, input_size: 10 is not divisiable by stride:4, file:harry