**Experiments with DICOM preprocessing**

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import dicom
import scipy.ndimage as spi
import matplotlib.pyplot as plt


# Root directory of the sample data; load_scans() joins a patient id onto this
# path and reads every file in the resulting directory as a DICOM slice.
SAMPLE_IMGS = '../input/sample_images'

def load_scans(patient):
    """Load every DICOM slice of one patient, ordered along the Z axis.

    Args:
        patient: Name of the patient's sub-directory under ``SAMPLE_IMGS``.

    Returns:
        A list of datasets as returned by ``dicom.read_file``, sorted by
        ``ImagePositionPatient[2]`` in ascending order. When at least two
        slices are present, ``SliceThickness`` on every slice is overwritten
        with the absolute spacing between the first two slices. With fewer
        than two slices no spacing can be derived, so the slices are
        returned with whatever ``SliceThickness`` they already carry.
    """
    patient_dir = os.path.join(SAMPLE_IMGS, patient)
    slices = [
        dicom.read_file(os.path.join(patient_dir, scan))
        for scan in os.listdir(patient_dir)
    ]

    # ImagePositionPatient[2] equals the slice location == Z coordinate of the scan
    slices.sort(key=lambda s: float(s.ImagePositionPatient[2]))

    # A spacing needs two neighbouring slices; bail out instead of raising
    # IndexError on an empty or single-slice series.
    if len(slices) < 2:
        return slices

    first, second = slices[0], slices[1]
    try:
        slice_thickness = np.abs(
            first.ImagePositionPatient[2] - second.ImagePositionPatient[2]
        )
    except (AttributeError, TypeError):
        # Fall back to SliceLocation only when the position tag is missing
        # or not usable for arithmetic; any other error should surface.
        slice_thickness = np.abs(first.SliceLocation - second.SliceLocation)

    for s in slices:
        s.SliceThickness = slice_thickness

    return slices

def get_pixels_hu(slices):
    """Stack the slices' pixel data into one int16 volume in Hounsfield units.

    Each element of ``slices`` must expose ``pixel_array``, ``RescaleSlope``
    and ``RescaleIntercept``. Pixels equal to -2000 are reset to 0 before the
    per-slice rescale (``value * slope + intercept``) is applied.

    Returns:
        A new ``np.int16`` array with one leading entry per slice.
    """
    volume = np.stack([s.pixel_array for s in slices]).astype(np.int16)
    volume[volume == -2000] = 0

    # Convert to Hounsfield units (HU), one slice at a time because slope and
    # intercept are stored per slice.
    for idx, dcm in enumerate(slices):
        intercept = dcm.RescaleIntercept
        slope = dcm.RescaleSlope

        if slope != 1:
            # Multiply in float64; assigning back into the int16 volume
            # truncates the result to int16 again.
            volume[idx] = slope * volume[idx].astype(np.float64)

        volume[idx] += np.int16(intercept)

    return np.array(volume, dtype=np.int16)

# Load one sample patient, convert the whole series to Hounsfield units and
# preview a single slice as a sanity check.
cancer_id = '0acbebb8d463b4b9ca88cf38431aac69'
scans = load_scans(cancer_id)
image_hu = get_pixels_hu(scans)

preview_slice = image_hu[70]
plt.imshow(preview_slice, cmap='gray')
plt.show()

In [None]:
from skimage.morphology import ball, disk, binary_erosion, binary_closing
from skimage.measure import label, regionprops
from skimage import measure
from skimage.filters import roberts
from skimage.segmentation import clear_border
from scipy import ndimage as ndi


# Intensity cutoff shared by the cells below: get_segmented_lungs() treats
# pixels below it as foreground, and get_lung_nodules() zeroes pixels below it.
threshold = -420


def _show_slice(img, cmap):
    """Display a single 2-D array with matplotlib using the given colormap."""
    plt.imshow(img, cmap=cmap)
    plt.show()


def get_segmented_lungs(image, plot=False):
    """Zero out everything outside the segmented lung mask of one 2-D slice.

    The mask is built by thresholding against the module-level ``threshold``,
    dropping border-connected blobs, keeping the two largest connected
    regions and cleaning the result up with morphological operations.

    Args:
        image: 2-D array holding one slice. It is MODIFIED IN PLACE: every
            pixel outside the final mask is set to 0.
        plot: When truthy, show the intermediate result of each plotted step.

    Returns:
        The same ``image`` object that was passed in, after masking.
    """
    # Step 1: Convert into a binary image.
    binary = image < threshold

    # Step 2: Remove the blobs connected to the border of the image.
    cleared = clear_border(binary)

    # Step 3: Closure operation with a disk of radius 2.
    binary = binary_closing(cleared, disk(2))
    if plot:
        _show_slice(binary, 'gray')

    # Step 4: Label the image.
    label_image = label(binary)

    # Step 5: Keep the labels with the 2 largest areas.
    # regionprops is computed once and each small region is erased with a
    # single fancy-index assignment instead of a per-pixel Python loop.
    regions = regionprops(label_image)
    if len(regions) > 2:
        second_largest = sorted(r.area for r in regions)[-2]
        for region in regions:
            if region.area < second_largest:
                coords = region.coords
                label_image[coords[:, 0], coords[:, 1]] = 0
    binary = label_image > 0
    if plot:
        _show_slice(binary, plt.cm.bone)

    # Step 6: Closure operation with a disk of radius 12.
    binary = binary_closing(binary, disk(12))
    if plot:
        _show_slice(binary, 'gray')

    # Step 7: Fill in the small holes inside the binary mask of lungs.
    # The Roberts edge map outlines the mask; filling that outline closes
    # the interior holes.
    edges = roberts(binary)
    binary = ndi.binary_fill_holes(edges)
    if plot:
        _show_slice(binary, plt.cm.bone)

    # Step 8: Erosion operation with a disk of radius 2.
    binary = binary_erosion(binary, disk(2))
    if plot:
        _show_slice(binary, 'gray')

    # Step 9: Superimpose the binary mask on the input image.
    # This writes into the caller's array.
    outside_mask = binary == 0
    image[outside_mask] = 0
    if plot:
        _show_slice(image, 'gray')

    return image

# Walk through the segmentation steps on one slice, plotting each stage.
# Note: this masks image_hu[70] in place.
get_segmented_lungs(image_hu[70], plot=True)

In [None]:
def apply_threshold(threshold, scan):
    """Set every element of ``scan`` below ``threshold`` to 0, in place.

    Returns the same ``scan`` object that was passed in.
    """
    below_cutoff = scan < threshold
    scan[below_cutoff] = 0
    return scan

def get_lung_nodules(patient_imgs):
    """Threshold every slice and stack the ones that still contain data.

    Each slice is passed through ``apply_threshold`` with the module-level
    ``threshold`` (which modifies the slice in place). Slices that end up
    all-zero are dropped; the remaining ones are stacked into one array.
    """
    thresholded = [apply_threshold(threshold, img) for img in patient_imgs]

    non_empty = []
    for candidate in thresholded:
        if candidate.any():
            non_empty.append(candidate)

    return np.stack(non_empty)

# Segment the lungs in every slice of the volume (each slice of image_hu is
# masked in place by get_segmented_lungs).
segmented = list(map(get_segmented_lungs, image_hu))

In [None]:
from mpl_toolkits.mplot3d.art3d import Poly3DCollection


def plot_3d(image, threshold=-300):
    """Render an isosurface of a 3-D volume as a matplotlib 3-D mesh.

    Args:
        image: 3-D array; its axes are reversed before meshing.
        threshold: Iso-level passed to ``measure.marching_cubes``.
    """
    # Position the scan upright,
    # so the head of the patient would be at the top facing the camera
    p = image.transpose(2, 1, 0)

    # Older scikit-image returns (verts, faces); newer releases return
    # (verts, faces, normals, values). Taking the first two items works
    # with both instead of failing to unpack on newer versions.
    verts, faces = measure.marching_cubes(p, threshold)[:2]

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111, projection='3d')

    # verts[faces] expands the face index triples into triangle coordinates.
    mesh = Poly3DCollection(verts[faces], alpha=0.70)
    face_color = [0.45, 0.45, 0.75]
    mesh.set_facecolor(face_color)
    ax.add_collection3d(mesh)

    ax.set_xlim(0, p.shape[0])
    ax.set_ylim(0, p.shape[1])
    ax.set_zlim(0, p.shape[2])

    plt.show()
    

# Threshold the segmented slices and render the remaining structures in 3-D.
nodule_volume = get_lung_nodules(segmented)
plot_3d(nodule_volume, threshold=-400)