In [1]:
import pydicom # dicom handler package
import cv2 
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import scipy.ndimage
import os
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import pickle

# Functions for image manipulation

In [16]:
def load_scan(path):
    """Collect the paths of the DICOM files found below ``path``.

    The directory tree is traversed recursively with ``os.walk``. A file is
    kept when its lower-cased name contains ".dcm" anywhere in it.

    Parameters
    ----------
    path : str
        Root directory to search.

    Returns
    -------
    list of str
        Directory-joined file paths, in the order ``os.walk`` yields them.
    """
    return [
        os.path.join(folder, name)
        for folder, _subfolders, names in os.walk(path)
        for name in names
        if ".dcm" in name.lower()
    ]

def extract_pixel_spacing(files, outputFilename = 'PixelSpacing_ForTestData.csv'):
    """Write the pixel spacing of every DICOM file to a CSV file.

    For each file the ``ImagerPixelSpacing`` element is used; when it cannot
    be read, ``PixelSpacing`` is used instead. Both values of a file are
    resolved before anything is stored, so the ``x`` and ``y`` columns always
    have the same length.

    Parameters
    ----------
    files : iterable of str
        Paths of the DICOM files to inspect.
    outputFilename : str
        Destination CSV file; it gets the columns ``x`` and ``y`` plus the
        default pandas index.
    """
    size_data = {'x':[],'y':[]}
    for filenameDCM in files:
        # Only the header is needed, so the pixel data is not loaded.
        ds = pydicom.dcmread(filenameDCM, stop_before_pixels=True)
        try:
            spacing = ds.ImagerPixelSpacing
            spacing_x, spacing_y = spacing[0], spacing[1]
        except Exception:
            # Fall back to the generic spacing element for this file.
            spacing = ds.PixelSpacing
            spacing_x, spacing_y = spacing[0], spacing[1]
        # Append as a pair: a half-read ImagerPixelSpacing must not leave an
        # extra entry in one of the columns.
        size_data['x'].append(spacing_x)
        size_data['y'].append(spacing_y)
    pd.DataFrame(size_data).to_csv(outputFilename)

def resize_dicom_image(image, size_x = 6000, size_y=6000):
    """Resize ``image`` with bicubic interpolation.

    Parameters
    ----------
    image : array-like
        Image passed straight to ``cv2.resize``.
    size_x, size_y : int
        Passed to OpenCV as ``dsize=(size_x, size_y)``.

    Returns
    -------
    The resized image returned by ``cv2.resize``.
    """
    target_size = (size_x, size_y)
    return cv2.resize(image, dsize=target_size, interpolation=cv2.INTER_CUBIC)


def create_unified_sized_image(dicom_file):
    """Build the physical coordinate axes of a DICOM image.

    Parameters
    ----------
    dicom_file : dataset
        Object exposing ``Rows``, ``Columns`` and either
        ``ImagerPixelSpacing`` or ``PixelSpacing``.

    Returns
    -------
    (x, y) : tuple of numpy.ndarray
        ``x`` has ``Rows + 1`` entries spaced by the first spacing value and
        ``y`` has ``Columns + 1`` entries spaced by the second one; both
        start at 0.0.
    """
    ConstPixelDims = (int(dicom_file.Rows), int(dicom_file.Columns))
    try:
        spacing = dicom_file.ImagerPixelSpacing
    except AttributeError:
        # Same fallback element that extract_pixel_spacing uses.
        spacing = dicom_file.PixelSpacing
    ConstPixelSpacing = (float(spacing[0]), float(spacing[1]))
    # Integer range scaled by the spacing: a float step in np.arange can
    # yield one element too many because of rounding.
    x = np.arange(ConstPixelDims[0] + 1) * ConstPixelSpacing[0]
    y = np.arange(ConstPixelDims[1] + 1) * ConstPixelSpacing[1]
    return x, y

    
def create_teaching_array(files, size = 576):
    """Load DICOM files into one ``(size, size, len(files))`` array.

    Every image is resized to ``size`` x ``size`` with
    ``resize_dicom_image`` and stored along the last axis. The array takes
    the dtype of the first file's pixel data. A progress line is printed
    after every 100 files.

    Parameters
    ----------
    files : sequence of str
        Paths of the DICOM files; must not be empty (``files[0]`` is read
        as the dtype reference, so an empty sequence raises ``IndexError``).
    size : int
        Edge length of the resized images.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(size, size, len(files))``.
    """
    # Taking the first image as a reference base for the output dtype
    reference = pydicom.dcmread(files[0])
    ArrayImg = np.zeros((size, size, len(files)), dtype = reference.pixel_array.dtype)

    for index, fileName in enumerate(files):
        # The first file is already loaded as the reference; do not read it twice.
        ds = reference if index == 0 else pydicom.dcmread(fileName)
        resized = resize_dicom_image(ds.pixel_array, size_x = size, size_y = size)
        ArrayImg[:,:,index] = resized
        parsed = index + 1
        # Modulo, not division: report after every 100th file.
        if parsed % 100 == 0:
            print('{} parsed!'.format(parsed))
    return ArrayImg

def preproceess_images(filePath, max_num, size = 1024):
    """Build the teaching array from the DICOM files below ``filePath``.

    The (misspelled) function name is kept so existing calls keep working.

    Parameters
    ----------
    filePath : str
        Root directory handed to ``load_scan``.
    max_num : int
        Upper bound on the number of files used; fewer are used when the
        directory holds fewer files.
    size : int
        Edge length the images are resized to.

    Returns
    -------
    The array returned by ``create_teaching_array``.
    """
    files = load_scan(filePath)
    files = files[0:max_num]
    teach_array = create_teaching_array(files, size = size)
    # Report what was actually loaded, not the requested maximum.
    print("Teaching array is ready!\nImage size: {0}x{0}, with {1} images".format(size, len(files)))
    return teach_array

In [17]:
%%time
# `input_root` is assigned here so this cell runs on a fresh kernel
# (Restart & Run All); the preprocessing cell further down uses the same value.
input_root = 'D:\\Lung\\'
# Paths of all DICOM files below the input root.
file = load_scan(input_root)
print(len(file))

4682
Wall time: 40 ms


In [48]:
%%time
# Build the teaching array from at most 250 DICOM files found below the input root
input_root = 'D:\\Lung\\'
teach_array = preproceess_images(filePath=input_root, max_num=250)

100 parsed!
Teaching array is ready!
Image size: 1024x1024, with 250 images
Wall time: 3.89 s


In [68]:
import os
def to_jpeg(filename, folder):
    """Convert one DICOM file to a JPEG image inside ``folder``.

    The output name is derived from the full input path with
    ``parse_path_to_filename``. Pixel data that is not already ``uint8`` is
    rescaled linearly (per image, minimum -> 0, maximum -> 255) because the
    JPEG encoder stores 8-bit samples. ``folder`` is created when missing.

    Parameters
    ----------
    filename : str
        Path of the DICOM file to convert.
    folder : str
        Directory that receives the JPEG file.

    Raises
    ------
    OSError
        If OpenCV reports that the image could not be written.
    """
    dicom_image = pydicom.dcmread(filename)
    pixels = dicom_image.pixel_array
    if pixels.dtype != np.uint8:
        pixels = pixels.astype(np.float64)
        low = pixels.min()
        span = pixels.max() - low
        if span > 0:
            pixels = (pixels - low) * (255.0 / span)
        else:
            # Constant image: nothing to stretch.
            pixels = np.zeros_like(pixels)
        pixels = np.rint(pixels).astype(np.uint8)
    if folder:
        # cv2.imwrite does not create directories; it just returns False.
        os.makedirs(folder, exist_ok=True)
    target = os.path.join(folder, parse_path_to_filename(filename))
    if not cv2.imwrite(target, pixels):
        raise OSError("Could not write JPEG file: {}".format(target))
    
def parse_path_to_filename(path):
    """Turn a file path into a flat ``.jpg`` file name.

    Only the final extension is removed, so dots in folder names or in the
    file stem are preserved and different inputs keep different names.
    Path separators become "_", and ":" and spaces are dropped.

    Parameters
    ----------
    path : str
        Path of the source file; it may have no extension at all.

    Returns
    -------
    str
        The flattened name ending in ".jpg".
    """
    stem, _extension = os.path.splitext(path)
    for separator in (os.sep, "/"):
        stem = stem.replace(separator, "_")
    stem = stem.replace(":", "").replace(" ", "")
    return stem + '.jpg'

In [76]:
%%time
# cv2.imwrite does not create missing directories (it only returns False),
# so make sure the output folder exists before converting.
output_folder = 'converted'
os.makedirs(output_folder, exist_ok=True)
print("Starting the conversion of :" , len(file))
for element in file:
    to_jpeg(element, folder=output_folder)

Starting the conversion of : 4682
Wall time: 2min 4s


In [30]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [32]:
# Training generator: multiplies pixel values by 1/255 and applies random
# augmentation (rotation, shifts, shear, zoom, horizontal flip).
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Validation generator: same rescaling, no augmentation.
val_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [None]:
# NOTE(review): flow_from_directory() is called without a `directory` argument, so this
# cell cannot run as written. TODO: pass the image root folder (Keras presumably expects
# one sub-folder per class there — confirm against the intended data layout).
train_generator = train_datagen.flow_from_directory()