# Body part classification

This notebook classifies the x-ray images into different body parts.

First I import all the dependencies

In [1]:
# import tf
import tensorflow as tf

# import numpy for arrays
import numpy as np
# import pandas for csv import
import pandas as pd
# import matplotlib
import matplotlib.pyplot as plt
# os to handle pathnames
import os.path
# PIL to open & manipulate images
from PIL import Image
from PIL import ImageChops
from PIL import ImageOps
from PIL import ImageFile
# for messages in loops
from IPython.display import clear_output

# import keras modules from tensorflow.contrib for the CNN
from tensorflow.contrib.keras.python.keras.models import Sequential
from tensorflow.contrib.keras.python.keras.layers import Dense, Dropout, \
    Flatten
from tensorflow.contrib.keras.python.keras.layers.convolutional import \
    Conv2D, MaxPooling2D
from tensorflow.contrib.keras.python.keras.utils import to_categorical
from tensorflow.contrib.keras.python.keras import backend as K
from tensorflow.contrib.keras.python.keras import callbacks
from tensorflow.contrib.keras.python.keras.preprocessing.image import \
    ImageDataGenerator, array_to_img, img_to_array, load_img

Next I define the image format and fix the random seed

In [2]:
# image format -> (rows, cols, channels)
K.set_image_data_format("channels_last")
# fix random seed for reproducibility
seed = 1
np.random.seed(seed)
tf.set_random_seed(seed)

Create an array with all image names and an array with their labels

In [3]:
df = pd.read_csv(filepath_or_buffer="/data/deepxray/data/rau_data/merged.csv")
df.head()
names = df["sop_iuid"].as_matrix()
labels = df["body_part"].as_matrix()
n_img = names.size

I then define a function to import and transform the images

In [5]:
# desired image size
size = (256, 256)
# specify filepath of images
root = "/data/deepxray"
dirname = "images/jpgs"

def preprocess_img(file_name):
    # at least one file is damaged, this setting allows
    # to import it anyways
    ImageFile.LOAD_TRUNCATED_IMAGES = True
    
    # filename
    file = file_name + ".jpg"

    # read image
    image = Image.open(fp=os.path.join(root, dirname, file))
    # some images are read as rgb -> convert to grayscale
    if (image.mode != 0):
        image = image.convert(mode='L')
    # scaling image to desired resultion while keeping aspect ratio
    image.thumbnail(size=size, resample=Image.ANTIALIAS)
    image_size = image.size
    # histogram normalization (remaps the image -> lightest pixel = 255, darkest pixel = 0)
    image = ImageOps.autocontrast(image=image)
    # add a black bar to the image to get desired size
    thumb = image.crop(box=(0, 0, size[0], size[1]))
    # shift original image to the middle so that the black bars are on both sides
    offset_x = max((size[0] - image_size[0]) / 2, 0)
    offset_y = max((size[1] - image_size[1]) / 2, 0)
    thumb = ImageChops.offset(image=thumb, xoffset=int(offset_x), yoffset=int(offset_y))
    # turn into np array
    data = np.asarray(a=thumb, dtype="uint8")
    #data = data.reshape(data.shape[0], data.shape[1], 1)
    #data = data/255.
    return(data)

Import all images and save as np array

In [6]:
images = []

for i, name in enumerate(names):
    if (i % 100) == 0:
        clear_output()
        print("Importing images, {0:.2f} % finished".format(i/n_img*100))
    img = preprocess_img(name)
    images.append(img)

Importing images, 52.49 % finished


OSError: cannot identify image file '/data/deepxray/images/jpgs/1.2.840.114257.1.25112008.121033.1.1.1.jpg'

In [None]:
images = np.array(images)
np.save(file="/data/body_part_classification/img_array.npy", arr=images)

In [None]:
images = np.load(file="/data/body_part_classification/img_array.npy")
images = images/255.
plt.imshow(images[0], cmap='gray')
plt.show()

OSError: cannot identify image file '/data/deepxray/images/jpgs/1.2.840.114257.1.25112008.121033.1.1.1.jpg'