# Body part classification

This notebook trains a convolutional neural network that classifies X-ray images by the body part they show.

## 1. Preprocessing

First I import all the dependencies

In [1]:
# import tf
import tensorflow as tf

# import numpy for arrays
import numpy as np
# import pandas for csv import
import pandas as pd
# import matplotlib
import matplotlib.pyplot as plt
# os to handle pathnames
import os.path
# PIL to open & manipulate images
from PIL import Image
from PIL import ImageChops
from PIL import ImageOps
from PIL import ImageFile
# for messages in loops
from IPython.display import clear_output
# for train/test split
from sklearn.model_selection import train_test_split
# for one-hot encoding
from sklearn.preprocessing import LabelBinarizer

# import keras modules from tensorflow.contrib for the CNN
from tensorflow.contrib.keras.python.keras.models import Sequential
from tensorflow.contrib.keras.python.keras.layers import Dense, Dropout, \
    Flatten
from tensorflow.contrib.keras.python.keras.layers.convolutional import \
    Conv2D, MaxPooling2D
from tensorflow.contrib.keras.python.keras.utils import to_categorical
from tensorflow.contrib.keras.python.keras import backend as K
from tensorflow.contrib.keras.python.keras import callbacks
from tensorflow.contrib.keras.python.keras.preprocessing.image import \
    ImageDataGenerator, array_to_img, img_to_array, load_img

Next I define the image format and fix the random seed

In [2]:
# Use one fixed seed for both NumPy and TensorFlow so that runs are
# reproducible.
seed = 1
tf.set_random_seed(seed)
np.random.seed(seed)

# Image tensors are laid out as (rows, cols, channels), i.e. channels last.
K.set_image_data_format(data_format="channels_last")

Create an array with all image names and an array with their labels

In [3]:
# Load the merged metadata table that holds the image names and their labels.
df = pd.read_csv(filepath_or_buffer="/data/deepxray/data/rau_data/merged.csv")

# `.values` returns the column as a NumPy array. It replaces `.as_matrix()`,
# which is deprecated and no longer available in current pandas releases.
names = df["sop_iuid"].values
labels = df["body_part"].values
n_img = names.size

# Preview the table; as the last expression of the cell it is actually rendered.
df.head()

I then define a function to import and transform the images

In [4]:
# desired image size as (width, height) in pixels
size = (256, 256)
# specify filepath of images
root = "/data/deepxray"
dirname = "images/jpgs"


def preprocess_img(file_name):
    """Load one JPEG and turn it into a fixed-size grayscale array.

    The image is converted to 8-bit grayscale, shrunk to fit inside `size`
    while keeping its aspect ratio, contrast-stretched, and centred on a
    black canvas of exactly `size`.

    Parameters
    ----------
    file_name : str
        Image name without the ".jpg" extension. The file is looked up in
        the `dirname` folder below `root`.

    Returns
    -------
    numpy.ndarray
        2-D uint8 array holding the padded image (rows = height,
        columns = width of `size`).
    """
    # at least one file is damaged, this setting allows
    # to import it anyways (note: this is a process-wide PIL flag)
    ImageFile.LOAD_TRUNCATED_IMAGES = True

    path = os.path.join(root, dirname, file_name + ".jpg")

    # The context manager closes the file handle as soon as decoding is done.
    with Image.open(fp=path) as source:
        # some images are read as rgb -> convert to grayscale.
        # `mode` is a string ("L", "RGB", ...), so the former `mode != 0`
        # test was always true. convert() returns an independent image (a
        # plain copy when the mode is already "L"), which is exactly what the
        # old code ended up doing for every file.
        image = source.convert(mode="L")

    # scaling image to desired resolution while keeping aspect ratio
    # (LANCZOS is the current name of the former ANTIALIAS filter)
    image.thumbnail(size=size, resample=Image.LANCZOS)
    image_size = image.size
    # histogram normalization (remaps the image -> lightest pixel = 255, darkest pixel = 0)
    image = ImageOps.autocontrast(image=image)
    # cropping beyond the image borders pads with black up to the desired size
    thumb = image.crop(box=(0, 0, size[0], size[1]))
    # shift original image to the middle so that the black bars are on both sides;
    # offset() wraps around, but only black padding crosses the edge
    offset_x = max((size[0] - image_size[0]) // 2, 0)
    offset_y = max((size[1] - image_size[1]) // 2, 0)
    thumb = ImageChops.offset(image=thumb, xoffset=offset_x, yoffset=offset_y)
    # turn into np array
    return np.asarray(a=thumb, dtype="uint8")

Import all images (already done — the saved arrays are loaded from disk further below instead of re-importing every image)

Save as np array

Load saved array

Splitting the data into train & test sets

In [5]:
# Restore the previously saved train/test arrays from disk:
# first the image arrays, then the matching label arrays.
img_train, img_test = (
    np.load("/data/body_part_classification/img_train.npy"),
    np.load("/data/body_part_classification/img_test.npy"),
)
labels_train, labels_test = (
    np.load("/data/body_part_classification/labels_train.npy"),
    np.load("/data/body_part_classification/labels_test.npy"),
)

In [6]:
# one hot encode outputs
# Fit ONE binarizer on the training labels and reuse it for the test labels,
# so that both matrices share the same class -> column mapping. Fitting a
# second binarizer on the test labels could yield a different column order
# or count whenever the two splits do not contain the same set of classes.
label_binarizer = LabelBinarizer()
labels_train = label_binarizer.fit_transform(labels_train)
labels_test = label_binarizer.transform(labels_test)
# width of the one-hot matrix = number of output units of the network
num_classes = labels_train.shape[1]
print(num_classes)

6


## 2. Model

In [7]:
def conv_model(input_shape=(256, 256, 1), n_classes=None):
    """Build and compile the CNN used for body-part classification.

    Architecture: four stages of (3x3 conv, 3x3 conv, 2x2 max-pool) with
    32, 32, 64 and 64 filters, followed by two 128-unit dense layers
    (dropout of 0.3 after the first) and a softmax output layer.

    Parameters
    ----------
    input_shape : tuple, optional
        Shape of a single input image as (rows, cols, channels).
        Defaults to (256, 256, 1), the value that was hard-coded before.
    n_classes : int, optional
        Number of output units. When omitted, the notebook-level
        `num_classes` is used, which matches the previous behaviour.

    Returns
    -------
    Sequential
        Model compiled with the Adam optimizer, categorical cross-entropy
        loss and the accuracy metric.
    """
    if n_classes is None:
        # resolved at call time so the notebook-level value is picked up
        n_classes = num_classes

    # create model
    model = Sequential()

    # first stage: the very first layer carries the input shape
    # NOTE(review): only this conv layer uses he_normal; the remaining conv
    # layers keep the Keras default initializer - confirm this is intended.
    model.add(Conv2D(filters=32, kernel_size=(3, 3), input_shape=input_shape,
                     activation="relu", kernel_initializer="he_normal"))
    model.add(Conv2D(filters=32, kernel_size=(3, 3), activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # remaining stages: conv -> conv -> pool
    for n_filters in (32, 64, 64):
        model.add(Conv2D(filters=n_filters, kernel_size=(3, 3),
                         activation="relu"))
        model.add(Conv2D(filters=n_filters, kernel_size=(3, 3),
                         activation="relu"))
        model.add(MaxPooling2D(pool_size=(2, 2)))

    # classifier head
    model.add(Flatten())
    model.add(Dense(units=128, kernel_initializer="he_normal",
                    activation="relu"))
    model.add(Dropout(rate=0.3))
    model.add(Dense(units=128, kernel_initializer="he_normal",
                    activation="relu"))
    model.add(Dense(units=n_classes, activation="softmax",
                    kernel_initializer="he_normal"))

    # Compile model
    model.compile(optimizer="adam", loss="categorical_crossentropy",
                  metrics=["accuracy"])

    return model

In [8]:
# Build the compiled CNN and print its layer-by-layer summary
# (output shapes and parameter counts).
model = conv_model()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 254, 254, 32)      320       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 252, 252, 32)      9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 126, 126, 32)      0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 124, 124, 32)      9248      
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 122, 122, 32)      9248      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 61, 61, 32)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 59, 59, 64)        18496     
__________

In [9]:
# TensorBoard logging for this run: the graph and images are written,
# histograms are switched off (frequency 0).
tb_callback = callbacks.TensorBoard(
    log_dir="/data/body_part_classification/tensorboard/1st_model",
    write_images=True,
    write_graph=True,
    histogram_freq=0,
)

In [10]:
# Fit the model
# The returned History object is kept so the per-epoch loss/accuracy values
# can be inspected or plotted afterwards; assigning it also keeps its bare
# repr out of the cell output.
# NOTE(review): the test split doubles as validation data here and is also
# used for the final evaluation, so no separate hold-out set exists.
history = model.fit(x=img_train, y=labels_train, batch_size=100, epochs=100,
                    verbose=2, callbacks=[tb_callback],
                    validation_data=(img_test, labels_test))

Train on 33875 samples, validate on 14519 samples
Epoch 1/100
121s - loss: 0.3434 - acc: 0.9068 - val_loss: 0.1185 - val_acc: 0.9705
Epoch 2/100
116s - loss: 0.1096 - acc: 0.9720 - val_loss: 0.0965 - val_acc: 0.9754
Epoch 3/100
116s - loss: 0.0884 - acc: 0.9766 - val_loss: 0.0988 - val_acc: 0.9735
Epoch 4/100
116s - loss: 0.0747 - acc: 0.9801 - val_loss: 0.0906 - val_acc: 0.9757
Epoch 5/100
116s - loss: 0.0640 - acc: 0.9824 - val_loss: 0.1038 - val_acc: 0.9762
Epoch 6/100
116s - loss: 0.0592 - acc: 0.9826 - val_loss: 0.0951 - val_acc: 0.9775
Epoch 7/100
116s - loss: 0.0551 - acc: 0.9838 - val_loss: 0.0871 - val_acc: 0.9790
Epoch 8/100
116s - loss: 0.0501 - acc: 0.9844 - val_loss: 0.1039 - val_acc: 0.9782
Epoch 9/100
115s - loss: 0.0441 - acc: 0.9866 - val_loss: 0.1017 - val_acc: 0.9808
Epoch 10/100
115s - loss: 0.0389 - acc: 0.9881 - val_loss: 0.1202 - val_acc: 0.9776
Epoch 11/100
115s - loss: 0.0374 - acc: 0.9885 - val_loss: 0.1087 - val_acc: 0.9798
Epoch 12/100
115s - loss: 0.0321 - 

114s - loss: 0.0024 - acc: 0.9993 - val_loss: 0.1520 - val_acc: 0.9819
Epoch 99/100
114s - loss: 0.0023 - acc: 0.9992 - val_loss: 0.1739 - val_acc: 0.9841
Epoch 100/100
114s - loss: 0.0049 - acc: 0.9988 - val_loss: 0.1790 - val_acc: 0.9820


<tensorflow.contrib.keras.python.keras.callbacks.History at 0x7ff3bcd93d30>

In [11]:
# Score the trained model on the test split. `scores` holds the loss first
# and the accuracy second (the only metric the model was compiled with).
scores = model.evaluate(x=img_test, y=labels_test, verbose=0)
test_accuracy = scores[1]
# Report the misclassification rate in percent.
print("Error: %.2f%%" % (100 - test_accuracy * 100))

Error: 1.80%


In [12]:
# Persist the trained model to a single HDF5 file.
model.save(filepath="/data/body_part_classification/1st_model.h5")