# Body part classification

This notebook trains a convolutional neural network that classifies X-ray images by body part.

## 1. Preprocessing

First, I import all the dependencies.

In [1]:
# import tf
import tensorflow as tf

# import numpy for arrays
import numpy as np
# import pandas for csv import
import pandas as pd
# import matplotlib
import matplotlib.pyplot as plt
# os to handle pathnames
import os.path
# PIL to open & manipulate images
from PIL import Image
from PIL import ImageChops
from PIL import ImageOps
from PIL import ImageFile
# for messages in loops
from IPython.display import clear_output
# for train/test split
from sklearn.model_selection import train_test_split
# for one-hot encoding
from sklearn.preprocessing import LabelBinarizer

# import keras modules from tensorflow.contrib for the CNN
from tensorflow.contrib.keras.python.keras.models import Sequential
from tensorflow.contrib.keras.python.keras.layers import Dense, Dropout, \
    Flatten
from tensorflow.contrib.keras.python.keras.layers.convolutional import \
    Conv2D, MaxPooling2D
from tensorflow.contrib.keras.python.keras.utils import to_categorical
from tensorflow.contrib.keras.python.keras import backend as K
from tensorflow.contrib.keras.python.keras import callbacks
from tensorflow.contrib.keras.python.keras.preprocessing.image import \
    ImageDataGenerator, array_to_img, img_to_array, load_img

Next, I set the image data format and fix the random seeds.

In [2]:
# One shared seed for NumPy and TensorFlow so that repeated runs draw the
# same random numbers.
seed = 1
tf.set_random_seed(seed)
np.random.seed(seed)

# Keras tensors are laid out as (rows, cols, channels).
K.set_image_data_format("channels_last")

Create an array with all image names and an array with their labels

I then define a function to import and transform the images

Import all images (already done; see the array import two cells below)

Save as np array

Load saved array

Split the data into training and test sets

In [3]:
# Load the pre-split image and label arrays from disk.
img_train = np.load(file="/data/body_part_classification/img_train.npy")
img_test = np.load(file="/data/body_part_classification/img_test.npy")
labels_train = np.load(file="/data/body_part_classification/labels_train.npy")
labels_test = np.load(file="/data/body_part_classification/labels_test.npy")

# Fail fast if the saved arrays are out of sync; otherwise a mismatch would
# only surface much later, inside model.fit.
assert len(img_train) == len(labels_train), (
    "img_train and labels_train differ in length: %d vs %d"
    % (len(img_train), len(labels_train)))
assert len(img_test) == len(labels_test), (
    "img_test and labels_test differ in length: %d vs %d"
    % (len(img_test), len(labels_test)))
assert img_train.shape[1:] == img_test.shape[1:], (
    "train and test images have different shapes: %s vs %s"
    % (img_train.shape[1:], img_test.shape[1:]))

In [4]:
# one hot encode outputs
# Fit a single encoder on the training labels and reuse it for the test
# labels. Two independently fitted encoders only agree on the class -> column
# mapping when both splits contain exactly the same set of classes.
label_encoder = LabelBinarizer()
labels_train = label_encoder.fit_transform(labels_train)
labels_test = label_encoder.transform(labels_test)
# The number of output units is the number of one-hot columns.
num_classes = labels_train.shape[1]
print(num_classes)

6


## 2. Model

In [7]:
def conv_model(input_shape=(256, 256, 1), n_classes=None):
    """Build and compile the CNN used for body-part classification.

    The network consists of six blocks, each made of two 3x3 "same"-padded
    ReLU convolutions followed by 2x2 max pooling (32, 32, 64, 64, 128 and
    128 filters), then a flatten layer, two 256-unit ReLU dense layers each
    followed by 50% dropout, and a softmax output layer.

    Args:
        input_shape: (rows, cols, channels) of a single input image.
            Defaults to (256, 256, 1), the shape used so far. The six pooling
            layers each use a 2x2 pool, so very small inputs will not work.
        n_classes: Number of units in the softmax output layer. When None
            (the default), the notebook-level ``num_classes`` is used, which
            keeps ``conv_model()`` working exactly as before.

    Returns:
        The compiled Sequential model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    if n_classes is None:
        # Backward-compatible fallback to the global set by the encoding cell.
        n_classes = num_classes

    # create model
    model = Sequential()

    # Only the very first convolution receives the input shape and the
    # he_normal initializer; all other convolutions use the layer defaults.
    first_layer_extras = {"input_shape": input_shape,
                          "kernel_initializer": "he_normal"}
    for filters in (32, 32, 64, 64, 128, 128):
        model.add(Conv2D(filters=filters, kernel_size=(3, 3), padding="same",
                         activation="relu", **first_layer_extras))
        first_layer_extras = {}
        model.add(Conv2D(filters=filters, kernel_size=(3, 3), padding="same",
                         activation="relu"))
        model.add(MaxPooling2D(pool_size=(2, 2)))

    # Classifier head: two regularized dense layers, then the softmax output.
    model.add(Flatten())
    for _ in range(2):
        model.add(Dense(units=256, kernel_initializer="he_normal",
                        activation="relu"))
        model.add(Dropout(rate=0.5))
    model.add(Dense(units=n_classes, activation="softmax",
                    kernel_initializer="he_normal"))

    # Compile model
    model.compile(optimizer="adam", loss="categorical_crossentropy",
                  metrics=["accuracy"])

    return model

In [8]:
# build the model
# conv_model() returns the compiled network; summary() prints one row per
# layer with its output shape and parameter count.
model = conv_model()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_13 (Conv2D)           (None, 256, 256, 32)      320       
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 256, 256, 32)      9248      
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 128, 128, 32)      0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 128, 128, 32)      9248      
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 128, 128, 32)      9248      
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 64, 64, 32)        0         
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 64, 64, 64)        18496     
__________

In [9]:
# TensorBoard callback; its settings are collected in one place and then
# unpacked into the constructor.
tb_options = {
    "log_dir": "/data/body_part_classification/tensorboard/2nd_model",
    "histogram_freq": 0,
    "write_graph": True,
    "write_images": True,
}
tb_callback = callbacks.TensorBoard(**tb_options)

In [10]:
# Fit the model
# Keep the History object returned by fit() so the per-epoch metrics stay
# available afterwards (history.history) instead of only echoing its repr.
# NOTE(review): in the recorded run val_loss is lowest around epoch 9 (0.0707)
# and ends at 0.1879 while the training loss keeps falling -- consider fewer
# epochs or an EarlyStopping callback.
# NOTE(review): the test split doubles as validation data here, so it is not
# an untouched hold-out set if it is also used to tune the model.
history = model.fit(x=img_train, y=labels_train, batch_size=100, epochs=100,
                    verbose=2, callbacks=[tb_callback],
                    validation_data=(img_test, labels_test))

Train on 33875 samples, validate on 14519 samples
Epoch 1/100
137s - loss: 0.4334 - acc: 0.8434 - val_loss: 0.1194 - val_acc: 0.9722
Epoch 2/100
125s - loss: 0.1205 - acc: 0.9705 - val_loss: 0.0926 - val_acc: 0.9750
Epoch 3/100
124s - loss: 0.1043 - acc: 0.9751 - val_loss: 0.0832 - val_acc: 0.9781
Epoch 4/100
124s - loss: 0.0832 - acc: 0.9796 - val_loss: 0.0813 - val_acc: 0.9800
Epoch 5/100
124s - loss: 0.0756 - acc: 0.9816 - val_loss: 0.0818 - val_acc: 0.9818
Epoch 6/100
124s - loss: 0.0659 - acc: 0.9822 - val_loss: 0.0747 - val_acc: 0.9820
Epoch 7/100
124s - loss: 0.0649 - acc: 0.9833 - val_loss: 0.0924 - val_acc: 0.9800
Epoch 8/100
123s - loss: 0.0565 - acc: 0.9847 - val_loss: 0.0771 - val_acc: 0.9820
Epoch 9/100
123s - loss: 0.0530 - acc: 0.9849 - val_loss: 0.0707 - val_acc: 0.9834
Epoch 10/100
123s - loss: 0.0479 - acc: 0.9868 - val_loss: 0.0770 - val_acc: 0.9822
Epoch 11/100
123s - loss: 0.0433 - acc: 0.9871 - val_loss: 0.0830 - val_acc: 0.9810
Epoch 12/100
123s - loss: 0.0396 - 

121s - loss: 0.0061 - acc: 0.9987 - val_loss: 0.1487 - val_acc: 0.9841
Epoch 99/100
121s - loss: 0.0111 - acc: 0.9972 - val_loss: 0.1578 - val_acc: 0.9829
Epoch 100/100
121s - loss: 0.0141 - acc: 0.9970 - val_loss: 0.1879 - val_acc: 0.9837


<tensorflow.contrib.keras.python.keras.callbacks.History at 0x7f821bfffeb8>

In [11]:
# Score the trained model on the test split. The model was compiled with a
# single "accuracy" metric, and scores[1] is the value reported below as a
# percentage error.
scores = model.evaluate(img_test, labels_test, verbose=0)
error_pct = 100 - scores[1] * 100
print("Error: %.2f%%" % error_pct)

Error: 1.63%


In [12]:
# Save the model
# Writes the trained model to the given .h5 path so it can be reloaded
# without retraining.
model.save('/data/body_part_classification/2nd_model.h5')