# Abnormality Detection in Musculoskeletal Radiographs

## XR_SHOULDER Study Type

In [1]:
from keras.applications.densenet import DenseNet169, DenseNet121, preprocess_input
from keras.preprocessing.image import ImageDataGenerator, load_img, image
from keras.models import Sequential, Model, load_model
from keras.layers import Conv2D, MaxPool2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, Callback
from keras import regularizers
import pandas as pd
from tqdm import tqdm
import os
import numpy as np
import random
from keras.optimizers import Adam
import keras.backend as K
import cv2
import matplotlib.pyplot as plt

Using TensorFlow backend.


###  Data preprocessing

In [2]:
def listdir_nohidden(path):
    for f in os.listdir(path):
        if not f.startswith('.'):
            yield f

###  Creating a csv file containing path to image & csv

In [3]:
def create_images_metadata_csv(category,study_types):
    """
    This function creates a csv file containing the path of images, label.
    """
    image_data = {}
    study_label = {'positive': 1, 'negative': 0}
    #study_types = ['XR_ELBOW','XR_FINGER','XR_FOREARM','XR_HAND','XR_HUMERUS','XR_SHOULDER','XR_WRIST']
    #study_types = ['XR_ELBOW']
    i = 0
    image_data[category] = pd.DataFrame(columns=['Path','Count', 'Label'])
    for study_type in study_types: # Iterate throught every study types
        DATA_DIR = 'data/MURA-v1.1/%s/%s/' % (category, study_type)
        patients = list(os.walk(DATA_DIR))[0][1]  # list of patient folder names
        for patient in tqdm(patients):  # for each patient folder
            for study in os.listdir(DATA_DIR + patient):  # for each study in that patient folder
                if(study != '.DS_Store'):
                    label = study_label[study.split('_')[1]]  # get label 0 or 1
                    path = DATA_DIR + patient + '/' + study + '/'  # path to this study
                    for j in range(len(list(listdir_nohidden(path)))):
                        image_path = path + 'image%s.png' % (j + 1)
                        image_data[category].loc[i] = [image_path,1, label]  # add new row
                        i += 1
    image_data[category].to_csv(category+"_image_data.csv",index = None, header=False)

In [4]:
#New function create image array by study level
def getImagesInArrayNew(train_dataframe):
    images = []
    labels = []
    for i, data in tqdm(train_dataframe.iterrows()):
        img = cv2.imread(data['Path'])
#         #random rotation
#         angle = random.randint(-30,30)
#         M = cv2.getRotationMatrix2D((img_width/2,img_height/2),angle,1)
#         img = cv2.warpAffine(img,M,(img_width,img_height))
        #resize
        img = cv2.resize(img,(img_width,img_height))    
        img = img[...,::-1].astype(np.float32)
        images.append(img)
        labels.append(data['Label'])
    images = np.asarray(images).astype('float32') 
    #normalization
    mean = np.mean(images[:, :, :])
    std = np.std(images[:, :, :])
    images[:, :, :] = (images[:, :, :] - mean) / std
    labels = np.asarray(labels)
    return {'images': images, 'labels': labels}

####  Variables intialization

In [5]:
img_width, img_height = 224, 224
#Keras ImageDataGenerator to load, transform the images of the dataset
BASE_DATA_DIR = 'data/'
IMG_DATA_DIR = 'MURA-v1.1/'

###  XR_SHOULDER ImageDataGenertors

In [6]:
train_data_dir = BASE_DATA_DIR + IMG_DATA_DIR + 'train/XR_SHOULDER'
valid_data_dir = BASE_DATA_DIR + IMG_DATA_DIR + 'valid/XR_SHOULDER'

train_datagen = ImageDataGenerator(
    rotation_range=30,
    horizontal_flip=True
)

test_datagen = ImageDataGenerator(
    rotation_range=30,
    horizontal_flip=True

)

study_types = ['XR_SHOULDER']

create_images_metadata_csv('train',study_types)
create_images_metadata_csv('valid',study_types)

valid_image_df = pd.read_csv('valid_image_data.csv', names=['Path','Count', 'Label'])
train_image_df = pd.read_csv('train_image_data.csv', names=['Path', 'Count','Label'])

dd={}

dd['train'] = train_image_df
dd['valid'] = valid_image_df

valid_dict = getImagesInArrayNew(valid_image_df)
train_dict = getImagesInArrayNew(train_image_df)

train_datagen.fit(train_dict['images'],augment=True)
test_datagen.fit(valid_dict['images'],augment=True)

validation_generator = test_datagen.flow(
    x=valid_dict['images'],
    y=valid_dict['labels'],
    batch_size = 1
)

train_generator = train_datagen.flow(
    x=train_dict['images'],
    y=train_dict['labels']
)

100%|██████████| 2694/2694 [00:23<00:00, 116.82it/s]
100%|██████████| 173/173 [00:01<00:00, 132.19it/s]
563it [00:02, 243.52it/s]
8379it [00:34, 243.58it/s]


###  Building a model

###  Model paramaters

In [7]:
#model parameters for training
#K.set_learning_phase(1)
nb_train_samples = len(train_dict['images'])
nb_validation_samples = len(valid_dict['images'])
epochs = 10
batch_size = 8
steps_per_epoch = nb_train_samples//batch_size
print(steps_per_epoch)
n_classes = 1

1047


In [8]:
def build_model():
    base_model = DenseNet169(input_shape=(None, None,3),
                             weights='imagenet',
                             include_top=False,
                             pooling='avg')
#     i = 0
#     total_layers = len(base_model.layers)
#     for layer in base_model.layers:
#         if(i <= total_layers//2):
#             layer.trainable = True
#             i = i+1

    x = base_model.output

    predictions = Dense(n_classes,activation='sigmoid')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    return model

In [9]:
model = build_model()

In [10]:
#Compiling the model
model.compile(loss="binary_crossentropy", optimizer='adam', metrics=['acc', 'mse'])

In [11]:
#callbacks for early stopping incase of reduced learning rate, loss unimprovement
early_stop = EarlyStopping(monitor='val_loss', patience=8, verbose=1, min_delta=1e-4)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=1, verbose=1, min_lr=0.0001)
callbacks_list = [early_stop, reduce_lr]

###  Training the Model

In [12]:
#train the module
model_history = model.fit_generator(
    train_generator,
    epochs=epochs,
    workers=0,
    use_multiprocessing=False,  
    steps_per_epoch = nb_train_samples//batch_size,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples //batch_size,
    callbacks=callbacks_list
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10

Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 5/10
Epoch 6/10

Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.0001.
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [13]:
model.save("densenet_mura_rs_v3_xr_shoulder.h5")

###  Performance Evaluation

In [15]:
#Now we evaluate the trained model with the validation dataset and make a prediction. 
#The class predicted will be the class with maximum value for each image.
ev = model.evaluate_generator(validation_generator, steps=(nb_validation_samples //batch_size)+1,  workers=0, use_multiprocessing=False)
ev[1]


0.704225352112676