In [1]:
import tensorflow as tf
from tensorflow.keras import layers
from skimage.io import imshow
from pathlib import Path
import pandas as pd
import time
import matplotlib.pyplot as plt
from keras.utils import plot_model
from keras.utils.vis_utils import *
import keras
import importlib
import pydot
from IPython.display import Image 

Using TensorFlow backend.


In [2]:
# Root folder of the MURA v1.1 dataset, one level above the notebook directory.
dataset_root = Path('..') / 'MURA-v1.1'

In [3]:
# The path listing has no header row, so the single column is named explicitly.
df = pd.read_csv(
    dataset_root.joinpath('train_image_paths.csv'),
    names=['filename'],
    header=None,
)
df.head()

Unnamed: 0,filename
0,MURA-v1.1/train/XR_SHOULDER/patient00001/study...
1,MURA-v1.1/train/XR_SHOULDER/patient00001/study...
2,MURA-v1.1/train/XR_SHOULDER/patient00001/study...
3,MURA-v1.1/train/XR_SHOULDER/patient00002/study...
4,MURA-v1.1/train/XR_SHOULDER/patient00002/study...


In [4]:
# Derive the label from the study folder name embedded in each path
# (the regex captures the trailing "positive" / "negative" token).
df['class'] = df['filename'].str.extract('study.*_(positive|negative)')
df.head()

Unnamed: 0,filename,class
0,MURA-v1.1/train/XR_SHOULDER/patient00001/study...,positive
1,MURA-v1.1/train/XR_SHOULDER/patient00001/study...,positive
2,MURA-v1.1/train/XR_SHOULDER/patient00001/study...,positive
3,MURA-v1.1/train/XR_SHOULDER/patient00002/study...,positive
4,MURA-v1.1/train/XR_SHOULDER/patient00002/study...,positive


In [5]:
def generate_df(dataset_root, csv_name):
    """Load an image-path listing and label every row from its study folder.

    Parameters
    ----------
    dataset_root : pathlib.Path
        Folder containing the CSV; joined with ``csv_name`` using ``/``.
    csv_name : str
        Name of a header-less, single-column CSV of image paths.

    Returns
    -------
    pandas.DataFrame
        Columns ``filename`` (the path as read) and ``class`` (the
        ``positive`` / ``negative`` token captured from the study folder
        name; missing when the path does not match the pattern).
    """
    label_pattern = 'study.*_(positive|negative)'
    frame = pd.read_csv(dataset_root / csv_name, names=['filename'], header=None)
    frame['class'] = frame['filename'].str.extract(label_pattern)
    return frame

In [6]:
# Sanity check: show what sits next to the dataset folder.
[entry for entry in dataset_root.parent.iterdir()]

[PosixPath('../Notebooks 2019-20'),
 PosixPath('../.DS_Store'),
 PosixPath('../MURA-v1.1'),
 PosixPath('../DeepLearning2.ipynb'),
 PosixPath('../exec2'),
 PosixPath('../fashion-mnist-image-classification'),
 PosixPath('../.ipynb_checkpoints'),
 PosixPath('../Lectures 2019-20')]

In [7]:
from keras.applications.resnet50 import preprocess_input

batch_size=32
# NOTE(review): ResNet50's usual ImageNet input size is 224x224, so 244 may be a typo.
# It is kept unchanged because the model cell builds its input_shape from these values.
img_height=244
img_width=244
nb_epochs = 10

# Training pipeline: ResNet50 preprocessing plus random augmentation.
datagen = keras.preprocessing.image.ImageDataGenerator(preprocessing_function=preprocess_input,
                                                       shear_range=0.2,
                                                       zoom_range=0.2,
                                                       rotation_range = 30, 
                                                       horizontal_flip=True)

# Validation pipeline: identical preprocessing but NO augmentation, so that
# validation metrics are measured on the unmodified images.
valid_datagen = keras.preprocessing.image.ImageDataGenerator(preprocessing_function=preprocess_input)

# No `subset` argument here: the generator defines no validation_split, and the
# validation data comes from its own CSV, so the whole training CSV is used.
train_generator = datagen.flow_from_dataframe(
    generate_df(dataset_root, 'train_image_paths.csv'),
    directory=dataset_root.parent,
    x_col='filename',
    y_col='class',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary')

# shuffle=False keeps the validation batches in CSV order, which makes
# evaluation repeatable from run to run.
validation_generator = valid_datagen.flow_from_dataframe(
    generate_df(dataset_root, 'valid_image_paths.csv'),
    directory=dataset_root.parent,
    x_col='filename',
    y_col='class',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False)

Found 36808 images belonging to 2 classes.
Found 3197 images belonging to 2 classes.


In [9]:
def plotAccuracyLoss(someHistory):
    """Plot training vs. validation accuracy and loss curves side by side.

    Parameters
    ----------
    someHistory : object exposing a ``history`` dict
        For example the value returned by ``fit_generator``. The dict must
        contain ``loss`` / ``val_loss`` and an accuracy pair named either
        ``acc`` / ``val_acc`` or ``accuracy`` / ``val_accuracy``.

    Raises
    ------
    KeyError
        If one of the expected series is missing from ``someHistory.history``.

    Notes
    -----
    Switches matplotlib to the ``dark_background`` style and sets the global
    font size to 14; both settings persist after the call.
    """
    logs = someHistory.history
    # The accuracy series is logged as 'acc' by some Keras releases and as
    # 'accuracy' by others; accept whichever one is present.
    acc_key = 'acc' if 'acc' in logs else 'accuracy'

    plt.style.use('dark_background')
    plt.rcParams['font.size'] = 14
    plt.figure(1, figsize = (15,8))

    # (subplot position, history key, title, y-axis label) for each panel.
    panels = (
        (221, acc_key, 'Model Accuracy', 'Accuracy'),
        (222, 'loss', 'Model Loss', 'Loss'),
    )
    for position, key, title, ylabel in panels:
        plt.subplot(position)
        plt.plot(logs[key])
        plt.plot(logs['val_' + key])
        plt.title(title)
        plt.ylabel(ylabel)
        plt.xlabel('Epochs')
        plt.legend(['train', 'valid'], loc='upper left')

    plt.show()

In [25]:
# Early stopping & checkpointing the best model
# The callbacks are taken from the same standalone `keras` package that builds
# and trains the model; mixing in callbacks from the private
# `tensorflow.python.keras` module with a `keras` model is not supported.
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Create the checkpoint directory up front so saving the best model cannot fail on a missing folder.
Path('output').mkdir(parents=True, exist_ok=True)

# Stop once val_loss has not improved for 2 consecutive epochs.
cb_early_stopper = EarlyStopping(monitor = 'val_loss', patience = 2)
# Keep only the weights of the epoch with the best val_loss.
cb_checkpointer = ModelCheckpoint(filepath = 'output/best.hdf5', monitor = 'val_loss', save_best_only = True, mode = 'auto')

## ResNet50 (frozen ImageNet backbone)

In [33]:
from keras.layers import *
from keras.models import Sequential
from keras.applications.resnet50 import ResNet50

In [34]:
# ImageNet-pretrained ResNet50 without its classification head; global average
# pooling reduces the final feature maps to one feature vector per image.
resNet = ResNet50(
    include_top=False,
    weights='imagenet',
    pooling='avg',
    input_shape=(img_height, img_width, 3),
)

In [35]:
# Freeze the pretrained backbone: the model summary below accordingly reports
# its 23,587,712 parameters as non-trainable, leaving only the Dense head to train.
resNet.trainable = False

In [36]:
# Frozen ResNet50 feature extractor followed by a single sigmoid unit
# for the binary positive/negative decision.
modelCNN2 = Sequential()
modelCNN2.add(resNet)
modelCNN2.add(Dense(1, activation='sigmoid'))
modelCNN2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 2048)              23587712  
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 2049      
Total params: 23,589,761
Trainable params: 2,049
Non-trainable params: 23,587,712
_________________________________________________________________


In [37]:
# Single sigmoid output -> binary cross-entropy; Adam with its default settings.
modelCNN2.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.binary_crossentropy,
    metrics=['accuracy'],
)

In [None]:
# Number of whole batches per pass (a trailing partial batch is not counted).
STEP_SIZE_TRAIN = train_generator.samples // batch_size
STEP_SIZE_VALID = validation_generator.samples // batch_size

# Wall-clock reference; a later cell reads t_start to report the elapsed time.
t_start = time.time()

# Train with checkpointing of the best model and early stopping on val_loss.
historyCNN2 = modelCNN2.fit_generator(
    generator=train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    epochs=nb_epochs,
    validation_data=validation_generator,
    validation_steps=STEP_SIZE_VALID,
    callbacks=[cb_checkpointer, cb_early_stopper],
    use_multiprocessing=True,
)

Instructions for updating:
Use tf.cast instead.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
 141/1150 [==>...........................] - ETA: 3:46:21 - loss: 0.5494 - acc: 0.7292

In [None]:
# Print the network architecture diagram
# One path is used for both writing and displaying, so the image shown is
# always the diagram that was just generated.
model_diagram_path = Path('output/img_model/cnn_resNet2.png')
model_diagram_path.parent.mkdir(parents=True, exist_ok=True)
plot_model(modelCNN2, to_file=str(model_diagram_path), show_shapes=True)
Image(filename=str(model_diagram_path))

In [None]:
# Print resNet architecture diagram
# One path is used for both writing and displaying, so the image shown is
# always the diagram that was just generated.
resnet_diagram_path = Path('output/img_model/resNet_arch2.png')
resnet_diagram_path.parent.mkdir(parents=True, exist_ok=True)
plot_model(resNet, to_file=str(resnet_diagram_path), show_shapes=True)
Image(filename=str(resnet_diagram_path))

In [None]:
# Show the model's metric labels, to help read the evaluation output in the next cell.
modelCNN2.metrics_names

In [None]:
# Evaluate on the validation data using the validation step count. The
# training step count would cycle through the much smaller validation set
# many times over.
modelCNN2.evaluate_generator(generator=validation_generator, steps=STEP_SIZE_VALID)

In [None]:
# Elapsed wall-clock time since t_start was taken in the training cell, in hours.
t_finish = time.time()
elapsed_hours = (t_finish - t_start) / 3600
print(f"Kernel run time = {elapsed_hours} hours")

In [None]:
# List the metric series recorded in the training history.
print(historyCNN2.history.keys())

In [None]:
# Plot the training/validation accuracy and loss curves from the recorded history.
plotAccuracyLoss(historyCNN2)