In [45]:
# standard library
import os
import platform
import random
import shutil

import numpy as np
import pandas as pd

# graphing
import matplotlib.image as mpimage
import matplotlib.pyplot as plt

%matplotlib inline

from PIL import Image

# TF model stuff
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense, BatchNormalization, GlobalAveragePooling2D, Dropout
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report


In [46]:
# Creates directory, if directory exists removes if remove parameter is set to True 
def create_directory(directory_path, remove=False):
    if remove and os.path.exists(directory_path):
        try:
            shutil.rmtree(directory_path)
            os.mkdir(directory_path)
        except:
            print("Could not remove directory : ", directory_path)
            return False
    else:
        try:
            os.mkdir(directory_path)
        except:
            print("Could not create directory: ", directory_path)
            return False
        
    return True

# Removes directory, if directory exists 
def remove_directory(directory_path):
    if os.path.exists(directory_path):
        try:
            shutil.rmtree(directory_path)
        except:
            print("Could not remove directory : ", directory_path)
            return False
        
    return True

def clear_directory(directory_path):
    dirs_files = os.listdir(directory_path)
    
    for item in dirs_files:
#         item_path = os.path.join(directory_path, item)
        item_path = directory_path+ item
        
        try:
            if os.path.isfile(item_path):
                os.unlink(item_path)
            elif os.path.isdir(item_path): 
                shutil.rmtree(item_path)
        except Exception as e:
            print(e)
            
    return True


def remove_empty_folders(path, removeRoot=True):
    if not os.path.isdir(path):
        return
    
    # remove empty subfolders
    files = os.listdir(path)
    
    if len(files):
        for f in files:
            fullpath = os.path.join(path, f)
            
            if os.path.isdir(fullpath):
                remove_empty_folders(fullpath)

    # if folder empty, delete it
    files = os.listdir(path)
    
    if len(files) == 0 and removeRoot:
        print("Removing empty folder:", path)
        os.rmdir(path)
        
        
def dir_file_count(directory):
    return sum([len(files) for r, d, files in os.walk(directory)])

In [None]:
train_dir = 'chest_xray/train'
test_dir = 'chest_xray/test'
validation_dir = 'chest_xray/val'

In [None]:
img = mpimage.imread(train_dir + '/NORMAL/IM-0115-0001.jpeg')
image_plot = plt.imshow(img, cmap='gray')
plt.show()

In [None]:
NUM_CLASSES = 2
def create_model(input_shape):
    K.clear_session()
    base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=input_shape)
    x = base_model.output
    x = GlobalAveragePooling2D(name='avg_pool')(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.3)(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.3)(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.3)(x)
    x = LeakyReLU()(x)
    
    for layer in base_model.layers:
        layer.trainable = False
        
    predictions = Dense(NUM_CLASSES, activation='sigmoid')(x)
    model = Model(inputs=base_model.inputs, outputs=predictions)
    return model

In [None]:
dimension_x = 175
dimension_y = 175
model = create_model((dimension_x, dimension_y, 3))

In [None]:
training_loss = tf.keras.metrics.Mean('training_loss', dtype=tf.float32)
training_accuracy = tf.keras.metrics.SparseCategoricalAccuracy('training_accuracy', dtype=tf.float32)
test_loss = tf.keras.metrics.Mean('test_loss', dtype=tf.float32)
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy('test_accuracy', dtype=tf.float32)
model.summary()

In [None]:
optimizer = Adam(lr=0.0001)

model.compile(loss='categorical_crossentropy',
             optimizer=optimizer,
             metrics=['accuracy'])

In [None]:
def file_count(directory):
    return sum([len(files) for r, d, files in os.walk(directory)])

In [None]:
rescale = 1./255
target_size = (dimension_x, dimension_y)
batch_size = 500
class_mode = 'categorical'

train_datagen = ImageDataGenerator(rescale=rescale,
                                  shear_range=0.2,
                                  zoom_range=0.2,
                                  horizontal_flip=True,
                                  validation_split=0.2)

train_generator = train_datagen.flow_from_directory(train_dir,
                                                    target_size=target_size,
                                                    class_mode=class_mode,
                                                    batch_size=batch_size,
                                                    shuffle=True)
val_datagen = ImageDataGenerator(rescale=rescale)
val_generator = val_datagen.flow_from_directory(validation_dir,
                                               target_size=target_size,
                                               class_mode=class_mode,
                                               batch_size=file_count(validation_dir),
                                               shuffle=False)

test_datagen = ImageDataGenerator(rescale=rescale)
test_generator = test_datagen.flow_from_directory(test_dir,
                                                  target_size=target_size,
                                                  class_mode=class_mode,
                                                  batch_size=file_count(validation_dir),
                                                  shuffle=False)

In [None]:
y = train_generator.classes
labels = np.unique(y)

train_class_weights = compute_class_weight('balanced', labels, y)
print(train_class_weights)

## Training

In [None]:
history = model.fit_generator(train_generator,
                              steps_per_epoch=len(train_generator),
                              epochs=10,
                              verbose=1,
                              validation_data=val_generator,
                              validation_steps=len(val_generator),
                              class_weight=train_class_weights,
                              workers=20
                             )
MODEL_FILE = 'pneumonia_v1.hd5'
model.save(MODEL_FILE)

In [None]:
# Reload the model saved by the training cell, replacing the in-memory `model`.
# MODEL_FILE is defined in the training cell, so that cell must have run first.
model = tf.keras.models.load_model(MODEL_FILE)

In [None]:
model.evaluate_generator(test_generator, steps=len(test_generator), verbose=1)

In [None]:
vl_score = model.predict_generator(test_generator)
AUC = roc_auc_score(test_generator.classes, np.argmax(vl_score, axis=1))

print(classification_report(test_generator.classes,
                     np.argmax(vl_score, axis=1),
                     target_names=['Normal', 'Pnuemonia']))
print(AUC)