In [None]:
import numpy as np 
import pandas as pd 
from utils import *
from glob import glob
import matplotlib.pyplot as plt
import matplotlib as mpl
import os
from sklearn.model_selection import train_test_split
import statistics
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import Callback

In [None]:
# The 14 finding names, in the order used throughout the notebook.
disease_labels = [
    'Atelectasis',
    'Consolidation',
    'Infiltration',
    'Pneumothorax',
    'Edema',
    'Emphysema',
    'Fibrosis',
    'Effusion',
    'Pneumonia',
    'Pleural_Thickening',
    'Cardiomegaly',
    'Nodule',
    'Mass',
    'Hernia',
]

In [None]:
# The split files are bare lists of image file names, one per line, with no
# header row. Reading them with the default header=0 would turn the first
# file name into the column label and silently drop that image from the
# split, so the column name is supplied explicitly instead.
labels_train_val = pd.read_csv(
    'C:/Users/tanma/Downloads/archive/train_val_list.txt',
    header=None,
    names=['Image_Index'],
)

labels_test = pd.read_csv(
    'C:/Users/tanma/Downloads/archive/test_list.txt',
    header=None,
    names=['Image_Index'],
)

In [None]:
# Column names applied to the metadata table, replacing the CSV's own header.
column_names = [
    'Image_Index',
    'Finding_Labels',
    'Follow_Up_#',
    'Patient_ID',
    'Patient_Age',
    'Patient_Gender',
    'View_Position',
    'Original_Image_Width',
    'Original_Image_Height',
    'Original_Image_Pixel_Spacing_X',
    'Original_Image_Pixel_Spacing_Y',
    'dfd',
]

# Load the per-image metadata and rename its columns in one go.
labels_df = pd.read_csv('C:/Users/tanma/Downloads/archive/Data_Entry_2017.csv')
labels_df.columns = column_names
labels_df.head()

In [None]:
# Target finding for the binary task, as a string and as a one-element list
# (the list form is what the data generators take as y_col).
binary_disease_str = 'Effusion'
binary_disease = ['Effusion']

# Boolean column: True when the target name occurs in the row's findings string.
labels_df[binary_disease_str] = [
    binary_disease_str in findings for findings in labels_df['Finding_Labels']
]

# Print Class Mapping
print(labels_df[binary_disease_str].head(3))

In [None]:
# Attach the metadata to each split by joining on the image file name
# (inner join, the pandas default).
train_val_merge = labels_train_val.merge(labels_df, on='Image_Index')

test_merge = labels_test.merge(labels_df, on='Image_Index')

In [None]:
# Turn the '|'-separated findings string of every row into a list of names.
# Applied to both splits; the frames are updated in place.
for merged_split in (train_val_merge, test_merge):
    merged_split['Finding_Labels'] = merged_split['Finding_Labels'].map(
        lambda findings: str(findings).split('|')
    )

In [None]:
# Collect every PNG from the image folders images_001 ... images_012,
# in folder order.
archive_root = 'C:/Users/tanma/Downloads/archive'
num_glob = [
    png_file
    for shard in range(1, 13)
    for png_file in glob(f'{archive_root}/images_{shard:03d}/images/*.png')
]

# File name -> full path lookup.
img_path = dict(zip(map(os.path.basename, num_glob), num_glob))

# Rows whose image is not found on disk get None in 'Paths'.
train_val_merge['Paths'] = train_val_merge['Image_Index'].map(img_path.get)
# Testing Mapping
test_merge['Paths'] = test_merge['Image_Index'].map(img_path.get)
train_val_merge.tail(30)

In [None]:
# Sorted, de-duplicated patient IDs of each split.
patients = np.unique(train_val_merge['Patient_ID'].to_numpy())
test_patients = np.unique(test_merge['Patient_ID'].to_numpy())
patients

In [None]:
# Split the patient IDs (not the image rows) into training and validation
# groups. At this point train_df / val_df hold arrays of patient IDs.
train_df, val_df = train_test_split(
    patients,
    test_size=0.0669,
    shuffle=True,
    random_state=2019,
)

In [None]:
# Select the image rows belonging to each patient group.
# train_df / val_df hold patient-ID arrays on entry and DataFrames on exit,
# so re-run the split cell before re-running this one.
in_train_patients = train_val_merge['Patient_ID'].isin(train_df)
in_val_patients = train_val_merge['Patient_ID'].isin(val_df)
in_test_patients = test_merge['Patient_ID'].isin(test_patients)

train_df = train_val_merge.loc[in_train_patients]
val_df = train_val_merge.loc[in_val_patients]
test_df = test_merge.loc[in_test_patients]

In [None]:
# Display the training split (the rows of train_val_merge selected for the training patients).
train_df

In [None]:
# Per-image preprocessing plus the random augmentations.
augmentation_options = {
    # intensity handling
    'rescale': 1./255,
    'samplewise_center': True,
    'samplewise_std_normalization': True,
    # random geometric transforms
    'horizontal_flip': True,
    'rotation_range': 5,
    'zoom_range': 0.1,
    'width_shift_range': 0.05,
    'height_shift_range': 0.05,
}
train_data_gen = ImageDataGenerator(**augmentation_options)

In [None]:
IMG_SIZE = (224, 224)

# Stream shuffled training batches straight from the file paths stored in the frame.
train_gen = train_data_gen.flow_from_dataframe(
    dataframe=train_df,
    directory=None,          # 'Paths' already holds full paths
    x_col='Paths',
    y_col=binary_disease,
    class_mode='raw',        # labels are taken as-is from the y_col column(s)
    classes=disease_labels,
    target_size=IMG_SIZE,
    color_mode='rgb',
    batch_size=16,
    shuffle=True,
    seed=2,
)

In [None]:
# Validation images must not be randomly augmented, otherwise val_loss (which
# drives ReduceLROnPlateau) is measured on distorted images and changes from
# run to run. This generator keeps only the deterministic per-image
# preprocessing used for training (rescale + sample-wise standardisation) and
# drops the flips, zooms, shifts and rotations.
val_data_gen = ImageDataGenerator(rescale=1./255,
                                  samplewise_center=True,
                                  samplewise_std_normalization=True
                                  )

val_gen = val_data_gen.flow_from_dataframe(
                                            dataframe=val_df,
                                            directory=None,
                                            shuffle= True,
                                            seed = 2,
                                            x_col = 'Paths',
                                            y_col = binary_disease,
                                            target_size = IMG_SIZE,
                                            classes = disease_labels,
                                            class_mode='raw',
                                            color_mode = 'rgb',
                                            batch_size = 16
                                            )

In [None]:
IMG_IND = 224

# Element spec shared by both datasets: a float image batch and an int label
# batch with one column; the batch dimension is left open.
_batch_spec = dict(
    output_types=(tf.float32, tf.int32),
    output_shapes=([None, IMG_IND, IMG_IND, 3], [None, 1]),
)

# Wrap the Keras iterators as tf.data datasets.
train_data = tf.data.Dataset.from_generator(lambda: train_gen, **_batch_spec)
val_data = tf.data.Dataset.from_generator(lambda: val_gen, **_batch_spec)

In [None]:
from tensorflow.keras.preprocessing.image import load_img, img_to_array, array_to_img

# Load the first four training images at the network input size.
image_paths = np.array(train_df['Paths'])
images_to_augment = np.array([
    img_to_array(load_img(path, target_size=(IMG_IND, IMG_IND)))
    for path in image_paths[:4]
])

# One pass through the training generator; shuffle is off so that row k of
# the output corresponds to row k of the input.
images_augmented = next(train_data_gen.flow(x=images_to_augment,
                                            batch_size=10,
                                            shuffle=False))

# 2x2 grid: left column shows the base images, right column the augmented ones.
fig, axes = plt.subplots(2, 2)

for row in range(2):
    base_ax, augmented_ax = axes[row, 0], axes[row, 1]

    base_ax.imshow(array_to_img(images_to_augment[row]),
                   interpolation='nearest')
    augmented_ax.imshow(array_to_img(images_augmented[row]),
                        interpolation='nearest')

    # Hide the tick marks on both panels of this row.
    for panel in (base_ax, augmented_ax):
        panel.set_xticks([])
        panel.set_yticks([])

columns = ['Base Image', 'Augmented Image']
for ax, column in zip(axes[0], columns):
    ax.set_title(column)

plt.show()

In [None]:
# ImageNet-pretrained DenseNet121 backbone without its classification head;
# all of its weights stay trainable.
base_model = tf.keras.applications.DenseNet121(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
base_model.trainable = True

# Classification head: flatten -> dropout -> two Dense(64)+BN+ReLU stages -> one sigmoid unit.
top_model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=base_model.output_shape[1:]),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(64, kernel_initializer='normal'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dense(64, kernel_initializer='normal'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

# Chain the backbone and the head into a single end-to-end model.
model = tf.keras.models.Model(base_model.inputs, top_model(base_model.output))
model.summary()

In [None]:
METRICS = [tf.keras.metrics.BinaryAccuracy(name='BinaryAccuracy')]
# The network ends in a single sigmoid unit trained on 0/1 labels, so the loss
# must be binary cross-entropy. Categorical cross-entropy normalises the
# prediction over its last axis; with one unit that is always 1, the loss is
# identically 0 and no gradient reaches the weights.
model.compile(loss = 'binary_crossentropy',optimizer = 'Adam',metrics = METRICS)

In [None]:
# Training/Validation Steps
LEARNING_RATE_PATIENCE = 5
# Batch size of the generators feeding train_data / val_data (they are built with batch_size = 16).
BATCH_SIZE = 16

# Dynamic Learning Rate: multiply the learning rate by `factor` when val_loss
# has not improved for LEARNING_RATE_PATIENCE epochs, never going below min_lr.
reduced_lr = tf.keras.callbacks.ReduceLROnPlateau(
                                                monitor='val_loss',
                                                factor=.05,
                                                patience=LEARNING_RATE_PATIENCE,
                                                verbose=1,
                                                mode='min',
                                                cooldown=0,
                                                min_lr=1e-6
                                                )
# auroc = MultipleClassAUROC(
#                             sequence = x_val,
#                             class_names=binary_disease,
#                             weights_path=CALLBACKS_DIR,
#                             stats={},
#                             workers=1,
#                             )

# Errors on the positive class (label 1) weigh twice as much in the loss.
class_weight = {
    0: 1.,
    1: 2.0
}

# Number of batches in one full pass over each split. The divisor is the
# actual batch size (it used to be a hard-coded 64, which under-counted the
# batches by a factor of four).
train_steps = train_gen.samples // BATCH_SIZE
val_steps = val_gen.samples // BATCH_SIZE

# NOTE(review): fit() still runs a fixed 252 training / 17 validation batches
# per epoch rather than train_steps / val_steps -- pass those instead if a
# full pass per epoch is wanted.
# use_multiprocessing is not passed: it only applies to generator /
# keras.utils.Sequence inputs and these inputs are tf.data datasets.
history = model.fit(
                    train_data.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) ,
                    steps_per_epoch = 252,
                    validation_data=  val_data.prefetch(buffer_size=tf.data.experimental.AUTOTUNE),
                    validation_steps = 17,
                    epochs=10,
                    class_weight = class_weight,
                    callbacks=[reduced_lr]
)

In [None]:

# Run the model over the test input for 64 prediction steps, with progress output.
# NOTE(review): x_test is not assigned in any visible cell -- confirm where it comes from.
pred_Y = model.predict(x_test, steps=64, verbose=True)

In [None]:

from sklearn.metrics import confusion_matrix

# Threshold the predicted scores at 0.5, then plot the confusion matrix
# (true labels first, thresholded predictions second).
predicted_positive = pred_Y > 0.5
test_confusion = confusion_matrix(y_test, predicted_positive)
plt.matshow(test_confusion)

In [None]:
# Inspect the ground-truth labels that were compared against the predictions.
# NOTE(review): y_test is not assigned in any visible cell -- confirm where it comes from.
y_test

In [None]:
# Our Changes
# 14-output variant. pooling='avg' makes the backbone emit one feature vector
# per image; without it the Dense layers are applied at every position of the
# final feature map and the model outputs one 14-vector per spatial location
# instead of one per image.
# Note: this rebinds the notebook-level `base_model` and `model`.
base_model = tf.keras.applications.DenseNet121(
    input_shape=(224, 224, 3),
    include_top=False,
    weights='imagenet',
    pooling='avg',
)
model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.Dense(units=14, activation='linear'),
    tf.keras.layers.Dense(units=14, activation='sigmoid'),
])

In [None]:
class MultipleClassAUROC(Callback):
    """
    Monitor mean AUROC and update model
    """
    def __init__(self, sequence, class_names, weights_path, stats=None, workers=1):
        """
        Store the evaluation inputs and derive the output file paths.

        Args:
            sequence: data handed to ``predict`` at the end of every epoch.
            class_names: iterable of class names; an empty AUROC history list
                is created for each one in ``self.aurocs``.
            weights_path: path of the weights file. The "best" weights path,
                the AUROC log path and the training-stats path are all placed
                in the same directory.
            stats: optional stats dict for resuming a previous run; when it is
                missing or empty, ``{"best_mean_auroc": 0}`` is used.
            workers: stored on the instance as ``self.workers``.
        """
        # Zero-argument super() runs Callback.__init__; the previous
        # super(Callback, self) started the lookup *after* Callback and so
        # skipped the base-class initialisation.
        super().__init__()
        #self.steps=STEPS ############################
        self.sequence = sequence
        self.workers = workers
        self.class_names = class_names
        self.weights_path = weights_path

        # Every derived file sits next to the weights file.
        weights_dir, weights_file = os.path.split(weights_path)
        self.best_weights_path = os.path.join(weights_dir, f"best_{weights_file}")
        self.best_auroc_log_path = os.path.join(weights_dir, "best_auroc.log")
        self.stats_output_path = os.path.join(weights_dir, ".training_stats.json")

        # for resuming previous training
        if stats:
            self.stats = stats
        else:
            self.stats = {"best_mean_auroc": 0}

        # One AUROC history list per class name.
        self.aurocs = {}
        for c in self.class_names:
            self.aurocs[c] = []

    def on_epoch_end(self, epoch, logs=None):
        """
        Report the AUROC of the model on ``self.sequence`` after an epoch.

        Stores the current learning rate in ``self.stats["lr"]``, predicts on
        ``self.sequence`` with the model attached to this callback, scores the
        predictions against the notebook-level ``y_val`` and appends the score
        to the notebook-level ``MEAN_AUROC`` list.

        Args:
            epoch: epoch index passed by Keras (reported as ``epoch + 1``).
            logs: metrics dict passed by Keras; not read in the visible part
                of this method.
        """
        print("\n*********************************")
        # NOTE(review): `kb` is not defined in the visible cells -- presumably
        # the Keras backend module; confirm where it is imported.
        self.stats["lr"] = float(kb.eval(self.model.optimizer.lr))
        print(f"current learning rate: {self.stats['lr']}")
        #LR_LOG.append(self.stats['lr'])

        # Evaluate the model this callback is attached to. The notebook-level
        # `model` name is rebound by another cell, so reading the global could
        # score a different network than the one being trained.
        y_hat = self.model.predict(self.sequence,verbose=1)

        pred_indices = np.argmax(y_hat,axis=1)

        # NOTE(review): `y_val`, `roc_auc_score` and `MEAN_AUROC` are read from
        # the notebook namespace and are not defined in the visible cells.
        y = y_val

        print(f"*** epoch#{epoch + 1} dev auroc ***")
        current_auroc = []
        try:
            score = roc_auc_score(y, y_hat)
        except ValueError:
            # Scoring failed with a ValueError: record 0 instead of aborting training.
            score = 0

        current_auroc.append(score)
        EPOCH = epoch + 1

        print("*********************************")

        mean_auroc = np.mean(current_auroc)
        MEAN_AUROC.append(mean_auroc)
        print(f"Effusion auroc: {mean_auroc}\n")

        print("*********************************")