# Chest X-Ray Medical Diagnosis with Deep Learning


Download the data used in this notebook here: https://drive.google.com/file/d/1AUWthYTUeVYMRV69O-U2zyOhscXci3SZ/view?usp=sharing 

-----------------------------------------------------------------------------------------------------------

General dataset: the [ChestX-ray8 dataset](https://arxiv.org/abs/1705.02315), which contains 108,948 frontal-view X-ray images of 32,717 unique patients. 
- Each image in the data set contains multiple text-mined labels identifying 14 different pathological conditions. 
- These in turn can be used by physicians to diagnose 8 different diseases. 
- We will use this data to develop a single model that will provide binary classification predictions for each of the 14 labeled pathologies. 
- In other words it will predict 'positive' or 'negative' for each of the pathologies.
 
Download the entire dataset [here](https://nihcc.app.box.com/v/ChestXray-NIHCC). 


- For deeper data preprocessing and analysis of the raw version of the same dataset, refer to the PyTorch
implementation of a similar task in the same repository:
https://github.com/VladimerKhasia/ML-in-Notebooks/blob/main/Pytorch/xray_img_captioning.ipynb


In [None]:
import sys
import os
import random
import cv2
from PIL import Image
import glob
from sklearn.metrics import roc_auc_score, roc_curve

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import tensorflow 
from tensorflow.compat.v1.logging import INFO, set_verbosity
tensorflow.compat.v1.logging.set_verbosity(tensorflow.compat.v1.logging.ERROR)
tensorflow.compat.v1.disable_eager_execution()
from tensorflow import keras

from keras.preprocessing.image import ImageDataGenerator
from keras.applications.densenet import DenseNet121
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model
from keras import backend
from keras import backend as K

from keras.models import load_model

tensorflow.__version__, keras.__version__

('2.9.2', '2.9.0')

In [None]:
#@title helpers

# Seed Python's `random` module with a=None, i.e. from system entropy / the
# current time, so results drawn from `random` differ between runs.
random.seed(a=None, version=2)

# Set the TensorFlow compat.v1 logging verbosity to INFO; this overrides the
# ERROR level that was set in the import cell.
set_verbosity(INFO)


# def get_mean_std_per_batch(image_dir, df, H=320, W=320):
#     sample_data = []
#     for img in df.sample(100)["Image"].values:
#         image_path = os.path.join(image_dir, img)
#         sample_data.append(
#             np.array(tensorflow.keras.utils.load_img(image_path, target_size=(H, W))))

#     mean = np.mean(sample_data, axis=(0, 1, 2, 3))
#     std = np.std(sample_data, axis=(0, 1, 2, 3), ddof=1)
#     return mean, std

def get_mean_std_per_batch(image_dir, df, H=320, W=320, imagetype='png'):
    """Compute the per-channel mean and standard deviation of a directory of images.

    Every ``*.<imagetype>`` file directly inside ``image_dir`` is opened, shrunk
    in place with ``Image.thumbnail`` so that it fits within ``W`` x ``H``
    (aspect ratio is preserved), and folded into running per-channel sums.
    Streaming the statistics keeps memory use constant and also works when the
    thumbnails do not all end up with the same size.

    Args:
        image_dir (str): directory that holds the images; a trailing path
            separator is optional.
        df: unused; kept so existing callers keep working.
        H (int): maximum thumbnail height.
        W (int): maximum thumbnail width.
        imagetype (str): file extension (without the dot) of the images to read.

    Returns:
        tuple: ``(mean, std)``, two float32 arrays with one entry per image
        channel. ``std`` is the sample standard deviation (``ddof=1``).

    Raises:
        ValueError: if no matching image is found in ``image_dir``.
    """
    pattern = os.path.join(image_dir, f"*.{imagetype}")
    size = (W, H)  # PIL expresses sizes as (width, height)

    pixel_count = 0
    channel_sum = None
    channel_sq_sum = None
    for infile in glob.glob(pattern):
        with Image.open(infile) as im:
            im.thumbnail(size)
            pixels = np.asarray(tensorflow.keras.utils.img_to_array(im),
                                dtype=np.float64)
        # Collapse the spatial axes: one row per pixel, one column per channel.
        # NOTE(review): assumes the Keras image data format is channels-last.
        pixels = pixels.reshape(-1, pixels.shape[-1])
        if channel_sum is None:
            channel_sum = np.zeros(pixels.shape[1], dtype=np.float64)
            channel_sq_sum = np.zeros(pixels.shape[1], dtype=np.float64)
        channel_sum += pixels.sum(axis=0)
        channel_sq_sum += np.square(pixels).sum(axis=0)
        pixel_count += pixels.shape[0]

    if pixel_count == 0:
        raise ValueError(f"No '*.{imagetype}' images found in {image_dir!r}")

    mean = channel_sum / pixel_count
    # Sample variance from the running sums; clamp tiny negative values that
    # can appear through floating-point cancellation.
    variance = (channel_sq_sum - pixel_count * np.square(mean)) / max(pixel_count - 1, 1)
    std = np.sqrt(np.maximum(variance, 0.0))
    return mean.astype(np.float32), std.astype(np.float32)


def load_image(img, image_dir, df, preprocess=True, H=320, W=320, metrics=None):
    """Load an image from disk and optionally standardize it.

    Args:
        img (str): file name of the image inside ``image_dir``.
        image_dir (str): directory that holds the image.
        df: forwarded to ``get_mean_std_per_batch`` when ``metrics`` is None.
        preprocess (bool): when True, subtract the mean, divide by the std and
            add a leading batch axis; when False, return the loaded image as is.
        H (int): target height passed to ``load_img``.
        W (int): target width passed to ``load_img``.
        metrics (tuple | None): precomputed ``(mean, std)``. When None and
            ``preprocess`` is True, the statistics are recomputed from
            ``image_dir`` (this scans the whole directory).

    Returns:
        The object returned by ``load_img`` (a PIL image) when ``preprocess``
        is False, otherwise a numpy array of shape ``(1, H, W, channels)``.
    """
    img_path = os.path.join(image_dir, img)
    x = tensorflow.keras.utils.load_img(img_path, target_size=(H, W))
    if not preprocess:
        # The statistics are not needed for the raw image, so skip the
        # (potentially expensive) directory scan entirely.
        return x

    if metrics is None:
        mean, std = get_mean_std_per_batch(image_dir, df, H=H, W=W)
    else:
        mean, std = metrics

    # Convert explicitly instead of relying on numpy's reflected operators to
    # coerce the PIL image on the fly.
    pixels = np.asarray(x)
    pixels = (pixels - mean) / std
    return np.expand_dims(pixels, axis=0)


def grad_cam(input_model, image, cls, layer_name, H=320, W=320):
    """GradCAM method for visualizing input saliency.

    Args:
        input_model: Keras model whose output and ``layer_name`` layer are used.
        image: model input for a single example (first axis is the batch axis).
        cls (int): index of the output unit to explain.
        layer_name (str): name of the layer whose activations are weighted.
        H (int): height of the returned map.
        W (int): width of the returned map.

    Returns:
        np.ndarray: map of shape ``(H, W)`` with negative values clipped to 0
        and scaled so its maximum is 1. If nothing in the map is positive, an
        all-zero map is returned.
    """
    y_c = input_model.output[0, cls]
    conv_output = input_model.get_layer(layer_name).output
    grads = K.gradients(y_c, conv_output)[0]

    gradient_function = K.function([input_model.input], [conv_output, grads])

    output, grads_val = gradient_function([image])
    # Drop the batch axis of both the activations and their gradients.
    output, grads_val = output[0], grads_val[0]

    # One weight per channel: the gradient averaged over the spatial axes.
    weights = np.mean(grads_val, axis=(0, 1))
    cam = np.dot(output, weights)

    # Process CAM. `interpolation` must be passed by keyword: the third
    # positional parameter of cv2.resize is `dst`, not the interpolation flag.
    cam = cv2.resize(cam, (W, H), interpolation=cv2.INTER_LINEAR)
    cam = np.maximum(cam, 0)
    peak = cam.max()
    if peak > 0:
        # Guard against 0/0 -> NaN when no activation is positive.
        cam = cam / peak
    return cam
    

def compute_gradcam(model, img, image_dir, df, labels, selected_labels,
                    layer_name='bn', metrics=None):
    """Plot the original image next to GradCAM overlays for selected labels.

    Args:
        model: Keras model passed to ``predict`` and to ``grad_cam``.
        img (str): file name of the image inside ``image_dir``.
        image_dir (str): directory that holds the image.
        df: forwarded to ``load_image``.
        labels (sequence of str): all class names, in model-output order.
        selected_labels: container of the class names to visualize.
        layer_name (str): layer passed to ``grad_cam``.
        metrics (tuple | None): ``(mean, std)`` forwarded to ``load_image``.
    """
    preprocessed_input = load_image(img, image_dir, df, metrics=metrics)
    predictions = model.predict(preprocessed_input)

    print("Loading original image")
    # Load the un-normalized image once and reuse it for every panel.
    original = load_image(img, image_dir, df, preprocess=False, metrics=metrics)

    selected = [(i, label) for i, label in enumerate(labels)
                if label in selected_labels]
    # Keep the 1x5 layout for up to four labels; widen the grid beyond that
    # instead of requesting a subplot index that does not exist.
    n_cols = max(5, 1 + len(selected))

    plt.figure(figsize=(15, 10))
    plt.subplot(1, n_cols, 1)
    plt.title("Original")
    plt.axis('off')
    plt.imshow(original, cmap='gray')

    for panel, (i, label) in enumerate(selected, start=2):
        print(f"Generating gradcam for class {label}")
        gradcam = grad_cam(model, preprocessed_input, i, layer_name)
        plt.subplot(1, n_cols, panel)
        plt.title(f"{label}: p={predictions[0][i]:.3f}")
        plt.axis('off')
        plt.imshow(original, cmap='gray')
        # Overlay opacity follows the predicted probability, capped at 0.5.
        plt.imshow(gradcam, cmap='jet', alpha=min(0.5, predictions[0][i]))


def get_roc_curve(labels, predicted_vals, generator):
    """Plot one ROC curve per label and return the AUC values that were computed.

    Args:
        labels (sequence of str): class names; position ``i`` selects column
            ``i`` of both ``generator.labels`` and ``predicted_vals``.
        predicted_vals (np.ndarray): predicted scores, one column per label.
        generator: object exposing the ground truth as ``generator.labels``.

    Returns:
        list: AUC values in label order. A label whose AUC cannot be computed
        is reported on stdout and skipped, so the list may be shorter than
        ``labels`` and is then no longer index-aligned with it.
    """
    auc_roc_vals = []
    for i, label in enumerate(labels):
        try:
            gt = generator.labels[:, i]
            pred = predicted_vals[:, i]
            auc_roc = roc_auc_score(gt, pred)
            fpr_rf, tpr_rf, _ = roc_curve(gt, pred)
        except (ValueError, IndexError):
            # ValueError: e.g. only one class present in the ground truth;
            # IndexError: the column does not exist. Anything else is a real
            # bug and is allowed to propagate.
            print(
                f"Error in generating ROC curve for {label}. "
                f"Dataset lacks enough examples."
            )
            continue

        auc_roc_vals.append(auc_roc)
        plt.figure(1, figsize=(10, 10))
        plt.plot([0, 1], [0, 1], 'k--')
        plt.plot(fpr_rf, tpr_rf, label=f"{label} ({round(auc_roc, 3)})")
        plt.xlabel('False positive rate')
        plt.ylabel('True positive rate')
        plt.title('ROC curve')
        plt.legend(loc='best')
    plt.show()
    return auc_roc_vals


# LOAD MODEL
def load_model(path):
    """Build and compile a DenseNet121-based multi-label classifier.

    Class frequencies are read from ``train-small.csv`` inside ``path`` and used
    to weight the loss. No fine-tuned weights are loaded; the backbone starts
    from the ImageNet weights.

    Note: this definition shadows ``keras.models.load_model`` imported at the
    top of the file.

    Args:
        path (str): directory that contains ``train-small.csv``; a trailing
            path separator is optional.

    Returns:
        The compiled Keras model with one sigmoid output per pathology.
    """
    labels = ['Cardiomegaly', 'Emphysema', 'Effusion', 'Hernia', 'Infiltration', 'Mass', 'Nodule', 'Atelectasis',
              'Pneumothorax', 'Pleural_Thickening', 'Pneumonia', 'Fibrosis', 'Edema', 'Consolidation']

    train_df = pd.read_csv(os.path.join(path, "train-small.csv"))

    # Plain numpy arrays so the loss can index the weights by position.
    num_examples = len(train_df)
    class_pos = train_df.loc[:, labels].sum(axis=0).to_numpy()
    class_neg = num_examples - class_pos

    freq_pos = class_pos / num_examples
    freq_neg = class_neg / num_examples
    print("Got loss weights")
    # create the base pre-trained model
    ### DenseNet121(weights=path+'densenet.hdf5', include_top=False) --> if our own pretrained model, otherwise:
    base_model = DenseNet121(weights='imagenet', include_top=False)
    print("Loaded DenseNet")
    # add a global spatial average pooling layer
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    # and a logistic layer
    predictions = Dense(len(labels), activation="sigmoid")(x)
    print("Added layers")

    model = Model(inputs=base_model.input, outputs=predictions)

    def get_weighted_loss(pos_term_weights, neg_term_weights, epsilon=1e-7):
        """Return a loss that weights the positive and negative log terms per class."""
        def weighted_loss(y_true, y_pred):
            # L(X, y) = −w * y log p(Y = 1|X) − w *  (1 − y) log p(Y = 0|X)
            # from https://arxiv.org/pdf/1711.05225.pdf
            loss = 0
            for i in range(len(pos_term_weights)):
                loss -= (pos_term_weights[i] * y_true[:, i] * K.log(y_pred[:, i] + epsilon) +
                         neg_term_weights[i] * (1 - y_true[:, i]) * K.log(1 - y_pred[:, i] + epsilon))

            loss = K.sum(loss)
            return loss
        return weighted_loss

    # The positive term is weighted by the negative frequency and vice versa.
    model.compile(optimizer='adam', loss=get_weighted_loss(freq_neg, freq_pos))
    print("Compiled Model")

    # model.load_weights(path + "pretrained_model.h5")  ## this means that model trained on imagenet then was finetuned on
    # print("Loaded Weights")                           ## chest x-ray images. So, You can replace this with
                                                      ## finetuning training see:"X-ray image classification and interpretation"
    return model

In [None]:
#@title tests


### test helper functions
def datatype_check(expected_output, target_output, error):
    """Recursively check that ``target_output`` has the same types as ``expected_output``.

    Dicts, tuples and lists are walked element by element; any other value is a
    leaf that must be an instance of the type of the matching expected value.

    Args:
        expected_output: reference value (same nesting as ``target_output``).
        target_output: value produced by the function under test.
        error (str): message fragment printed when an element fails.

    Returns:
        int: 1 if the check passes, 0 if any element of a container fails.

    Raises:
        AssertionError: if ``target_output`` is a leaf of the wrong type.
    """
    def _expected_type(index):
        # The failure being reported may be that the expected entry does not
        # exist, so looking it up again must not raise a second time.
        try:
            return type(expected_output[index])
        except (KeyError, IndexError, TypeError):
            return "<missing>"

    if isinstance(target_output, dict):
        success = 0
        for key, value in target_output.items():
            try:
                success += datatype_check(expected_output[key], value, error)
            except Exception:
                print("Error: {} in variable {}. Got {} but expected type {}".format(
                    error, key, type(value), _expected_type(key)))
        return 1 if success == len(target_output) else 0

    if isinstance(target_output, (tuple, list)):
        success = 0
        for i, value in enumerate(target_output):
            try:
                success += datatype_check(expected_output[i], value, error)
            except Exception:
                print("Error: {} in variable {}, Got {}  but expected type {}".format(
                    error, i, type(value), _expected_type(i)))
        return 1 if success == len(target_output) else 0

    # Explicit raise instead of `assert` so the check survives `python -O`.
    if not isinstance(target_output, type(expected_output)):
        raise AssertionError("Got {} but expected type {}".format(
            type(target_output), type(expected_output)))
    return 1
            
def equation_output_check(expected_output, target_output, error):
    """Recursively check that ``target_output`` equals ``expected_output``.

    Dicts, tuples and lists are walked element by element. A leaf that has a
    ``shape`` attribute is compared with
    ``np.testing.assert_array_almost_equal``; any other leaf with ``==``.

    Args:
        expected_output: reference value (same nesting as ``target_output``).
        target_output: value produced by the function under test.
        error (str): message fragment printed when an element fails.

    Returns:
        int: 1 if the check passes, 0 if any element of a container fails.

    Raises:
        AssertionError: if ``target_output`` is a leaf that does not match.
    """
    if isinstance(target_output, dict):
        success = 0
        for key, value in target_output.items():
            try:
                success += equation_output_check(expected_output[key], value, error)
            except Exception:
                # The expected entry may be the thing that is missing, so the
                # diagnostic lookup must not raise again.
                try:
                    expected = expected_output[key]
                except (KeyError, IndexError, TypeError):
                    expected = "<missing>"
                print(expected, value)
                print("Error: {} for variable {}.".format(error, key))
        return 1 if success == len(target_output) else 0

    if isinstance(target_output, (tuple, list)):
        success = 0
        for i, value in enumerate(target_output):
            try:
                success += equation_output_check(expected_output[i], value, error)
            except Exception:
                print("Error: {} for variable in position {}.".format(error, i))
        return 1 if success == len(target_output) else 0

    if hasattr(target_output, 'shape'):
        np.testing.assert_array_almost_equal(target_output, expected_output)
    elif not (target_output == expected_output):
        # Explicit raise instead of `assert` so the check survives `python -O`.
        raise AssertionError("Got {} but expected {}".format(
            target_output, expected_output))
    return 1
    
def shape_check(expected_output, target_output, error):
    """Recursively check that ``target_output`` has the same shapes as ``expected_output``.

    Dicts, tuples and lists are walked element by element. A leaf that has a
    ``shape`` attribute must have the same ``shape`` as the matching expected
    value; leaves without a ``shape`` attribute always pass.

    Args:
        expected_output: reference value (same nesting as ``target_output``).
        target_output: value produced by the function under test.
        error (str): message fragment printed when an element fails.

    Returns:
        int: 1 if the check passes, 0 if any element of a container fails.

    Raises:
        AssertionError: if ``target_output`` is a leaf whose shape differs.
    """
    if isinstance(target_output, dict):
        success = 0
        for key, value in target_output.items():
            try:
                success += shape_check(expected_output[key], value, error)
            except Exception:
                print("Error: {} for variable {}.".format(error, key))
        return 1 if success == len(target_output) else 0

    if isinstance(target_output, (tuple, list)):
        success = 0
        for i, value in enumerate(target_output):
            try:
                success += shape_check(expected_output[i], value, error)
            except Exception:
                print("Error: {} for variable {}.".format(error, i))
        return 1 if success == len(target_output) else 0

    if hasattr(target_output, 'shape'):
        # Explicit raise instead of `assert` so the check survives `python -O`.
        if not (target_output.shape == expected_output.shape):
            raise AssertionError("Got shape {} but expected {}".format(
                target_output.shape, expected_output.shape))
    return 1
                
def multiple_test(test_cases, target):
    """Run ``target`` against every test case and report the outcome.

    Each test case is a dict with the keys ``'name'`` (which check to run:
    ``"datatype_check"``, ``"equation_output_check"`` or ``"shape_check"``),
    ``'input'`` (positional arguments for ``target``), ``'expected'`` and
    ``'error'`` (message printed when the case raises).

    Args:
        test_cases (list of dict): the cases to run.
        target (callable): function under test.

    Raises:
        AssertionError: if at least one test case did not pass.
    """
    success = 0
    for test_case in test_cases:
        try:
            target_answer = target(*test_case['input'])
            check_name = test_case['name']
            if check_name == "datatype_check":
                success += datatype_check(test_case['expected'], target_answer, test_case['error'])
            elif check_name == "equation_output_check":
                success += equation_output_check(test_case['expected'], target_answer, test_case['error'])
            elif check_name == "shape_check":
                success += shape_check(test_case['expected'], target_answer, test_case['error'])
        except Exception:
            # A failing case is reported and counted as not passed; interrupts
            # such as KeyboardInterrupt are no longer swallowed.
            print("Error: " + test_case['error'])

    if success == len(test_cases):
        print("\033[92m All tests passed.")
        return

    print('\033[92m', success," Tests passed")
    print('\033[91m', len(test_cases) - success, " Tests failed")
    # Not every callable has __name__ (e.g. functools.partial objects).
    target_name = getattr(target, "__name__", repr(target))
    raise AssertionError("Not all tests were passed for {}. Check your equations and avoid using global variables inside the function.".format(target_name))
        
def multiple_test_weight_loss(test_cases, target, sess):
    """Run ``target`` against every test case, evaluating its result in ``sess``.

    Works like ``multiple_test`` except that the value returned by ``target``
    is evaluated with ``.eval(session=sess)`` before it is checked.

    Args:
        test_cases (list of dict): cases with the keys ``'name'``, ``'input'``,
            ``'expected'`` and ``'error'``.
        target (callable): function under test; its return value must provide
            an ``eval(session=...)`` method.
        sess: session passed to ``eval``.

    Raises:
        AssertionError: if at least one test case did not pass.
    """
    success = 0
    for test_case in test_cases:
        try:
            target_answer = target(*test_case['input']).eval(session=sess)
            check_name = test_case['name']
            if check_name == "datatype_check":
                success += datatype_check(test_case['expected'], target_answer, test_case['error'])
            elif check_name == "equation_output_check":
                success += equation_output_check(test_case['expected'], target_answer, test_case['error'])
            elif check_name == "shape_check":
                success += shape_check(test_case['expected'], target_answer, test_case['error'])
        except Exception:
            # A failing case is reported and counted as not passed; interrupts
            # such as KeyboardInterrupt are no longer swallowed.
            print("Error: " + test_case['error'])

    if success == len(test_cases):
        print("\033[92m All tests passed.")
        return

    print('\033[92m', success," Tests passed")
    print('\033[91m', len(test_cases) - success, " Tests failed")
    # Not every callable has __name__ (e.g. functools.partial objects).
    target_name = getattr(target, "__name__", repr(target))
    raise AssertionError("Not all tests were passed for {}. Check your equations and avoid using global variables inside the function.".format(target_name))


### ex1
def check_for_leakage_test(target):
    """Exercise a leakage-check function on one overlapping and one disjoint pair.

    Prints both scenarios together with the value ``target`` returns for them,
    then runs the datatype, shape and output checks through ``multiple_test``.

    Args:
        target (callable): function called as ``target(df_a, df_b, column)``.
    """
    id_col = 'patient_id'

    # Scenario 1: patient 2 appears in both frames -> leakage expected.
    df1 = pd.DataFrame({id_col: [0, 1, 2]})
    df2 = pd.DataFrame({id_col: [2, 3, 4]})
    expected_output_1 = True

    print("Test Case 1\n")
    print("df1")
    print(df1)
    print("df2")
    print(df2)
    print("leakage output:", target(df1, df2, id_col), "\n-------------------------------------")

    # Scenario 2: no shared patient -> no leakage expected.
    df3 = pd.DataFrame({id_col: [0, 1, 2]})
    df4 = pd.DataFrame({id_col: [3, 4, 5]})
    expected_output_2 = False

    print("Test Case 2\n")
    print("df1")  # the printed headings are deliberately the same as in case 1
    print(df3)
    print("df2")
    print(df4)
    print("leakage output:", target(df3, df4, id_col), "\n")

    scenarios = (
        ((df1, df2), expected_output_1),
        ((df3, df4), expected_output_2),
    )
    # One error message per (check, scenario); the two datatype messages
    # differ in spelling and are kept as they are.
    checks = (
        ("datatype_check",
         ("Data-type mismatch, make sure you are using pandas functions",
          "Datatype mismatch, make sure you are using pandas functions")),
        ("shape_check",
         ("Wrong shape, make sure you are using pandas functions",) * 2),
        ("equation_output_check",
         ("Wrong output, make sure you are using pandas functions",) * 2),
    )

    test_cases = []
    for check_name, messages in checks:
        for (frames, expected), message in zip(scenarios, messages):
            test_cases.append({
                "name": check_name,
                "input": [frames[0], frames[1], id_col],
                "expected": expected,
                "error": message,
            })

    multiple_test(test_cases, target)
    
### ex2
def compute_class_freqs_test(target):
    """Exercise a class-frequency function on a fixed 5x3 label matrix.

    Prints the matrix and the frequencies ``target`` returns for it, then runs
    the datatype, shape and output checks through ``multiple_test``.

    Args:
        target (callable): function called as ``target(labels_matrix)`` and
            expected to return ``(positive_frequencies, negative_frequencies)``.
    """
    labels_matrix = np.array(
        [[1, 0, 0],
         [0, 1, 1],
         [1, 0, 1],
         [1, 1, 1],
         [1, 0, 1]]
    )

    print("Labels:")
    print(labels_matrix)
    pos_freqs, neg_freqs = target(labels_matrix)
    print("\nPos Freqs: ", pos_freqs)
    print("Neg Freqs: ", neg_freqs, "\n")

    # Column-wise share of ones, and its complement.
    expected_freqs = (np.array([0.8, 0.4, 0.8]), np.array([0.2, 0.6, 0.2]))

    checks = (
        ("datatype_check", "Data-type mismatch."),
        ("shape_check", "Wrong shape."),
        ("equation_output_check", "Wrong output."),
    )
    test_cases = [
        {
            "name": check_name,
            "input": [labels_matrix],
            "expected": expected_freqs,
            "error": message,
        }
        for check_name, message in checks
    ]

    multiple_test(test_cases, target)
    
### ex3
def get_weighted_loss_test_case(sess):
    """Build the fixed inputs used to test a weighted loss.

    Args:
        sess: backend session used to evaluate the constants.

    Returns:
        tuple: ``(y_true, w_p, w_n, y_pred_1, y_pred_2)`` where ``y_true`` is a
        4x3 label matrix, ``w_p`` / ``w_n`` are the per-class weights, and the
        two predictions are constant 0.7 and constant 0.3 with the same shape
        as ``y_true``. The three matrices are the evaluated backend constants.
    """
    with sess.as_default() as sess:
        truth_tensor = K.constant(np.array(
            [[1, 1, 1],
             [1, 1, 0],
             [0, 1, 0],
             [1, 0, 1]]
        ))

        w_p = np.array([0.25, 0.25, 0.5])
        w_n = np.array([0.75, 0.75, 0.5])

        uniform = np.ones(truth_tensor.shape)
        high_tensor = K.constant(0.7 * uniform)
        low_tensor = K.constant(0.3 * uniform)

    y_true = truth_tensor.eval(session=sess)
    y_pred_1 = high_tensor.eval(session=sess)
    y_pred_2 = low_tensor.eval(session=sess)
    return y_true, w_p, w_n, y_pred_1, y_pred_2
def get_weighted_loss_test(target, epsilon, sess):
    """Exercise a weighted-loss factory on the fixed test case.

    Prints the inputs and the two losses, then runs the datatype, shape and
    output checks through ``multiple_test_weight_loss``.

    Args:
        target (callable): factory called as ``target(w_p, w_n, epsilon)`` that
            returns a loss function ``L(y_true, y_pred)``.
        epsilon: value forwarded to ``target``.
        sess: backend session used to evaluate the losses.
    """
    y_true, w_p, w_n, y_pred_1, y_pred_2 = get_weighted_loss_test_case(sess)

    for heading, value in (
        ("y_true:", y_true),
        ("\nw_p:", w_p),
        ("\nw_n:", w_n),
        ("\ny_pred_1:", y_pred_1),
        ("\ny_pred_2:", y_pred_2),
    ):
        print(heading)
        print(value)

    L = target(w_p, w_n, epsilon)
    L1 = L(y_true, y_pred_1).eval(session=sess)
    L2 = L(y_true, y_pred_2).eval(session=sess)

    print("\nIf you weighted them correctly, you'd expect the two losses to be the same.")
    print("With epsilon = 1, your losses should be, L(y_pred_1) = -0.4956203 and L(y_pred_2) = -0.4956203\n")
    print("Your outputs:\n")
    print("L(y_pred_1) = ", L1)
    print("L(y_pred_2) = ", L2)
    print("Difference: L(y_pred_1) - L(y_pred_2) = ", L1-L2, "\n")

    expected_output_1 = np.float32(-0.4956203)
    expected_output_2 = np.float32(-0.4956203)

    predictions = (
        (y_pred_1, expected_output_1),
        (y_pred_2, expected_output_2),
    )
    checks = (
        ("datatype_check", "Data-type mismatch. Make sure it is a np.float32 value."),
        ("shape_check", "Wrong shape."),
        ("equation_output_check", "Wrong output. One possible mistake, your epsilon is not equal to 1."),
    )
    # Each check runs against both predictions, first y_pred_1 and then y_pred_2.
    test_cases = [
        {
            "name": check_name,
            "input": [y_true, y_pred],
            "expected": expected,
            "error": message,
        }
        for check_name, message in checks
        for y_pred, expected in predictions
    ]

    multiple_test_weight_loss(test_cases, L, sess)

In [None]:
# Load the three splits from CSV files in the current working directory.
train_df = pd.read_csv("./train-small.csv")
valid_df = pd.read_csv("./valid-small.csv")

test_df = pd.read_csv("./test.csv")

# Preview the first rows of the training split.
train_df.head()

Unnamed: 0,Image,Atelectasis,Cardiomegaly,Consolidation,Edema,Effusion,Emphysema,Fibrosis,Hernia,Infiltration,Mass,Nodule,PatientId,Pleural_Thickening,Pneumonia,Pneumothorax
0,00008270_015.png,0,0,0,0,0,0,0,0,0,0,0,8270,0,0,0
1,00029855_001.png,1,0,0,0,1,0,0,0,1,0,0,29855,0,0,0
2,00001297_000.png,0,0,0,0,0,0,0,0,0,0,0,1297,1,0,0
3,00012359_002.png,0,0,0,0,0,0,0,0,0,0,0,12359,0,0,0
4,00017951_001.png,0,0,0,0,0,0,0,0,1,0,0,17951,0,0,0


In [None]:
# Names of the 14 pathology label columns. The order of this list fixes the
# column order of the generators' labels and the order of the model outputs.
labels = ['Cardiomegaly', 
          'Emphysema', 
          'Effusion', 
          'Hernia', 
          'Infiltration', 
          'Mass', 
          'Nodule', 
          'Atelectasis',
          'Pneumothorax',
          'Pleural_Thickening', 
          'Pneumonia', 
          'Fibrosis', 
          'Edema', 
          'Consolidation']

In [None]:
def check_for_leakage(df1, df2, patient_col):
    """
    Tell whether at least one patient appears in both dataframes.

    Args:
        df1 (dataframe): dataframe describing the first dataset
        df2 (dataframe): dataframe describing the second dataset
        patient_col (str): name of the column that holds the patient IDs

    Returns:
        leakage (bool): True if the two datasets share a patient, otherwise False
    """
    first_ids = set(df1[patient_col].values)
    second_ids = set(df2[patient_col].values)

    # The sets overlap exactly when they are not disjoint.
    return not first_ids.isdisjoint(second_ids)

In [None]:
# Run the test helper defined above against check_for_leakage.
check_for_leakage_test(check_for_leakage)

In [None]:
# Report patient overlap for every pair of splits, keyed on the 'PatientId' column.
print("leakage between train and valid: {}".format(check_for_leakage(train_df, valid_df, 'PatientId')))
print("leakage between train and test: {}".format(check_for_leakage(train_df, test_df, 'PatientId')))
print("leakage between valid and test: {}".format(check_for_leakage(valid_df, test_df, 'PatientId')))

In [None]:
def get_train_generator(df, image_dir, x_col, y_cols, shuffle=True, batch_size=8, seed=1, target_w = 320, target_h = 320):
    """
    Return generator for training set, normalizing each image with its own
    mean and standard deviation (samplewise normalization).

    Args:
      df (dataframe): dataframe specifying training data.
      image_dir (str): directory where image files are held.
      x_col (str): name of column in df that holds filenames.
      y_cols (list): list of strings that hold y labels for images.
      shuffle (bool): whether to shuffle the data.
      batch_size (int): images per batch to be fed into model during training.
      seed (int): random seed.
      target_w (int): final width of input images.
      target_h (int): final height of input images.
    
    Returns:
        train_generator (DataFrameIterator): iterator over training set
    """        
    print("getting train generator...") 
    # normalize images
    image_generator = ImageDataGenerator(
        samplewise_center=True,
        samplewise_std_normalization=True)
    
    # flow from directory with specified batch size and target image size.
    # Keras expects target_size as (height, width).
    generator = image_generator.flow_from_dataframe(
            dataframe=df,
            directory=image_dir,
            x_col=x_col,
            y_col=y_cols,
            class_mode="raw",
            batch_size=batch_size,
            shuffle=shuffle,
            seed=seed,
            target_size=(target_h, target_w))
    
    return generator

In [None]:
def get_test_and_valid_generator(valid_df, test_df, train_df, image_dir, x_col, y_cols, sample_size=100, batch_size=8, seed=1, target_w = 320, target_h = 320):
    """
    Return generators for the validation set and the test set, normalized with
    statistics estimated from a sample of the training set.

    Args:
      valid_df (dataframe): dataframe specifying validation data.
      test_df (dataframe): dataframe specifying test data.
      train_df (dataframe): dataframe specifying training data.
      image_dir (str): directory where image files are held.
      x_col (str): name of column in df that holds filenames.
      y_cols (list): list of strings that hold y labels for images.
      sample_size (int): size of sample to use for normalization statistics.
      batch_size (int): images per batch to be fed into model during training.
      seed (int): random seed.
      target_w (int): final width of input images.
      target_h (int): final height of input images.
    
    Returns:
        (valid_generator, test_generator): DataFrameIterators over the
        validation set and the test set, in that order.
    """
    print("getting test and valid generators...")
    # Keras expects target_size as (height, width).
    target_size = (target_h, target_w)

    # get generator to sample dataset; it uses the function's own arguments
    # (not notebook globals) and the given seed so the sample is reproducible
    raw_train_generator = ImageDataGenerator().flow_from_dataframe(
        dataframe=train_df, 
        directory=image_dir, 
        x_col=x_col, 
        y_col=y_cols, 
        class_mode="raw", 
        batch_size=sample_size, 
        shuffle=True, 
        seed=seed,
        target_size=target_size)
    
    # get data sample: the images of the first batch
    batch = next(raw_train_generator)
    data_sample = batch[0]

    # use sample to fit mean and std for the validation and test generators
    image_generator = ImageDataGenerator(
        featurewise_center=True,
        featurewise_std_normalization=True)
    
    # fit generator to sample from training data
    image_generator.fit(data_sample)

    # get validation generator
    valid_generator = image_generator.flow_from_dataframe(
            dataframe=valid_df,
            directory=image_dir,
            x_col=x_col,
            y_col=y_cols,
            class_mode="raw",
            batch_size=batch_size,
            shuffle=False,
            seed=seed,
            target_size=target_size)

    # get test generator
    test_generator = image_generator.flow_from_dataframe(
            dataframe=test_df,
            directory=image_dir,
            x_col=x_col,
            y_col=y_cols,
            class_mode="raw",
            batch_size=batch_size,
            shuffle=False,
            seed=seed,
            target_size=target_size)
    return valid_generator, test_generator

In [None]:
# Directory that holds the image files referenced by the "Image" column.
IMAGE_DIR = "./images/"
# Training generator with the default settings (shuffled, batch size 8).
train_generator = get_train_generator(train_df, IMAGE_DIR, "Image", labels)
# Validation and test generators, normalized with statistics from a training sample.
valid_generator, test_generator= get_test_and_valid_generator(valid_df, test_df, train_df, IMAGE_DIR, "Image", labels)

In [None]:
# Fetch the first batch (images, labels) and display its first image.
x, y = train_generator.__getitem__(0)
plt.imshow(x[0]);

In [None]:
# Bar chart of the mean of each label column over the training set,
# i.e. the fraction of training images that are positive for each class.
plt.xticks(rotation=90)
plt.bar(x=labels, height=np.mean(train_generator.labels, axis=0))
plt.title("Frequency of Each Class")
plt.show()

In [None]:
def compute_class_freqs(labels):
    """
    Compute, per class, the fraction of positive and of negative examples.

    Args:
        labels (np.array): matrix of labels, size (num_examples, num_classes)
    Returns:
        positive_frequencies (np.array): share of positive examples for each
                                         class, size (num_classes)
        negative_frequencies (np.array): share of negative examples for each
                                         class, size (num_classes)
    """
    num_examples = labels.shape[0]
    positive_counts = np.sum(labels, axis=0)

    positive_frequencies = positive_counts / num_examples
    # Negatives are the complement of the positives.
    negative_frequencies = 1 - positive_frequencies

    return positive_frequencies, negative_frequencies

In [None]:
### do not edit this code cell
# Run the test helper defined above against compute_class_freqs.
compute_class_freqs_test(compute_class_freqs)

In [None]:
# Per-class positive / negative frequencies of the training labels.
freq_pos, freq_neg = compute_class_freqs(train_generator.labels)
# Display the positive frequencies as the cell output.
freq_pos

In [None]:
# Long-format table with one row per (class, label kind) for a grouped bar plot.
# Built with pd.concat because DataFrame.append was removed in pandas 2.0.
data = pd.DataFrame({"Class": labels, "Label": "Positive", "Value": freq_pos})
negative_rows = pd.DataFrame({"Class": labels, "Label": "Negative", "Value": freq_neg})
data = pd.concat([data, negative_rows], ignore_index=True)
plt.xticks(rotation=90)
f = sns.barplot(x="Class", y="Value", hue="Label" ,data=data)

In [None]:
# Weight the positive term by the negative frequency and vice versa, so that
# both contributions equal freq_pos * freq_neg and are balanced for every class.
pos_weights = freq_neg
neg_weights = freq_pos
pos_contribution = freq_pos * pos_weights 
neg_contribution = freq_neg * neg_weights

In [None]:
# Long-format table of the weighted contributions for a grouped bar plot.
# Built with pd.concat because DataFrame.append was removed in pandas 2.0.
data = pd.DataFrame({"Class": labels, "Label": "Positive", "Value": pos_contribution})
negative_rows = pd.DataFrame({"Class": labels, "Label": "Negative", "Value": neg_contribution})
data = pd.concat([data, negative_rows], ignore_index=True)
plt.xticks(rotation=90)
sns.barplot(x="Class", y="Value", hue="Label" ,data=data);

In [None]:
def get_weighted_loss(pos_weights, neg_weights, epsilon=1e-7):
    """
    Build a class-weighted binary cross-entropy loss.

    Args:
      pos_weights (np.array): weight of the positive term for each class, size (num_classes)
      neg_weights (np.array): weight of the negative term for each class, size (num_classes)
      epsilon (float): small value added inside the logarithms to avoid log(0)
    
    Returns:
      weighted_loss (function): loss function taking (y_true, y_pred)
    """
    def weighted_loss(y_true, y_pred):
        """
        Compute the weighted loss.

        Args:
            y_true (Tensor): Tensor of true labels, size is (num_examples, num_classes)
            y_pred (Tensor): Tensor of predicted labels, size is (num_examples, num_classes)
        Returns:
            loss (float): sum over classes of the per-class mean weighted loss
        """
        total = 0.0
        for idx in range(len(pos_weights)):
            truth = y_true[:, idx]
            prob = y_pred[:, idx]
            # NOTE(review): assumes y_true and y_pred share a dtype; cast both
            # to the dtype of the weights first if they do not.
            positive_term = pos_weights[idx] * truth * K.log(prob + epsilon)
            negative_term = neg_weights[idx] * (1 - truth) * K.log(1 - prob + epsilon)
            # Average over the examples, then accumulate across classes.
            total += K.mean(-(positive_term + negative_term))
        return total

    return weighted_loss

In [None]:
# test with a large epsilon in order to catch errors. 
# set epsilon = 1
epsilon = 1

### do not edit anything below
# Evaluate the loss tensors in the backend's current session.
sess = K.get_session()
get_weighted_loss_test(get_weighted_loss, epsilon, sess)

In [None]:
# create the base pre-trained model
## in case we already have our own pretrained weights we use it like this:
## base_model = DenseNet121(weights='./models/densenet.hdf5', include_top=False)
## in case we use some specific pretrained weights e.g. from imagenet:
base_model = DenseNet121(weights='imagenet', include_top=False)

x = base_model.output

# add a global spatial average pooling layer
x = GlobalAveragePooling2D()(x)

# and a logistic layer: one sigmoid unit per entry of `labels`
predictions = Dense(len(labels), activation="sigmoid")(x)

# Train with Adam and the class-weighted loss built from the weights above.
model = Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer='adam', loss=get_weighted_loss(pos_weights, neg_weights))

In [None]:
## if we would already have finetuned model on our medical dataset we would load the model like this:
## model.load_weights("./models/pretrained_model.h5") 

# Short fine-tuning run: 3 epochs of 100 training batches each, validated on
# 25 batches per epoch.
history = model.fit(train_generator, 
                    validation_data=valid_generator,
                    steps_per_epoch=100, 
                    validation_steps=25, 
                    epochs = 3
                    )

# Plot the training loss per epoch (the validation loss is not plotted).
plt.plot(history.history['loss'])
plt.ylabel("loss")
plt.xlabel("epoch")
plt.title("Training Loss Curve")
plt.show()

In [None]:
# Predict on every batch of the test generator.
predicted_vals = model.predict(test_generator, steps = len(test_generator))

In [None]:
# Plot the ROC curves on the test set and keep the AUC values.
auc_rocs = get_roc_curve(labels, predicted_vals, test_generator)

In [None]:
df = pd.read_csv("./train-small.csv")
IMAGE_DIR = "./images/"

# calculate general reusable metrics for mean and std
metrics = get_mean_std_per_batch(IMAGE_DIR, df)
mean, std = metrics

# only show the labels with top 4 AUC
# NOTE(review): this indexes `labels` with positions from `auc_rocs`, so it
# assumes get_roc_curve returned one AUC per label (no label was skipped).
labels_to_show = np.take(labels, np.argsort(auc_rocs)[::-1])[:4]
# Display the selected labels and the first entry of the mean and of the std.
labels_to_show, mean[0], std[0]

In [None]:
# GradCAM overlays for one fixed image, reusing the precomputed mean/std.
compute_gradcam(model, '00008270_015.png', IMAGE_DIR, df, labels, labels_to_show, metrics=metrics)

In [None]:
### again select the image from training set but randomly
# df.sample(1) draws one random row; its "Image" value is the file name.
random_img = df.sample(1)["Image"].values[0]
compute_gradcam(model, random_img, IMAGE_DIR, df, labels, labels_to_show, metrics=metrics)

# GradCam interpretation in more detail

In [None]:
def grad_cam(input_model, image, category_index, layer_name):
    """
    GradCAM method for visualizing input saliency.

    Takes the gradient of one output unit with respect to the activations of
    an intermediate layer, averages that gradient over height and width to get
    one weight per channel, and returns the ReLU-ed weighted sum of the
    activation maps, resized to the input's height and width.

    Uses the backend calls `K.gradients` and `K.function`; the notebook
    disables eager execution at import time, which these calls rely on.

    Args:
        input_model (Keras.model): model to compute cam for
        image (tensor): input to model, shape (1, H, W, 3), where H (int) is height W (int) is width
        category_index (int): class to compute cam with respect to
        layer_name (str): relevant layer in model
    Return:
        cam (np.ndarray): heatmap of shape (H, W) with non-negative values,
            divided by its maximum so the largest value is 1. If the map has
            no positive entry it is returned as all zeros instead of being
            divided by zero.
    """
    # 1. Symbolic score of the requested class: drop the batch dimension of the
    #    model output, then pick the unit at category_index
    y_c = input_model.output[0][category_index]

    # Output tensor of the layer whose spatial maps are being explained
    spatial_map_layer = input_model.get_layer(layer_name).output

    # 2. Gradient of the class score with respect to that layer
    #    (K.gradients returns a list of length 1)
    grads = K.gradients(y_c, spatial_map_layer)[0]

    # 3. Backend function mapping the model input to (layer activations, gradients)
    spatial_map_and_gradient_function = K.function([input_model.input], [spatial_map_layer, grads])

    # Evaluate both for the given image
    spatial_map_all_dims, grads_val_all_dims = spatial_map_and_gradient_function([image])

    # Remove the batch dimension: (B, H, W, C) -> (H, W, C)
    spatial_map_val = spatial_map_all_dims[0]
    grads_val = grads_val_all_dims[0]

    # 4. Global average pooling of the gradient over height and width -> shape (C)
    weights = np.mean(grads_val, axis=(0, 1))

    # 5. Weighted sum of the spatial maps over the channel axis -> shape (H, W) of the layer
    cam = np.dot(spatial_map_val, weights)

    # Post-processing
    H, W = image.shape[1], image.shape[2]
    cam = np.maximum(cam, 0)  # ReLU so we only get positive importance

    # cv2.resize takes dsize as (width, height). The interpolation flag must be
    # passed by keyword: the third positional parameter of cv2.resize is `dst`,
    # so a positional flag there does not select the interpolation mode.
    cam = cv2.resize(cam, (W, H), interpolation=cv2.INTER_NEAREST)

    # Scale to a maximum of 1; skip the division when nothing is positive,
    # which would otherwise turn the whole map into NaNs.
    peak = cam.max()
    if peak > 0:
        cam = cam / peak

    return cam


def compute_gradcam(model, img, mean, std, data_dir, df, 
                    labels, selected_labels, layer_name='conv5_block16_concat'):
    """
    Compute GradCAM for many specified labels for an image. 
    This method will use the `grad_cam` function.

    Prints the ground-truth labels of the image taken from `df`, then draws a
    single-row matplotlib figure: the original image first, followed by one
    panel per selected label with the GradCAM heatmap overlaid on the image.
    The row has 5 columns, and grows when more than 4 labels are selected.
    Nothing is returned.

    Args:
        model (Keras.model): Model to compute GradCAM for
        img (string): Image name we want to compute GradCAM for.
        mean (float): Mean to normalize to image.
        std (float): Standard deviation to normalize the image.
        data_dir (str): Path of the directory to load the images from
            (with or without a trailing separator).
        df(pd.Dataframe): Dataframe with the image features; must contain an
            "Image" column and one column per entry of `labels`.
        labels ([str]): All output labels for the model.
        selected_labels ([str]): All output labels we want to compute the GradCAM for.
        layer_name: Intermediate layer from the model we want to compute the GradCAM for.

    Raises:
        IndexError: if `df` has no row whose "Image" value equals `img`.
    """
    img_path = os.path.join(data_dir, img)
    preprocessed_input = load_image_normalize(img_path, mean, std)
    predictions = model.predict(preprocessed_input)

    # Ground truth: names of the label columns that are non-zero in the image's (first) row
    ground_truth_row = df[df["Image"] == img][labels].values[0]
    print("Ground Truth: ", ", ".join(np.take(labels, np.flatnonzero(ground_truth_row))))

    # Load the un-preprocessed image once; it is the backdrop of every panel.
    # assumes load_image returns the same result for repeated calls with the same path — TODO confirm
    original_image = load_image(img_path, df, preprocess=False)

    # (output index, label) pairs to visualise, kept in the model's output order
    selected = [(i, label) for i, label in enumerate(labels) if label in selected_labels]

    # One column for the original plus one per selected label; never fewer than 5
    # so the layout for up to 4 labels is unchanged.
    n_panels = max(5, len(selected) + 1)

    plt.figure(figsize=(15, 10))
    plt.subplot(1, n_panels, 1)
    plt.title("Original")
    plt.axis('off')
    plt.imshow(original_image, cmap='gray')

    for panel, (i, label) in enumerate(selected, start=2):
        # Use the grad_cam function to calculate gradcam for this output unit
        gradcam = grad_cam(model, preprocessed_input, i, layer_name)
        probability = predictions[0][i]

        print("Generating gradcam for class %s (p=%2.2f)" % (label, round(probability, 3)))
        plt.subplot(1, n_panels, panel)
        plt.title(label + ": " + str(round(probability, 3)))
        plt.axis('off')
        plt.imshow(original_image, cmap='gray')
        # Overlay opacity follows the predicted probability, capped at 0.5
        plt.imshow(gradcam, cmap='magma', alpha=min(0.5, probability))


