# Learning process


In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from keras import optimizers, applications, callbacks
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, multilabel_confusion_matrix, roc_auc_score, roc_curve


In [None]:
# Schema shared by the three dataset CSVs: the image path is read as a string
# and each of the eleven label columns as float32.
column_types = {
    'Path': str,
    **dict.fromkeys(
        (
            'No Finding',
            'Lung Opacity',
            'Lung Lesion',
            'Edema',
            'Consolidation',
            'Pneumonia',
            'Atelectasis',
            'Pneumothorax',
            'Pleural Effusion',
            'Pleural Other',
            'Fracture',
        ),
        np.float32,
    ),
}

# Load the splits with the same schema, in train / validation / test order.
train_df, val_df, test_df = (
    pd.read_csv(filepath_or_buffer=csv_path, dtype=column_types)
    for csv_path in (
        '../dataset/train.csv',
        '../dataset/validation.csv',
        '../dataset/test.csv',
    )
)


In [None]:
# Relabel every split with the schema's column names, in declaration order.
for split_df in (train_df, val_df, test_df):
    split_df.columns = list(column_types)


In [None]:
# Every column after the first ('Path') is treated as a label column.
list_columns = list(train_df.columns)
y_cols = list_columns[1:]

# Options shared by the train / validation / test iterators.
# class_mode='raw' makes each batch's targets the values of the y_col columns.
flow_options = dict(
    directory='..',
    x_col='Path',
    y_col=y_cols,
    validate_filenames=False,
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='raw',
    batch_size=20,
)

train_datagen = ImageDataGenerator().flow_from_dataframe(
    dataframe=train_df,
    **flow_options,
)

val_datagen = ImageDataGenerator().flow_from_dataframe(
    dataframe=val_df,
    **flow_options,
)

# shuffle=False is required here: flow_from_dataframe shuffles by default, and
# the evaluation cells compare model.predict(test_datagen) against
# test_datagen.labels, which is in dataframe order. With shuffling enabled the
# predictions and labels would not refer to the same rows.
test_datagen = ImageDataGenerator().flow_from_dataframe(
    dataframe=test_df,
    shuffle=False,
    **flow_options,
)


In [None]:
# Input shape fed to every architecture. Named `input_shape` rather than
# `input` so the builtin input() is not shadowed for the rest of the notebook.
input_shape = (224, 224, 3)

# Candidate architectures, all built from scratch (weights=None) with an
# 11-unit classification head.
# NOTE(review): no classifier activation is passed, so each head uses the
# keras.applications default (documented as softmax), while the loss below is
# binary cross-entropy and predictions are thresholded per label later on.
# Confirm whether a sigmoid head was intended.
models = {
    name: build(input_shape=input_shape, classes=11, weights=None)
    for name, build in (
        ('VGG16', applications.VGG16),
        ('ResNet50', applications.ResNet50),
        ('DenseNet121', applications.DenseNet121),
    )
}

# Training hyper-parameters.
lr = 1e-5
opt = optimizers.Adam(learning_rate=lr)
epochs = 15
loss = 'binary_crossentropy'
# NOTE(review): which metric the 'accuracy' alias resolves to depends on the
# Keras version and output shape; 'binary_accuracy' is the explicit per-label
# metric. Verify which one should be monitored.
metrics = ['accuracy', 'binary_accuracy']


In [None]:
# Compile every candidate model with the shared loss and metrics.
# Each model receives its own optimizer, rebuilt from `opt`'s configuration:
# a Keras optimizer keeps per-variable state and a step counter, so one
# instance must not be shared between independently trained models.
for model in models.values():
    model.compile(
        loss=loss,
        optimizer=type(opt).from_config(opt.get_config()),
        metrics=metrics,
    )


In [None]:
# Checkpoint callback: writes the whole model (not only the weights) each time
# the monitored 'val_accuracy' reaches a new maximum. The file name embeds the
# epoch number and the validation loss.
# NOTE(review): 'val_accuracy' tracks whatever the 'accuracy' metric alias
# resolved to at compile time - confirm this is the intended selection metric.
checkpoint = callbacks.ModelCheckpoint(
    filepath='VGG16_model.{epoch:02d}-{val_loss:.2f}.hdf5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
    verbose=0,
)


In [None]:
# Train the VGG16 candidate under the '/GPU:0' device scope, validating on the
# validation iterator and checkpointing through the callback defined earlier.
with tf.device('/GPU:0'):
    history_res = models['VGG16'].fit(
        x=train_datagen,
        epochs=epochs,
        callbacks=[checkpoint],
        validation_data=val_datagen,
    )


# ROC curve


In [None]:
# Run the VGG16 model over the test iterator: one step per batch, a single
# worker, with the progress bar shown.
predictions = models['VGG16'].predict(
    test_datagen,
    verbose=1,
    workers=1,
    steps=len(test_datagen),
)


In [None]:
predictions = (predictions >= 0.5).astype(np.uint8)


In [None]:
report = classification_report(
    y_true=test_datagen.labels,
    y_pred=predictions,
    digits=4,
    target_names=y_cols
)


In [None]:
confusion_matrix = multilabel_confusion_matrix(
    y_true=test_datagen.labels,
    y_pred=predictions
)


In [None]:
auc_roc_values = []

for i in range(len(y_cols)):
    y_true = test_datagen.y_cols[:, i]
    y_pred = predictions[:, i]

    auc_roc = roc_auc_score(y_true=y_true, y_score=y_pred)
    auc_roc_values.append(auc_roc)

    false_positive_rates, true_positive_rates, _ = roc_curve(
        y_true=y_true, y_score=y_pred)

    plt.figure(1, figsize=(13, 13))
    plt.plot([0, 1], [0, 1], 'k--')
    plt.plot(
        false_positive_rates,
        true_positive_rates,
        label=y_cols[i] + " (" + str(round(auc_roc, 4)) + ")"
    )
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title('VGG-16')
    plt.legend(loc='best')

plt.show()
