In [8]:
# Chest X-ray Image Classification

In [22]:
#Load Library
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import Xception
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import RMSprop
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd 
from sklearn.utils import shuffle




In [11]:
metadata = pd.read_csv('Chest_xray_Corona_Metadata.csv')
summary = pd.read_csv('Chest_xray_Corona_dataset_Summary.csv')

metadata.sample(5)

Unnamed: 0.1,Unnamed: 0,X_ray_image_name,Label,Dataset_type,Label_2_Virus_category,Label_1_Virus_category
1969,1969,person1318_virus_2274.jpeg,Pnemonia,TRAIN,,Virus
4878,4878,person7_bacteria_24.jpeg,Pnemonia,TRAIN,,bacteria
1533,1533,person1078_virus_1788.jpeg,Pnemonia,TRAIN,,Virus
5205,5205,person894_virus_1546.jpeg,Pnemonia,TRAIN,,Virus
494,494,IM-0656-0001.jpeg,Normal,TRAIN,,


In [20]:
train_data = metadata[metadata['Dataset_type'] == 'TRAIN']
test_data = metadata[metadata['Dataset_type'] == 'TEST']
assert train_data.shape[0] + test_data.shape[0] == metadata.shape[0]
print(train_data.shape)
print(test_data.shape)
print(metadata.shape)
test_data.sample(5)

(5286, 6)
(624, 6)
(5910, 6)


Unnamed: 0.1,Unnamed: 0,X_ray_image_name,Label,Dataset_type,Label_2_Virus_category,Label_1_Virus_category
5293,5316,IM-0011-0001-0002.jpeg,Normal,TEST,,
5331,5354,IM-0099-0001.jpeg,Normal,TEST,,
5470,5493,NORMAL2-IM-0120-0001.jpeg,Normal,TEST,,
5434,5457,NORMAL2-IM-0246-0001.jpeg,Normal,TEST,,
5775,5798,person78_bacteria_384.jpeg,Pnemonia,TEST,,bacteria


In [25]:
# Generate the final train data from original train data with conditions refered from EDA inference
final_train_data = train_data[(train_data['Label'] == 'Normal') | 
                              ((train_data['Label'] == 'Pnemonia') & (train_data['Label_2_Virus_category'] == 'COVID-19'))]


# Create a target attribute where value = positive if 'Pnemonia + COVID-19' or value = negative if 'Normal'
final_train_data['target'] = ['negative' if holder == 'Normal' else 'positive' for holder in final_train_data['Label']]

final_train_data = shuffle(final_train_data, random_state=1)

final_validation_data = final_train_data.iloc[1000:, :]
final_train_data = final_train_data.iloc[:1000, :]

print(final_train_data.shape)
final_train_data.sample(30)

(1000, 7)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


Unnamed: 0.1,Unnamed: 0,X_ray_image_name,Label,Dataset_type,Label_2_Virus_category,Label_1_Virus_category,target
5259,5273,5931B64A-7B97-485D-BE60-3F1EA76BC4F0.jpeg,Pnemonia,TRAIN,COVID-19,Virus,positive
1047,1047,NORMAL2-IM-0983-0001.jpeg,Normal,TRAIN,,,negative
490,490,IM-0660-0001-0001.jpeg,Normal,TRAIN,,,negative
578,578,IM-0703-0001.jpeg,Normal,TRAIN,,,negative
754,754,NORMAL2-IM-0587-0001.jpeg,Normal,TRAIN,,,negative
711,711,NORMAL2-IM-0491-0001.jpeg,Normal,TRAIN,,,negative
783,783,NORMAL2-IM-0629-0001.jpeg,Normal,TRAIN,,,negative
5272,5291,171CB377-62FF-4B76-906C-F3787A01CB2E.jpeg,Pnemonia,TRAIN,COVID-19,Virus,positive
94,94,IM-0262-0001.jpeg,Normal,TRAIN,,,negative
482,482,IM-0666-0001-0001.jpeg,Normal,TRAIN,,,negative


In [44]:
#Load file & Create Model
train_data_dir = 'Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/train'
test_data_dir = 'Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/test'

epochs = 5

model = tf.keras.models.Sequential([
# YOUR CODE HERE
    tf.keras.layers.Conv2D(32, (3,3), activation='relu',input_shape=(224, 224, 3)),
    tf.keras.layers.MaxPool2D(2,2),
    
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    
    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy','TruePositives','TrueNegatives','FalsePositives','FalseNegatives'])

#os.listdir(validation_data_dir)
print(train_data_dir)
print(validation_data_dir)
model.summary()

Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/train
Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/test
Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_21 (Conv2D)           (None, 222, 222, 32)      896       
_________________________________________________________________
max_pooling2d_21 (MaxPooling (None, 111, 111, 32)      0         
_________________________________________________________________
conv2d_22 (Conv2D)           (None, 109, 109, 64)      18496     
_________________________________________________________________
max_pooling2d_22 (MaxPooling (None, 54, 54, 64)        0         
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 52, 52, 128)       73856     
_________________________________________________________________
max_pooling2d_23 (MaxPooling (None, 26, 26, 128)       

In [38]:
# this is the augmentation configuration we will use for training
train_image_generator = ImageDataGenerator(
    rescale=1./255,
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=90,
    width_shift_range=0.15,
    height_shift_range=0.15,
    horizontal_flip=True,
    zoom_range=[0.9, 1.25],
    brightness_range=[0.5, 1.5]
)

test_image_generator = ImageDataGenerator(
    rescale=1./255
)

train_generator = train_image_generator.flow_from_dataframe(
    dataframe=final_train_data,
    directory=train_data_dir,
    x_col='X_ray_image_name',
    y_col='target',
    target_size=(224, 224),
    batch_size=8,
    seed=2020,
    shuffle=True,
    class_mode='binary'
)

validation_generator = train_image_generator.flow_from_dataframe(
    dataframe=final_validation_data,
    directory=train_data_dir,
    x_col='X_ray_image_name',
    y_col='target',
    target_size=(224, 224),
    batch_size=8,
    seed=2020,
    shuffle=True,
    class_mode='binary'
)

test_generator = test_image_generator.flow_from_dataframe(
    dataframe=test_data,
    directory=test_data_dir,
    x_col='X_ray_image_name',
    target_size=(224, 224),
    shuffle=False,
    batch_size=16,
    class_mode=None
)

history = model.fit_generator(
    train_generator,
    steps_per_epoch=20,
    epochs=epochs,
    validation_data=validation_generator,
    verbose = 1,
    validation_steps=3)

Found 1000 validated image filenames belonging to 2 classes.
Found 400 validated image filenames belonging to 2 classes.
Found 624 validated image filenames.
Epoch 1/5




 1/20 [>.............................] - ETA: 58s - loss: 0.6746 - accuracy: 0.7500 - TruePositives: 0.0000e+00 - TrueNegatives: 6.0000 - FalsePositives: 1.0000 - FalseNegatives: 1.0000



 2/20 [==>...........................] - ETA: 45s - loss: 1.3013 - accuracy: 0.8125 - TruePositives: 0.0000e+00 - TrueNegatives: 13.0000 - FalsePositives: 1.0000 - FalseNegatives: 2.0000



 3/20 [===>..........................] - ETA: 39s - loss: 1.5103 - accuracy: 0.8333 - TruePositives: 0.0000e+00 - TrueNegatives: 20.0000 - FalsePositives: 1.0000 - FalseNegatives: 3.0000



 4/20 [=====>........................] - ETA: 35s - loss: 1.1327 - accuracy: 0.8750 - TruePositives: 0.0000e+00 - TrueNegatives: 28.0000 - FalsePositives: 1.0000 - FalseNegatives: 3.0000































































Epoch 2/5




 1/20 [>.............................] - ETA: 36s - loss: 0.0000e+00 - accuracy: 1.0000 - TruePositives: 0.0000e+00 - TrueNegatives: 8.0000 - FalsePositives: 0.0000e+00 - FalseNegatives: 0.0000e+00



 2/20 [==>...........................] - ETA: 34s - loss: 0.0000e+00 - accuracy: 1.0000 - TruePositives: 0.0000e+00 - TrueNegatives: 16.0000 - FalsePositives: 0.0000e+00 - FalseNegatives: 0.0000e+00



 3/20 [===>..........................] - ETA: 32s - loss: 0.0000e+00 - accuracy: 1.0000 - TruePositives: 0.0000e+00 - TrueNegatives: 24.0000 - FalsePositives: 0.0000e+00 - FalseNegatives: 0.0000e+00



 4/20 [=====>........................] - ETA: 30s - loss: 0.0000e+00 - accuracy: 1.0000 - TruePositives: 0.0000e+00 - TrueNegatives: 32.0000 - FalsePositives: 0.0000e+00 - FalseNegatives: 0.0000e+00































































Epoch 3/5




 1/20 [>.............................] - ETA: 39s - loss: 0.0000e+00 - accuracy: 1.0000 - TruePositives: 0.0000e+00 - TrueNegatives: 8.0000 - FalsePositives: 0.0000e+00 - FalseNegatives: 0.0000e+00



 2/20 [==>...........................] - ETA: 35s - loss: 0.0000e+00 - accuracy: 1.0000 - TruePositives: 0.0000e+00 - TrueNegatives: 16.0000 - FalsePositives: 0.0000e+00 - FalseNegatives: 0.0000e+00



 3/20 [===>..........................] - ETA: 33s - loss: 0.6427 - accuracy: 0.9583 - TruePositives: 0.0000e+00 - TrueNegatives: 23.0000 - FalsePositives: 0.0000e+00 - FalseNegatives: 1.0000        



 4/20 [=====>........................] - ETA: 31s - loss: 0.9641 - accuracy: 0.9375 - TruePositives: 0.0000e+00 - TrueNegatives: 30.0000 - FalsePositives: 0.0000e+00 - FalseNegatives: 2.0000































































Epoch 4/5




 1/20 [>.............................] - ETA: 39s - loss: 0.0000e+00 - accuracy: 1.0000 - TruePositives: 0.0000e+00 - TrueNegatives: 8.0000 - FalsePositives: 0.0000e+00 - FalseNegatives: 0.0000e+00



 2/20 [==>...........................] - ETA: 36s - loss: 0.0000e+00 - accuracy: 1.0000 - TruePositives: 0.0000e+00 - TrueNegatives: 16.0000 - FalsePositives: 0.0000e+00 - FalseNegatives: 0.0000e+00



 3/20 [===>..........................] - ETA: 34s - loss: 0.0000e+00 - accuracy: 1.0000 - TruePositives: 0.0000e+00 - TrueNegatives: 24.0000 - FalsePositives: 0.0000e+00 - FalseNegatives: 0.0000e+00



 4/20 [=====>........................] - ETA: 31s - loss: 1.4461 - accuracy: 0.9062 - TruePositives: 0.0000e+00 - TrueNegatives: 29.0000 - FalsePositives: 0.0000e+00 - FalseNegatives: 3.0000        































































Epoch 5/5




 1/20 [>.............................] - ETA: 38s - loss: 0.0000e+00 - accuracy: 1.0000 - TruePositives: 0.0000e+00 - TrueNegatives: 8.0000 - FalsePositives: 0.0000e+00 - FalseNegatives: 0.0000e+00



 2/20 [==>...........................] - ETA: 35s - loss: 0.0000e+00 - accuracy: 1.0000 - TruePositives: 0.0000e+00 - TrueNegatives: 16.0000 - FalsePositives: 0.0000e+00 - FalseNegatives: 0.0000e+00



 3/20 [===>..........................] - ETA: 33s - loss: 0.0000e+00 - accuracy: 1.0000 - TruePositives: 0.0000e+00 - TrueNegatives: 24.0000 - FalsePositives: 0.0000e+00 - FalseNegatives: 0.0000e+00



 4/20 [=====>........................] - ETA: 31s - loss: 0.0000e+00 - accuracy: 1.0000 - TruePositives: 0.0000e+00 - TrueNegatives: 32.0000 - FalsePositives: 0.0000e+00 - FalseNegatives: 0.0000e+00

































































In [None]:
#model.save('initial_model.h5')

In [None]:
def plot_loss_acc(history):
    
    plt.figure(figsize=(14,6))
    plt.subplot2grid((1,2),(0,0))
    plt.plot(history.history['loss'], label = ('Train Loss'))
    plt.plot(history.history['val_loss'], label ='Validation Loss')
    plt.title('Model Loss')
    plt.legend()

    plt.subplot2grid((1,2),(0,1))
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label ='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.legend()
    
    plt.show()

In [None]:
def classification_report(history):
    #Train report
    TP = history.history['tp'][-1] #True Positive
    FP = history.history['fp'][-1] #False Positif
    TN = history.history['tn'][-1] #True Negative
    FN = history.history['fn'][-1] #False Negatife

    confusion_matrix = np.matrix([[TP,FP],[FN,TN]])
    recall = 100*TP/(TP+FN)
    precision = 100*TP/(TP+FP)
    acc = history.history['accuracy'][-1]
    F_Measure = (2 * precision * recall) / (precision + recall)
    
    #Validation Report
    vTP = history.history['val_tp'][-1] #True Positive
    vFP = history.history['val_fp'][-1] #False Positif
    vTN = history.history['val_tn'][-1] #True Negative
    vFN = history.history['val_fn'][-1] #False Negatife

    val_confusion_matrix = np.matrix([[vTP,vFP],[vFN,vTN]])
    val_recall = 100*vTP/(vTP+vFN)
    val_precision = 100*vTP/(vTP+vFP)
    val_acc = history.history['val_accuracy'][-1]
    val_F_Measure = (2 * val_precision * val_recall) / (val_precision + val_recall)
    
    print('Training Report : \n')
    print('Confusion Matrix : \n', confusion_matrix,'\n')
    print('Recall : ', recall)
    print("Precision : ",precision)
    print('Accuracy : ', 100*acc)
    print('F-Measure : ', F_Measure)
    print('')
    print('Validation Report : \n')
    print('Confusion Matrix : \n', val_confusion_matrix,'\n')
    print('Recall : ', val_recall)
    print("Precision : ",val_precision)
    print('Accuracy : ', 100*val_acc)
    print('F-Measure : ', val_F_Measure)

In [None]:
plot_loss_acc(history)
classification_report(history)