In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from pathlib import Path
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.applications.mobilenet import MobileNet
from keras.models import Sequential
from keras.layers import Flatten, Dense, Dropout, BatchNormalization, AveragePooling2D

Using TensorFlow backend.


In [2]:
# Record the TensorFlow version this notebook was executed with (useful when reproducing results).
print(tf.__version__)

2.0.1


In [3]:
# Config
# Images are resized to IMG_SIZE x IMG_SIZE and fed to the model in batches of BATCH_SIZE.
IMG_SIZE = 128
BATCH_SIZE = 32

# Single dataset root, so relocating the data only requires one edit.
DATA_DIR = 'data/coronahack-chest-x-ray/'
# Kept as plain strings with a trailing slash: they are prepended to image file names.
train_path = DATA_DIR + 'train/'
test_path = DATA_DIR + 'test/'

In [4]:
# Load the metadata table (image file name, label, dataset split, ...) and preview it
metadata_csv = 'data/coronahack-chest-x-ray/Chest_xray_Corona_Metadata.csv'
df = pd.read_csv(metadata_csv)
df.head()

Unnamed: 0.1,Unnamed: 0,X_ray_image_name,Label,Dataset_type,Label_2_Virus_category,Label_1_Virus_category
0,0,IM-0128-0001.jpeg,Normal,TRAIN,,
1,1,IM-0127-0001.jpeg,Normal,TRAIN,,
2,2,IM-0125-0001.jpeg,Normal,TRAIN,,
3,3,IM-0122-0001.jpeg,Normal,TRAIN,,
4,4,IM-0119-0001.jpeg,Normal,TRAIN,,


In [5]:
# Split the metadata by the Dataset_type column; .copy() yields independent frames
# so the column assignment below does not write into `df`.
is_train = df['Dataset_type'].eq('TRAIN')
is_test = df['Dataset_type'].eq('TEST')
df_train = df.loc[is_train].copy()
df_test = df.loc[is_test].copy()

# Prefix every file name with its folder so the ImageDataGenerator can load images by path
for frame, folder in ((df_train, train_path), (df_test, test_path)):
    frame['X_ray_image_name'] = folder + frame['X_ray_image_name'].astype(str)

In [6]:
# Preview the TRAIN rows; X_ray_image_name should now start with the train folder path.
df_train.head()

Unnamed: 0.1,Unnamed: 0,X_ray_image_name,Label,Dataset_type,Label_2_Virus_category,Label_1_Virus_category
0,0,data/coronahack-chest-x-ray/train/IM-0128-0001...,Normal,TRAIN,,
1,1,data/coronahack-chest-x-ray/train/IM-0127-0001...,Normal,TRAIN,,
2,2,data/coronahack-chest-x-ray/train/IM-0125-0001...,Normal,TRAIN,,
3,3,data/coronahack-chest-x-ray/train/IM-0122-0001...,Normal,TRAIN,,
4,4,data/coronahack-chest-x-ray/train/IM-0119-0001...,Normal,TRAIN,,


In [7]:
# Preview the TEST rows; X_ray_image_name should now start with the test folder path.
df_test.head()

Unnamed: 0.1,Unnamed: 0,X_ray_image_name,Label,Dataset_type,Label_2_Virus_category,Label_1_Virus_category
5286,5309,data/coronahack-chest-x-ray/test/IM-0021-0001....,Normal,TEST,,
5287,5310,data/coronahack-chest-x-ray/test/IM-0019-0001....,Normal,TEST,,
5288,5311,data/coronahack-chest-x-ray/test/IM-0017-0001....,Normal,TEST,,
5289,5312,data/coronahack-chest-x-ray/test/IM-0016-0001....,Normal,TEST,,
5290,5313,data/coronahack-chest-x-ray/test/IM-0015-0001....,Normal,TEST,,


In [8]:
# Train datagen
# `rotation_range` is expressed in DEGREES: a value such as 0.1 means +/-0.1 degree,
# which is effectively no augmentation. Use a small but visible rotation instead.
ROTATION_RANGE_DEG = 10
# Fixed seed so the shuffling order and the random transforms are repeatable between runs.
SEED = 42

train_datagen = ImageDataGenerator(
        rescale=1./255,  # maps 8-bit pixel values into [0, 1]
        rotation_range=ROTATION_RANGE_DEG)

# Train generator
# directory=None because x_col already holds the full relative path of every image.
# class_mode='binary' yields a single 0/1 label per image taken from the 'Label' column.
train_generator = train_datagen.flow_from_dataframe(
        dataframe = df_train,
        directory = None,
        x_col='X_ray_image_name',
        y_col='Label',
        target_size = (IMG_SIZE, IMG_SIZE),
        batch_size = BATCH_SIZE,
        color_mode = 'grayscale',
        class_mode = 'binary',
        seed = SEED)

Found 5286 validated image filenames belonging to 2 classes.


In [19]:
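# NOTE: 5309 training images were expected, but only 5286 were loaded.
# Observation: the first TEST row has DataFrame index 5286 while its 'Unnamed: 0' value is 5309,
# which suggests the metadata CSV itself lists only 5286 TRAIN rows (23 rows dropped upstream).
# TODO: confirm with len(df_train) and compare against the files actually present in train_path.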

In [9]:
# The model first halves the image resolution with average pooling, so the MobileNet
# backbone is fed (IMG_SIZE // 2, IMG_SIZE // 2, 1) tensors. Declare the backbone with
# that shape so its stated input matches what it actually receives.
POOL_SIZE = 2
backbone_size = IMG_SIZE // POOL_SIZE

# Randomly initialised MobileNet (weights=None) without its classification head,
# taking single-channel (grayscale) input.
raw_model = MobileNet(input_shape=(backbone_size, backbone_size, 1), include_top = False, weights = None)

model = Sequential()
model.add(AveragePooling2D((POOL_SIZE, POOL_SIZE), input_shape = (IMG_SIZE, IMG_SIZE, 1)))
model.add(BatchNormalization())
model.add(raw_model)
model.add(Flatten())
model.add(Dropout(0.5))
# NOTE(review): no activation here, so this layer is a purely linear projection — confirm intended.
model.add(Dense(64))
# Single sigmoid unit: one probability for the binary label.
model.add(Dense(1, activation = 'sigmoid'))
# Binary cross-entropy pairs with the sigmoid output and binary labels.
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['acc'])
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
average_pooling2d_1 (Average (None, 64, 64, 1)         0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 64, 64, 1)         4         
_________________________________________________________________
mobilenet_1.00_128 (Model)   multiple                  3228288   
_________________________________________________________________
flatten_1 (Flatten)          (None, 4096)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 4096)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                262208    
_________________________________________________________________
dense_2 (Dense)              (None, 1)                

In [10]:
# Train on the batches produced by the training generator.
# The object returned by fit() is kept in `history` for later inspection.
EPOCHS = 5
history = model.fit(
    train_generator,
    epochs=EPOCHS,
    verbose=True,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [57]:
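# TODO: run the prediction on the test set.
# `model.predict()` must be given input data; calling it with no data raises the
# "you should specify the `steps` argument" ValueError. Build a test generator from
# df_test first (shuffle=False so predictions stay aligned with the rows), then:
# y_proba = model.predict(test_generator)
# `np_utils.probas_to_classes` is not available in Keras 2; for the single sigmoid
# output, threshold the probabilities instead:
# y_classes = (y_proba > 0.5).astype('int32')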

ValueError: If predicting from data tensors, you should specify the `steps` argument.