In [2]:
import os
import cv2
import glob
import random
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
from keras.preprocessing.image import ImageDataGenerator

In [3]:
BATCH_SIZE = 25 
IMAGE_SIZE = [150, 150]

In [4]:
train_images = []
for r, d, f in os.walk('../input/chest_xray/train'):
     for file in f:
        if file.endswith(".jpeg"):
            train_images.append((os.path.join(r, file)))

In [5]:
train_labels = []
for i in range(len(train_images)):
    if 'bacteria' in train_images[i]:
        train_labels.append((train_images[i],'BACTERIAL'))
    elif 'virus' in train_images[i]:
        train_labels.append((train_images[i],'VIRUS'))
    else:
        train_labels.append((train_images[i],'NORMAL'))

In [6]:
train_df = pd.DataFrame(train_labels, columns = ['IMAGE','LABEL'])

In [7]:
test_images = []
for r, d, f in os.walk('../input/chest_xray/test'):
     for file in f:
        if file.endswith(".jpeg"):
            test_images.append((os.path.join(r, file)))

In [8]:
test_labels = []
for i in range(len(test_images)):
    if 'bacteria' in test_images[i]:
        test_labels.append((test_images[i],'BACTERIAL'))
    elif 'virus' in test_images[i]:
        test_labels.append((test_images[i],'VIRUS'))
    else:
        test_labels.append((test_images[i],'NORMAL'))

In [9]:
test_df = pd.DataFrame(test_labels, columns =['IMAGE','LABEL'])

In [10]:
val_images = []
for r, d, f in os.walk('../input/chest_xray/val'):
     for file in f:
        if file.endswith(".jpeg"):
            val_images.append((os.path.join(r, file)))

In [11]:
val_labels = []
for i in range(len(val_images)):
    if 'bacteria' in val_images[i]:
        val_labels.append((val_images[i],'BACTERIAL'))
    elif 'virus' in val_images[i]:
        val_labels.append((val_images[i],'VIRUS'))
    else:
        val_labels.append((val_images[i],'NORMAL'))

In [12]:
val_df = pd.DataFrame(val_labels, columns = ['IMAGE','LABEL'])

In [13]:
# Data Augmentation
from keras_preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(
      rotation_range=40,
      height_shift_range=0.2,
      shear_range=0.2,
      zoom_range=0.2,
      horizontal_flip=True,
     )

In [14]:
train_generator = train_datagen.flow_from_dataframe(
    train_df, x_col="IMAGE", y_col="LABEL",
    target_size=(150,150),
    color_mode='grayscale', #we use grayscale images I think
    batch_size=32,
    class_mode='categorical',
    shuffle=True,
    rescale=1.0/255,
    seed=8)
data_list = []
batch_index = 0

while batch_index <= train_generator.batch_index:
    data = train_generator.next()
    data_list.append(data[0])
    batch_index = batch_index + 1

# now, data_array is the numeric data of whole images
train_ds = np.asarray(data_list)

Found 5216 validated image filenames belonging to 3 classes.


In [15]:
test_generator = train_datagen.flow_from_dataframe(
    test_df, x_col="IMAGE", y_col="LABEL",
    target_size=(150,150),
    color_mode='grayscale',
    batch_size=32,
    class_mode='categorical',
    shuffle=True,
    rescale=1.0/255,
    seed=8)
data_list = []
batch_index = 0

while batch_index <= test_generator.batch_index:
    data = test_generator.next()
    data_list.append(data[0])
    batch_index = batch_index + 1

# now, data_array is the numeric data of whole images
test_ds = np.asarray(data_list)

Found 624 validated image filenames belonging to 3 classes.


In [16]:
val_generator = train_datagen.flow_from_dataframe(
    val_df, x_col="IMAGE", y_col="LABEL",
    target_size=(150,150),
    color_mode='grayscale',
    batch_size=32,
    class_mode='categorical',
    shuffle=True,
    rescale=1.0/255,
    seed=8)
data_list = []
batch_index = 0

while batch_index <= val_generator.batch_index:
    data = val_generator.next()
    data_list.append(data[0])
    batch_index = batch_index + 1

# now, data_array is the numeric data of whole images
val_ds = np.asarray(data_list)

Found 16 validated image filenames belonging to 2 classes.


In [17]:
def conv_block(filters, inputs):
    x = layers.SeparableConv2D(filters, 3, activation="relu", padding="same")(inputs)
    x = layers.SeparableConv2D(filters, 3, activation="relu", padding="same")(x)
    x = layers.BatchNormalization()(x)
    outputs = layers.MaxPool2D()(x)
    return outputs

def dense_block(units, dropout_rate, inputs):
    x = layers.Dense(units, activation="relu")(inputs)
    x = layers.BatchNormalization()(x)
    outputs = layers.Dropout(dropout_rate)(x)
    return outputs

In [18]:
def build_model():
    inputs = keras.Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))
    x = preprocessing.Rescaling(1.0 / 255)(inputs)
    x = layers.Conv2D(16, 3, activation="relu", padding="same")(x)
    x = layers.Conv2D(16, 3, activation="relu", padding="same")(x)
    x = layers.MaxPool2D()(x)

    x = conv_block(32, x)
    x = conv_block(64, x)

    x = conv_block(128, x)
    x = layers.Dropout(0.2)(x)

    x = conv_block(256, x)
    x = layers.Dropout(0.2)(x)

    x = layers.Flatten()(x)
    x = dense_block(512, 0.7, x)
    x = dense_block(128, 0.5, x)
    x = dense_block(64, 0.3, x)

    outputs = layers.Dense(1, activation="sigmoid")(x)

    model = keras.Model(inputs=inputs, outputs=outputs)
    return model

In [19]:
early_stop = tf.keras.callbacks.EarlyStopping(
    patience = 10, restore_best_weights = True
)

initial_learning_rate = 0.015
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps = 100000, decay_rate = 0.96, staircase = True
)

In [23]:
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced',
                                                 np.unique(train_df['LABEL']),
                                                 train_df['LABEL'])

1       BACTERIAL
2       BACTERIAL
3           VIRUS
4       BACTERIAL
          ...    
5211       NORMAL
5212       NORMAL
5213       NORMAL
5214       NORMAL
5215       NORMAL
Name: LABEL, Length: 5216, dtype: object as keyword args. From version 0.25 passing these as positional arguments will result in an error


In [24]:
model = build_model()

METRICS = [
    tf.keras.metrics.BinaryAccuracy(),
    tf.keras.metrics.Precision(name = "precision"),
    tf.keras.metrics.Recall(name = "recall"),
    tf.keras.metrics.AUC(name = "AUC"),
    ]

model.compile(
    optimizer = tf.keras.optimizers.Adam(learning_rate = lr_schedule),
    loss = "binary_crossentropy",
    metrics = METRICS,
    )

history = model.fit(
    train_ds,
    epochs = 100,
    validation_data = val_ds,
    class_weight = class_weight,
    callbacks = [early_stop],
)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()