In [1]:
import csv
import os

import cv2
import keras
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
from keras import optimizers
from keras.preprocessing import image
%matplotlib inline

Using TensorFlow backend.


In [6]:
# Target image size in pixels, passed as `target_size` when images are loaded.
WIDTH, HEIGHT = 320, 320
# Number of label classes.
num_classes = 4

In [7]:
def read_data(filepath):
    """
    Reads and returns the data from csv file.

    Assumes that the first row is column names.

    Parameters
    ----------
    filepath : str
        File path of the file to be read

    Returns
    -------
    (column_names, data)
        Tuple of names of the columns and the data. ``column_names`` is the
        first row as a list of strings; ``data`` is a list of the remaining
        rows, each a list of strings. Both are empty lists for an empty file.
    """
    # newline='' lets the csv module do its own line-ending handling, so
    # quoted fields that contain line breaks are read back unchanged.
    with open(filepath, 'rt', newline='') as csvfile:
        data_reader = csv.reader(csvfile, delimiter=',')
        # The default keeps an empty file from raising StopIteration.
        column_names = next(data_reader, [])
        data = list(data_reader)

    return (column_names, data)

In [8]:
# Absolute path to the training CSV (presumably a mounted dataset directory;
# adjust when running in a different environment).
train_csv_path = '/floyd/input/chexpert/train.csv'
# Header row and remaining rows of the CSV, as lists of strings.
column_names, data = read_data(train_csv_path)

In [9]:
# Display the CSV header to see which column holds which field.
column_names

['Path',
 'Sex',
 'Age',
 'Frontal/Lateral',
 'AP/PA',
 'No Finding',
 'Enlarged Cardiomediastinum',
 'Cardiomegaly',
 'Lung Opacity',
 'Lung Lesion',
 'Edema',
 'Consolidation',
 'Pneumonia',
 'Atelectasis',
 'Pneumothorax',
 'Pleural Effusion',
 'Pleural Other',
 'Fracture',
 'Support Devices']

In [32]:
def get_X(filepaths, cur_dir, num_images=None):
    """
    Load images from disk into a single array for model training.

    Parameters
    ----------
    filepaths : array
        image file paths; each one is joined onto ``cur_dir``
    cur_dir : str
        directory the file paths are joined onto
    num_images : int, optional
        only the first ``num_images`` paths are loaded; ``None`` loads all

    Returns
    -------
    np.array
        the loaded images, one entry per path
    """
    def load_one(relative_path):
        # Join onto the base directory, then rewrite the dataset prefix to
        # the location the images are read from.
        full_path = os.path.join(cur_dir, relative_path)
        full_path = full_path.replace('home/CheXpert-v1.0-small', 'input/chexpert')
        picture = image.load_img(full_path, target_size=(WIDTH, HEIGHT))
        return image.img_to_array(picture)

    selected = filepaths[:num_images]
    return np.array([load_one(p) for p in selected])

In [10]:
from keras.preprocessing import image
# Convert the list of CSV rows into a 2-D array so columns can be sliced.
data = np.array(data)
# Column 0 is 'Path' (see column_names): the image file path of each row.
filepaths = np.array(data)[:, 0]

# Show the first few file paths
filepaths[:8]

array(['CheXpert-v1.0-small/train/patient00001/study1/view1_frontal.jpg',
       'CheXpert-v1.0-small/train/patient00002/study2/view1_frontal.jpg',
       'CheXpert-v1.0-small/train/patient00002/study1/view1_frontal.jpg',
       'CheXpert-v1.0-small/train/patient00002/study1/view2_lateral.jpg',
       'CheXpert-v1.0-small/train/patient00003/study1/view1_frontal.jpg',
       'CheXpert-v1.0-small/train/patient00004/study1/view1_frontal.jpg',
       'CheXpert-v1.0-small/train/patient00004/study1/view2_lateral.jpg',
       'CheXpert-v1.0-small/train/patient00005/study1/view1_frontal.jpg'],
      dtype='<U64')

In [11]:
# Column 7 of the CSV is the Cardiomegaly label; print its name as a sanity check.
print(column_names[7])
# Raw label values for that column, still as strings.
y_cardiomegaly = data[:, 7]

def preprocess_y(raw_y, missing_value=-2):
    """
    Convert a column of raw string labels into integer labels.

    Blank entries are replaced by ``missing_value``; every other entry is
    parsed as a float and then truncated to an int (so ``'1.0'`` becomes 1).
    The input is not modified.

    Parameters
    ----------
    raw_y : array
        label values as strings, with ``''`` marking a missing label
    missing_value : int, optional
        integer substituted for blank entries (default -2)

    Returns
    -------
    np.array
        integer array with the same shape as ``raw_y``
    """
    raw_y = np.asarray(raw_y)
    # np.where builds a new array instead of writing into the caller's one
    # (which may be a view of a larger table), and widens the string dtype
    # if needed so the substituted value is never truncated.
    filled = np.where(raw_y == '', str(missing_value), raw_y)
    return filled.astype(float).astype(int)

# Integer-encode the Cardiomegaly column and use it as the training target.
y_cardiomegaly = preprocess_y(y_cardiomegaly)
y = y_cardiomegaly

Cardiomegaly


In [12]:
# Referencing the following blog to generate the data 
# https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly
# 

import numpy as np
import keras

class DataGenerator(keras.utils.Sequence):
    """
    Keras ``Sequence`` that loads image batches from disk on demand.

    Parameters
    ----------
    list_IDs : array
        sample identifiers; each one is an image path joined onto the
        current working directory
    labels : dict
        maps each identifier to its integer class label
    batch_size : int
        number of samples per batch
    dim : tuple
        image size passed to the loader as ``target_size``
    n_channels : int
        number of image channels in the batch array
    n_classes : int
        number of classes used for the one-hot encoding
    shuffle : bool
        whether to reshuffle the sample order at the start of every epoch
    """
    def __init__(self,
                 list_IDs,
                 labels,
                 batch_size=32,
                 dim=(320, 320),
                 n_channels=1,
                 n_classes=10,
                 shuffle=True):
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        # Build the initial sample order.
        self.on_epoch_end()

    def __len__(self):
        """Number of full batches per epoch (a trailing partial batch is dropped)."""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(X, y)`` pair."""
        # Positions of this batch's samples in the current epoch order.
        indexes = self.indexes[index * self.batch_size: (index + 1) * self.batch_size]

        # Translate positions into sample identifiers.
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        """Reset the sample order, reshuffling it when ``shuffle`` is enabled."""
        self.indexes = np.arange(len(self.list_IDs))
        # Shuffle only when requested (the condition used to be inverted).
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """
        Load the images and labels for the given identifiers.

        Returns ``X`` with shape ``(n_samples, *dim, n_channels)``, pixel
        values divided by 255, and the labels one-hot encoded over
        ``n_classes``.
        """
        # Size the buffers by the IDs actually received, so no uninitialised
        # rows are returned if fewer than batch_size IDs are passed.
        n_samples = len(list_IDs_temp)
        X = np.empty((n_samples, *self.dim, self.n_channels))
        y = np.empty((n_samples), dtype=int)

        cur_dir = os.getcwd()

        for i, ID in enumerate(list_IDs_temp):
            # Join onto the working directory, then rewrite the dataset
            # prefix to the location the images are read from.
            img_path = os.path.join(cur_dir, ID)
            img_path = img_path.replace('home/CheXpert-v1.0-small', 'input/chexpert')
            # Load at self.dim so the image always matches the X buffer.
            img = image.load_img(img_path, target_size=self.dim, color_mode='grayscale')

            X[i, ] = image.img_to_array(img) / 255

            y[i] = self.labels[ID]

        # NOTE(review): labels may be negative (e.g. -2 for blank entries);
        # confirm to_categorical encodes them as intended.
        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)

In [13]:
# Keyword arguments forwarded to DataGenerator.
params = {'dim': (320, 320),
          'batch_size': 32,
          'n_classes': 4,
          'n_channels': 1,
          'shuffle': True}

# Sample IDs per split: every image path (CSV column 0) goes to training;
# the validation split is left empty here.
partition = {'train': data[:, 0], 'validation': []}

# Lookup from image path to its integer label.
labels = dict(zip(data[:, 0], y))

In [14]:
# Batch generator over the training paths, configured by `params`.
training_generator = DataGenerator(partition['train'], labels, **params)

In [15]:
from keras import layers
from keras import models

def get_model(input_shape=(320, 320, 1), n_classes=4):
    """
    Build a small convolutional image classifier (not compiled).

    Four Conv2D + MaxPooling2D stages are followed by dropout, a 512-unit
    dense layer and a softmax output layer.

    Parameters
    ----------
    input_shape : tuple, optional
        shape of one input image as (height, width, channels)
    n_classes : int, optional
        number of output classes

    Returns
    -------
    keras Sequential model
        the assembled, uncompiled model
    """
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu',
                            input_shape=input_shape))
    model.add(layers.MaxPooling2D((2, 2)))
    # Remaining conv stages differ only in their filter count.
    for filters in (64, 128, 128):
        model.add(layers.Conv2D(filters, (3, 3), activation='relu'))
        model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(512, activation='relu'))
    # softmax (not sigmoid): the model is trained with categorical
    # cross-entropy on one-hot labels, i.e. mutually exclusive classes, so
    # the outputs should form a probability distribution.
    model.add(layers.Dense(n_classes, activation='softmax'))

    return model

# Build the CNN and print its layer-by-layer summary.
model = get_model()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 318, 318, 32)      320       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 159, 159, 32)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 157, 157, 64)      18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 78, 78, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 76, 76, 128)       73856     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 38, 38, 128)       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 36, 36, 128)       147584    
__________

In [16]:
# Number of rows (samples) in the training CSV.
data.shape[0]

223414

In [55]:

# Compile the CNN with RMSprop and categorical cross-entropy.
model.compile(optimizer=optimizers.RMSprop(lr=2e-5),
              loss='categorical_crossentropy',
              metrics=['acc'])

# One epoch covers every full batch the generator can serve. Asking the
# generator for its length avoids repeating its batch size here.
spe = len(training_generator)
model.fit_generator(generator=training_generator,
                    steps_per_epoch=spe,
                    epochs=10)

Epoch 1/10
  11/6981 [..............................] - ETA: 18:03:11 - loss: 1.1035 - acc: 0.7244

KeyboardInterrupt: 

In [17]:
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten
from keras import optimizers

# Replace the CNN with a randomly initialised (weights=None) DenseNet121 that
# takes 320x320 single-channel input and has a 4-class top layer.
# NOTE(review): `pooling` may only apply when include_top=False — confirm
# against the Keras Applications docs whether 'max' has any effect here.
model = keras.applications.densenet.DenseNet121(
                                        include_top=True, 
                                        weights=None, 
                                        input_shape=(320, 320, 1),
                                        pooling='max', 
                                        classes=4)

In [18]:
# Print the layer-by-layer summary of the DenseNet121 model.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 320, 320, 1)  0                                            
__________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D (None, 326, 326, 1)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1/conv (Conv2D)             (None, 160, 160, 64) 3136        zero_padding2d_1[0][0]           
__________________________________________________________________________________________________
conv1/bn (BatchNormalization)   (None, 160, 160, 64) 256         conv1/conv[0][0]                 
__________________________________________________________________________________________________
conv1/relu

In [19]:
# Compile the DenseNet with Adam and categorical cross-entropy.
# NOTE(review): lr=0.1 is far above Adam's usual default (0.001 in Keras) —
# confirm this is intentional.
optimizer = optimizers.Adam(lr=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    
model.compile(loss='categorical_crossentropy',
            optimizer=optimizer,
            metrics=["accuracy"])

In [21]:
# One epoch covers every full batch the generator can serve. Asking the
# generator for its length avoids repeating its batch size here.
spe = len(training_generator)
model.fit_generator(generator=training_generator,
                    steps_per_epoch=spe,
                    epochs=10)

Epoch 1/10
  43/6981 [..............................] - ETA: 32:01 - loss: 2.9636 - acc: 0.8161

KeyboardInterrupt: 