In [3]:
import cv2
import glob
import os
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator



#patch_size = 256

In [4]:
#import csv file and record images path
xray_data = pd.read_csv('sample_labels.csv')
print('Number of observations (csv):', len(xray_data))
img_path = glob.glob('images/*.png')
print('Number of observations (imgs):', len(img_path))

img_path_dict = {}
for i in range(len(img_path)):
    img_path_dict[xray_data['Image Index'][i]] = img_path[i]
print('Number of observations (imgs_abs):', len(img_path_dict))

Number of observations (csv): 5606
Number of observations (imgs): 5606
Number of observations (imgs_abs): 5606


In [5]:
xray_data['Image Absolute Path'] = xray_data['Image Index'].map(img_path_dict)

In [6]:
xray_data['Patient Age'] = xray_data['Patient Age'].map(lambda x:int(x[:-1]))

In [7]:
# labels for
labels = ['Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration', 'Mass', 'Nodule', 'Pneumonia', 'Pneumothorax', 'Consolidation', 'Edema', 'Emphysema', 'Fibrosis', 'Pleural_Thickening', 'Hernia']

In [8]:
len(labels)

14

In [9]:
for l in labels:
    xray_data[l] = xray_data['Finding Labels'].map(lambda x: 1 if l in x else 0)
xray_data.head(2)

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImageWidth,OriginalImageHeight,OriginalImagePixelSpacing_x,...,Mass,Nodule,Pneumonia,Pneumothorax,Consolidation,Edema,Emphysema,Fibrosis,Pleural_Thickening,Hernia
0,00000013_005.png,Emphysema|Infiltration|Pleural_Thickening|Pneu...,5,13,60,M,AP,3056,2544,0.139,...,0,0,0,1,0,0,1,0,1,0
1,00000013_026.png,Cardiomegaly|Emphysema,26,13,57,M,AP,2500,2048,0.168,...,0,0,0,0,0,0,1,0,0,0


In [10]:
type(xray_data[labels])

pandas.core.frame.DataFrame

In [11]:
xray_data[labels].values

array([[0, 0, 0, ..., 0, 1, 0],
       [0, 1, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [12]:
# set target values
xray_data['targets'] = xray_data.apply(lambda x:[x[labels].values], 1).map(lambda target: target[0].tolist())

In [13]:
type(xray_data['targets'])

pandas.core.series.Series

In [14]:
xray_data.head(2)

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImageWidth,OriginalImageHeight,OriginalImagePixelSpacing_x,...,Nodule,Pneumonia,Pneumothorax,Consolidation,Edema,Emphysema,Fibrosis,Pleural_Thickening,Hernia,targets
0,00000013_005.png,Emphysema|Infiltration|Pleural_Thickening|Pneu...,5,13,60,M,AP,3056,2544,0.139,...,0,0,1,0,0,1,0,1,0,"[0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0]"
1,00000013_026.png,Cardiomegaly|Emphysema,26,13,57,M,AP,2500,2048,0.168,...,0,0,0,0,0,1,0,0,0,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]"


In [15]:
train, test = train_test_split(xray_data, test_size = 0.2)

In [16]:
print(len(train))
print(len(test))

4484
1122


In [17]:
type(train)

pandas.core.frame.DataFrame

In [31]:
def mk_train_generator(df, directory, x_col, y_col, labels):
    datagen = ImageDataGenerator(samplewise_center=True, 
                                 samplewise_std_normalization=True,
                                 rotation_range=20,
                                 width_shift_range=0.2,
                                 height_shift_range=0.2,
                                 rescale=1./255,
                                 horizontal_flip=True)

    generator = datagen.flow_from_dataframe(dataframe=df, 
                                            directory=directory,
                                            x_col=x_col, 
                                            y_col=y_col, 
                                            classes=labels,
                                            class_mode='categorical',
                                            color_mode='grayscale',
                                            batch_size=32)
    
    return generator

In [32]:
datagen = ImageDataGenerator(samplewise_center=True, 
                                 samplewise_std_normalization=True,
                                 rotation_range=20,
                                 width_shift_range=0.2,
                                 height_shift_range=0.2,
                                 horizontal_flip=True)

In [46]:
training_set = mk_train_generator(train, './images/', 'Image Index', 'targets', labels)


Found 0 validated image filenames belonging to 14 classes.


In [47]:
test_datagen = ImageDataGenerator(rescale=1./255)

In [63]:
test_set = test_datagen.flow_from_dataframe(dataframe=test, 
                                            directory='./images/',
                                            x_col='Image Index',
                                            y_col='targets',
                                            classes=labels,
                                            class_mode='categorical',
                                            color_mode='grayscale',
                                            batch_size=32)

Found 1122 validated image filenames belonging to 2 classes.


In [49]:
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense

In [50]:
classifier = Sequential()
classifier.add(Conv2D(32, (3, 3), input_shape=(256,256,3), activation='relu'))

In [51]:
classifier.add(MaxPooling2D(pool_size=(2,2)))

In [52]:
classifier.add(Flatten())

In [53]:
classifier.add(Dense(units=128,activation='relu'))

In [54]:
classifier.add(Dense(units=1,activation='sigmoid'))

In [55]:
classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [56]:
classifier.fit(training_set, batch_size=32,epochs=50)

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/50


InvalidArgumentError:  input and filter must have the same depth: 1 vs 3
	 [[node sequential_2/conv2d_1/Relu (defined at <ipython-input-56-6d17a9251f45>:1) ]] [Op:__inference_train_function_805]

Function call stack:
train_function


In [67]:
training_set.

[]