In [19]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from glob import glob

import matplotlib.pyplot as plt
from itertools import chain
import tensorflow as tf
import cv2

In [20]:
# Load the label table and locate every training scan on disk.
all_xray_df = pd.read_csv('./data/training_labels.csv')

# Index the PNG files by bare file name so they can be joined on 'Image Index'.
training_pngs = glob(os.path.join('./data/training', '*.png'))
all_image_paths = dict(zip(map(os.path.basename, training_pngs), training_pngs))

print('Scans found:', len(all_image_paths), ', Total Headers', all_xray_df.shape[0])

# dict.get is used as the mapper, so a row whose file is absent on disk gets a
# missing value in 'path' instead of raising.
all_xray_df['path'] = all_xray_df['Image Index'].map(all_image_paths.get)

Scans found: 4000 , Total Headers 4000


In [21]:
# Binary target: 0.0 for 'No Finding', 1.0 for any other finding string.
# A vectorised comparison cast to float64 gives a stable dtype; the previous
# lambda returned a mix of int 0 and float 1.0, so the column dtype depended
# on which values happened to be present.
all_xray_df['Label'] = (all_xray_df['Finding Labels'] != 'No Finding').astype('float64')

In [22]:
# Sanity check: (rows, columns) of the label table after adding 'path' and 'Label'.
all_xray_df.shape

(4000, 4)

In [23]:
from sklearn.utils import shuffle

# Stratified 80/20 split: sample 80% of each class separately so that the
# class ratio is the same in the training and validation sets.
normal = all_xray_df[all_xray_df['Label'] == 0.0]
abnormal = all_xray_df[all_xray_df['Label'] == 1.0]

X_train = pd.concat([normal.sample(frac=0.8, random_state=0),
                     abnormal.sample(frac=0.8, random_state=0)], axis=0)

# Everything that was not drawn for training becomes the validation set.
X_valid = all_xray_df.loc[~all_xray_df.index.isin(X_train.index)]

# Seed the shuffles as well: the split itself was already deterministic, but
# an unseeded shuffle made the row order differ on every run.
X_train = shuffle(X_train, random_state=0)
X_valid = shuffle(X_valid, random_state=0)

# Data augmentation on the training set

In [24]:
import os
# NOTE: this rebinds `glob` from the function imported at the top of the
# notebook to the module. It is kept because a later cell calls glob.glob(...).
import glob

out_path = './data/agument/'

# Rows already pointing into out_path come from an earlier run of this cell;
# exclude them so that re-running the cell never flips a flipped image or
# appends the augmented rows twice.
is_augmented = X_train['path'].str.startswith(out_path, na=False)
abnormal_df = X_train[(X_train['Label'] == 1.0) & ~is_augmented]

os.makedirs(out_path, exist_ok=True)

column_name = ['Image Index','Finding Labels','path','Label']
data_list = []

# Build the augmented rows from the *current* abnormal training rows rather
# than from whatever PNGs happen to be in out_path: files left over from an
# earlier, different split would otherwise leak validation scans into training.
for idx, row in abnormal_df.iterrows():
    src_path = row['path']
    if not isinstance(src_path, str):
        raise ValueError('No image file found for %r' % (row['Image Index'],))

    base_name = 'F' + os.path.basename(src_path)
    path_name = out_path + base_name

    # The flipped image is cached on disk; only generate it when missing.
    if not os.path.exists(path_name):
        img = cv2.imread(src_path, cv2.IMREAD_COLOR)
        if img is None:
            raise FileNotFoundError('Could not read image: %r' % (src_path,))
        # flipCode=1 mirrors the image around the vertical axis (left-right).
        if not cv2.imwrite(path_name, cv2.flip(img, 1)):
            raise IOError('Could not write image: %r' % (path_name,))

    # Carry over the finding of the source scan instead of a hard-coded label.
    data_list.append([base_name, row['Finding Labels'], path_name, 1.0])

flip_df = pd.DataFrame(columns=column_name, data=data_list)

# ignore_index gives a clean 0..n-1 index without the stray 'index' column
# that reset_index(inplace=True) used to add (and which failed on a re-run).
X_train = pd.concat([X_train[~is_augmented], flip_df], ignore_index=True)


In [25]:
# Class balance of the training set after the flipped abnormal scans were added.
X_train.groupby('Label').size()

Label
0.0    2560
1.0    1280
dtype: int64

# Oversample the abnormal class

In [26]:
# Oversample: append 300 extra rows drawn (without replacement) from the
# abnormal class to reduce the class imbalance, then shuffle the result.
# Both steps are seeded so the training set is identical on every run.
train_over = pd.concat([X_train, X_train[X_train['Label'] == 1].sample(300, random_state=0)])
train_over = shuffle(train_over, random_state=0)

In [27]:
def load_standardized_images(paths, side=64):
    """Read grayscale images and standardise each one to zero mean / unit std.

    Parameters
    ----------
    paths : iterable of str
        Image file paths readable by ``cv2.imread``.
    side : int, default 64
        Expected height and width of every image in pixels.

    Returns
    -------
    list of numpy.ndarray
        One ``(side, side, 1)`` float array per path, in input order.

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` cannot read a path (it returns ``None`` instead of
        raising, which would otherwise fail later with an obscure error).
    ValueError
        From ``reshape`` if an image is not ``side`` x ``side`` pixels.
    """
    loaded = []
    for f in paths:
        img = cv2.imread(f, cv2.IMREAD_GRAYSCALE)
        if img is None:
            raise FileNotFoundError('Could not read image: %r' % (f,))
        m, s = cv2.meanStdDev(img)
        # The small epsilon keeps a constant image (std == 0) from dividing by zero.
        std_img = (img - m) / (1.e-6 + s)
        loaded.append(std_img.reshape((side, side, 1)))
    return loaded


# Training data comes from the oversampled frame; use X_train instead of
# train_over here to train without oversampling.
train_x = train_over['path'].values
train_y = train_over['Label'].values
train_image = load_standardized_images(train_x)
# Two-column one-hot target: column 0 = normal (1 - Label), column 1 = abnormal.
train_label = np.column_stack([1-train_y,train_y])

valid_x = X_valid['path'].values
valid_y = X_valid['Label'].values
valid_image = load_standardized_images(valid_x)
valid_label = np.column_stack([1-valid_y,valid_y])

In [28]:
# Number of training paths (after oversampling) and validation paths.
len(train_x),len(valid_x)

(4140, 800)

# CNN Model

In [29]:

# Build the CNN graph: five 3x3 conv layers (each conv -> batch norm -> ReLU ->
# dropout) with two 2x2 max-pools, followed by three fully connected layers
# that end in a 2-unit logit output.

# Start from an empty default graph so that re-running this cell does not
# stack a second copy of the model on top of the first one.
tf.reset_default_graph()

# Inputs: 64x64 single-channel images and two-column one-hot targets.
images = tf.placeholder(tf.float32, [None, 64, 64, 1])
true_out = tf.placeholder(tf.float32, [ None, 2])
# kprob is passed as the second positional argument of tf.nn.dropout below
# (keep_prob in TF 1.x); it defaults to 0.9 when not fed.
kprob = tf.placeholder_with_default(0.9,shape=())
# Switches batch normalization between training and inference behaviour;
# defaults to True (training) when not fed.
train_mode = tf.placeholder_with_default(True,shape=())

# The same Xavier initializer is used for the conv kernels and the FC weights.
xavi_init = tf.contrib.layers.xavier_initializer_conv2d()
# conv1: 1 -> 32 channels. No bias term is added to the conv/fc1/fc2 layers;
# each is followed directly by batch normalization.
w1 = tf.Variable(xavi_init(shape=[3,3,1,32]),name='conv1_w')
conv1 = tf.nn.conv2d(images, w1, strides=[1, 1, 1, 1], padding='SAME')
norm1 = tf.layers.batch_normalization(conv1,training=train_mode)
relu1 = tf.nn.relu(norm1)
relu1 = tf.nn.dropout(relu1,kprob)


# conv2: 32 -> 32 channels.
w2 = tf.Variable(xavi_init(shape=[3,3,32,32]),name='conv2_w')
conv2 = tf.nn.conv2d(relu1, w2, strides=[1, 1, 1, 1], padding='SAME')
norm2 = tf.layers.batch_normalization(conv2,training=train_mode)
relu2 = tf.nn.relu(norm2)
relu2 = tf.nn.dropout(relu2,kprob)

# 2x2 max-pool with stride 2 halves the spatial size: 64x64 -> 32x32 (32 channels).
pool1 = tf.nn.max_pool(relu2,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')


# conv3: 32 -> 64 channels.
w3 = tf.Variable(xavi_init(shape=[3,3,32,64]),name='conv3_w')
conv3 = tf.nn.conv2d(pool1, w3, strides=[1, 1, 1, 1], padding='SAME')
norm3 = tf.layers.batch_normalization(conv3,training=train_mode)
relu3 = tf.nn.relu(norm3)
relu3 = tf.nn.dropout(relu3,kprob)


# conv4: 64 -> 64 channels.
w4 = tf.Variable(xavi_init(shape=[3,3,64,64]),name='conv4_w')
conv4 = tf.nn.conv2d(relu3, w4, strides=[1, 1, 1, 1], padding='SAME')
norm4 = tf.layers.batch_normalization(conv4,training=train_mode)
relu4 = tf.nn.relu(norm4)
relu4 = tf.nn.dropout(relu4,kprob)


# conv5: 64 -> 64 channels.
w5 = tf.Variable(xavi_init(shape=[3,3,64,64]),name='conv5_w')
conv5 = tf.nn.conv2d(relu4, w5, strides=[1, 1, 1, 1], padding='SAME')
norm5 = tf.layers.batch_normalization(conv5,training=train_mode)
relu5 = tf.nn.relu(norm5)
relu5 = tf.nn.dropout(relu5,kprob)



# Second 2x2 max-pool: 32x32 -> 16x16 with 64 channels.
pool2 = tf.nn.max_pool(relu5,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')

# Flatten for the dense layers: 16 * 16 * 64 = 16384 features per image.
fc1_reshape = tf.reshape(pool2, [-1, 16384])
fc1_w = tf.Variable(xavi_init(shape=[16384,1024]),name='fc1_w')
fc1_n = tf.layers.batch_normalization(tf.matmul(fc1_reshape,fc1_w) ,training=train_mode)
fc1 = tf.nn.relu(fc1_n)
fc1 = tf.nn.dropout(fc1,kprob)



# fc2: 1024 -> 128.
# NOTE: fc1_n is rebound here to the batch-norm output of the second FC layer.
fc2_w = tf.Variable(xavi_init(shape=[1024,128]),name='fc2_w')
fc1_n = tf.layers.batch_normalization(tf.matmul(fc1,fc2_w) ,training=train_mode)
fc2 = tf.nn.relu(fc1_n)
fc2 = tf.nn.dropout(fc2,kprob)

# Output layer: 128 -> 2 raw logits (no activation, batch norm or dropout).
fc3_w = tf.Variable(xavi_init(shape=[128,2]),name='fc3_w')
fc3_b = tf.Variable(tf.zeros(shape=[2]),name='fc3_b')
logit = tf.nn.bias_add( tf.matmul(fc2,fc3_w) , fc3_b)




# Cost function and accuracy

In [30]:
# Per-example softmax cross-entropy between the one-hot targets and the
# logits, averaged over the batch to give the scalar training loss.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=true_out,
    logits=logit,
)
total_loss = tf.reduce_mean(cross_entropy)

# The batch-norm layers register their moving-average updates in UPDATE_OPS;
# building the Adam step under this dependency makes every training step run
# them too.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(total_loss)

# Accuracy: fraction of examples whose arg-max logit equals the arg-max target.
correct_prediction = tf.equal(
    tf.argmax(logit, axis=1),
    tf.argmax(true_out, axis=1),
)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    

# Saver

In [31]:
# Checkpoint prefix, and the '.meta' file whose presence later cells use to
# decide whether a saved model exists.
CHECKPOINT_PATH = './ckpt/vgg_xray.ckpt'
CHECKPOINT_FILE = CHECKPOINT_PATH + '.meta'

# Saver over the variables of the graph built above.
saver = tf.train.Saver()

In [33]:
# Train for 350 epochs of sequential mini-batches and, after every epoch,
# evaluate loss / accuracy / recall / precision / F1 on the validation set.
#batch_size = 500
batch_size = 100
init_op = tf.global_variables_initializer()

with tf.Session() as sess:

    # Resume from an existing checkpoint if there is one, otherwise start from
    # freshly initialised variables.
    if os.path.isfile(CHECKPOINT_FILE):
        saver.restore(sess, CHECKPOINT_PATH)
        print('Restoring values from %s...' % CHECKPOINT_PATH)
    else:
        sess.run(init_op)

    for epoch in range(350):

        # ---- training pass -------------------------------------------------
        for start in range(0, len(train_image), batch_size):

            end = min(start + batch_size, len(train_image))
            image_iter = train_image[start:end]
            label_iter = train_label[start:end]

            # kprob / train_mode are not fed, so their training defaults apply.
            sess.run(train_op, feed_dict={images: image_iter, true_out: label_iter})

            # Report metrics on the last mini-batch of the epoch. Dropout is
            # disabled and batch norm is put in inference mode so the numbers
            # describe the model itself rather than a randomly thinned copy.
            if end == len(train_image):
                loss, accr, train_logit = sess.run(
                    [total_loss, accuracy, logit],
                    feed_dict={images: image_iter, true_out: label_iter,
                               kprob: 1.0, train_mode: False})
                print(epoch, loss, accr)

        # ---- validation pass -----------------------------------------------
        tp = fn = fp = tn = vloss = 0.0

        for start in range(0, len(valid_image), batch_size):

            end = min(start + batch_size, len(valid_image))

            image_iter = valid_image[start:end]
            label_iter = valid_label[start:end]

            val_loss, val_logit = sess.run(
                [total_loss, logit],
                feed_dict={images: image_iter, true_out: label_iter,
                           kprob: 1.0, train_mode: False})

            y_true = np.argmax(label_iter, axis=1)
            y_pred = np.argmax(val_logit, axis=1)

            true_index = np.where(y_true == 1)
            false_index = np.where(y_true == 0)

            # Confusion-matrix counts, with class 1 (abnormal) as positive.
            tp += np.sum(y_pred[true_index] == 1)
            fn += np.sum(y_pred[true_index] == 0)
            fp += np.sum(y_pred[false_index] == 1)
            tn += np.sum(y_pred[false_index] == 0)

            # Accumulate the *validation* loss (previously the last training
            # loss was added here). Weight by batch size so the final value is
            # a true per-example mean even when the last batch is smaller.
            vloss += val_loss * (end - start)

        n_valid = tp + fn + fp + tn
        vloss = vloss / max(n_valid, 1.0)
        vaccr = (tp + tn) / max(n_valid, 1.0)

        # The 1e-7 terms guard against division by zero when a count is empty.
        recall = tp/(tp+fn+1e-7)
        precision = tp/(tp+fp+1e-7)
        f1score = 2*(recall*precision)/(recall+precision+1e-7)

        print('VALID ----- EPOCH : {:d}, LOSS : {:.3f}, ACCR : {:.3f}'.format(epoch, vloss, vaccr))
        print('RECALL :{:.3f} , PRECISION : {:.3f} , F1 SCORE :{:.3f}'.format(recall, precision, f1score ))
        print(tp+fn+fp+tn, tp+tn)

    # NOTE(review): saving is disabled, so the trained weights are discarded
    # when the session closes and the test cell needs a checkpoint from an
    # earlier run. Re-enable once ./ckpt exists and overwriting is intended.
    #saver.save(sess, CHECKPOINT_PATH)

ResourceExhaustedError: OOM when allocating tensor with shape[100,64,32,32] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: Conv2D_3 = Conv2D[T=DT_FLOAT, _class=["loc:@batch_normalization_3/cond/FusedBatchNorm_1/Switch"], data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](dropout_2/mul, conv4_w/read)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[Node: gradients/dropout/mul_grad/Shape/_83 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_1683_gradients/dropout/mul_grad/Shape", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


Caused by op 'Conv2D_3', defined at:
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/asyncio/base_events.py", line 1432, in _run_once
    handle._run()
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-29-16cd0b588346>", line 35, in <module>
    conv4 = tf.nn.conv2d(relu3, w4, strides=[1, 1, 1, 1], padding='SAME')
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/tensorflow/python/ops/gen_nn_ops.py", line 956, in conv2d
    data_format=data_format, dilations=dilations, name=name)
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3392, in create_op
    op_def=op_def)
  File "/opt/anaconda3/envs/tensorgpu/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1718, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[100,64,32,32] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: Conv2D_3 = Conv2D[T=DT_FLOAT, _class=["loc:@batch_normalization_3/cond/FusedBatchNorm_1/Switch"], data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](dropout_2/mul, conv4_w/read)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[Node: gradients/dropout/mul_grad/Shape/_83 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_1683_gradients/dropout/mul_grad/Shape", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.



# Test 

In [None]:

# Load and standardise the test scans exactly like the training images.
# glob returns files in filesystem order, so sort them to make the order of
# X_test (and of the predictions derived from it) the same on every machine.
X_test = sorted(glob.glob('./data/test/*.png'))
test_image = []
for f in X_test:
    img = cv2.imread(f,cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError('Could not read image: %r' % (f,))
    m,s = cv2.meanStdDev(img)
    # The small epsilon keeps a constant image (std == 0) from dividing by zero.
    std_img = (img- m)/(1.e-6 + s)

    test_image.append(std_img.reshape((64,64,1)))

print('test set' , np.shape(test_image))

In [None]:
# Run the restored model over the test images in mini-batches and collect the
# raw logits of every batch in logit_list.
batch_size = 50

# Fail loudly when there is no trained model: previously the whole block was
# skipped silently and the next cell died with a NameError on logit_list.
if not os.path.isfile(CHECKPOINT_FILE):
    raise FileNotFoundError('No checkpoint found at %s' % CHECKPOINT_FILE)

logit_list = []

with tf.Session() as sess:

    saver.restore(sess, CHECKPOINT_PATH)
    print('Restoring values from %s...' % CHECKPOINT_PATH)

    for start in range(0, len(test_image), batch_size):

        end = min(start + batch_size, len(test_image))

        image_iter = test_image[start:end]
        # Inference settings: no dropout, batch norm in inference mode.
        # The fetch is a one-element list, so each entry of logit_list is
        # [logits_array]; the next cell unpacks it with i[0].
        test_logit = sess.run([logit], feed_dict={images: image_iter,kprob:1.0,train_mode:False})
        logit_list.append(test_logit)

    print('----done---')

In [50]:
# Each entry of logit_list is a one-element list holding one batch of logits;
# stack the batches row-wise into a single (n_test, 2) array.
# np.vstack replaces np.row_stack, which is a deprecated alias of it.
pred_test = np.vstack([batch_logits[0] for batch_logits in logit_list])
# Predicted class = index of the larger logit (0 = normal, 1 = abnormal).
pred_label = np.argmax(pred_test,axis=1)

In [51]:
# Pair every test image path with its predicted class in one table.
test_data = dict(path=X_test, label=pred_label)
test_df = pd.DataFrame(data=test_data)