In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from glob import glob
# %matplotlib inline
import matplotlib.pyplot as plt
from itertools import chain
import tensorflow as tf
import cv2

  from ._conv import register_converters as _register_converters


In [2]:
all_xray_df = pd.read_csv('./data/training_labels.csv')
all_image_paths = {os.path.basename(x): x for x in 
                   glob(os.path.join( './data/training', '*.png'))}
print('Scans found:', len(all_image_paths), ', Total Headers', all_xray_df.shape[0])
all_xray_df['path'] = all_xray_df['Image Index'].map(all_image_paths.get)
all_xray_df.sample(3)

Scans found: 4000 , Total Headers 4000


Unnamed: 0,Image Index,Finding Labels,path
812,00011542_000.png,No Finding,./data/training/00011542_000.png
987,00010684_008.png,No Finding,./data/training/00010684_008.png
3632,00009595_014.png,Effusion,./data/training/00009595_014.png


In [3]:
#all_image_paths

In [4]:
all_xray_df['Label'] = all_xray_df['Finding Labels'].map(lambda x: 0 if x=='No Finding' else 1.0)
#all_xray_df.sample(10)

In [5]:
all_xray_df.shape

(4000, 4)

# Data agumentation

In [6]:
import os
import glob

out_path = './data/agument/'

abnormal =  all_xray_df [ all_xray_df['Label'] == 1.0 ] 

if not os.path.exists(out_path):
  os.mkdir(out_path)

  for idx,row in abnormal.iterrows():
      img = cv2.imread(row['path'],cv2.IMREAD_COLOR)
      fimg = cv2.flip(img,1)
      cv2.imwrite(out_path+'F'+os.path.basename(row['path']),fimg)

column_name = ['Image Index','Finding Labels','path','Label']
data_list = []
flist = glob.glob(out_path+'*.png')
for f in flist:
    path_name = f
    base_name = os.path.basename(f)
    data_list.append([base_name,'Effusion',path_name,1.0])

flip_df = pd.DataFrame(columns=column_name,data=data_list)


all_xray_df = pd.concat([all_xray_df,flip_df])
all_xray_df.reset_index(inplace=True)


In [7]:
from sklearn.utils  import shuffle



normal =  all_xray_df [ all_xray_df['Label'] == 0.0 ]
abnormal =  all_xray_df [ all_xray_df['Label'] == 1.0 ]

X_train = pd.concat([normal.sample(frac=0.8, random_state=0),\
                     abnormal.sample(frac=0.8,random_state=0)], axis=0)
X_valid = all_xray_df.loc[~all_xray_df.index.isin(X_train.index)]

X_train = shuffle(X_train)
X_valid = shuffle(X_valid)

In [8]:
train_x = X_train['path'].values
train_y = X_train['Label'].values

train_image = []
for f in train_x:
    img = cv2.imread(f,cv2.IMREAD_GRAYSCALE)
    m,s = cv2.meanStdDev(img)
    std_img = (img- m)/(1.e-6 + s)
    
    train_image.append(std_img.reshape((64,64,1)))
    
train_label = np.column_stack([1-train_y,train_y])


valid_x = X_valid['path'].values
valid_y = X_valid['Label'].values

valid_image = []
for f in valid_x:
    img = cv2.imread(f,cv2.IMREAD_GRAYSCALE)
    m,s = cv2.meanStdDev(img)
    std_img = (img- m)/(1.e-6 + s)
    valid_image.append(std_img.reshape((64,64,1)))
    
valid_label = np.column_stack([1-valid_y,valid_y])

In [9]:
len(train_x),len(valid_x)

(3840, 960)

# CNN Model

In [10]:

tf.reset_default_graph()

images = tf.placeholder(tf.float32, [None, 64, 64, 1])
true_out = tf.placeholder(tf.float32, [ None, 2])
kprob = tf.placeholder_with_default(0.9,shape=())


xavi_init = tf.contrib.layers.xavier_initializer_conv2d()
w1 = tf.Variable(xavi_init(shape=[3,3,1,32]),name='conv1_w')
b1 = tf.Variable(tf.zeros([32]) ,name='conv1_b')

conv1 = tf.nn.conv2d(images, w1, strides=[1, 1, 1, 1], padding='SAME')
bias1 = tf.nn.bias_add(conv1, b1)
relu1 = tf.nn.relu(bias1)
relu1 = tf.nn.dropout(relu1,kprob)


w2 = tf.Variable(xavi_init(shape=[3,3,32,32]),name='conv2_w')
b2 = tf.Variable(tf.zeros([32]) ,name='conv2_b')

conv2 = tf.nn.conv2d(relu1, w2, strides=[1, 1, 1, 1], padding='SAME')
bias2 = tf.nn.bias_add(conv2, b2)
relu2 = tf.nn.relu(bias2)
relu2 = tf.nn.dropout(relu2,kprob)

# 32, 32
pool1 = tf.nn.max_pool(relu2,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')


w3 = tf.Variable(xavi_init(shape=[3,3,32,64]),name='conv3_w')
b3 = tf.Variable(tf.zeros([64]) ,name='conv3_b')

conv3 = tf.nn.conv2d(pool1, w3, strides=[1, 1, 1, 1], padding='SAME')
bias3 = tf.nn.bias_add(conv3, b3)
relu3 = tf.nn.relu(bias3)
relu3 = tf.nn.dropout(relu3,kprob)


w4 = tf.Variable(xavi_init(shape=[3,3,64,64]),name='conv4_w')
b4 = tf.Variable(tf.zeros([64]) ,name='conv4_b')

conv4 = tf.nn.conv2d(relu3, w4, strides=[1, 1, 1, 1], padding='SAME')
bias4 = tf.nn.bias_add(conv4, b4)
relu4 = tf.nn.relu(bias4)
relu4 = tf.nn.dropout(relu4,kprob)

w5 = tf.Variable(xavi_init(shape=[3,3,64,64]),name='conv5_w')
b5 = tf.Variable(tf.zeros([64]) ,name='conv5_b')

conv5 = tf.nn.conv2d(relu4, w5, strides=[1, 1, 1, 1], padding='SAME')
bias5 = tf.nn.bias_add(conv5, b5)
relu5 = tf.nn.relu(bias5)
relu5 = tf.nn.dropout(relu5,kprob)


# 16, 16 , 64
pool2 = tf.nn.max_pool(relu5,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')

fc1_reshape = tf.reshape(pool2, [-1, 16384])
fc1_w = tf.Variable(xavi_init(shape=[16384,1024]),name='fc1_w')
fc1_b = tf.Variable(tf.zeros(shape=[1024]),name='fc1_b')
fc1 = tf.nn.relu( tf.nn.bias_add( tf.matmul(fc1_reshape,fc1_w) , fc1_b) )
fc1 = tf.nn.dropout(fc1,kprob)



fc2_w = tf.Variable(xavi_init(shape=[1024,128]),name='fc2_w')
fc2_b = tf.Variable(tf.zeros(shape=[128]),name='fc2_b')
fc2 = tf.nn.relu( tf.nn.bias_add( tf.matmul(fc1,fc2_w) , fc2_b) )
fc2 = tf.nn.dropout(fc2,kprob)

fc3_w = tf.Variable(xavi_init(shape=[128,2]),name='fc3_w')
fc3_b = tf.Variable(tf.zeros(shape=[2]),name='fc3_b')
logit = tf.nn.bias_add( tf.matmul(fc2,fc3_w) , fc3_b)




# cost function , accuracy

In [11]:
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=true_out,logits=logit)
total_loss = tf.reduce_mean(cross_entropy)

train_op = tf.train.AdamOptimizer(0.001).minimize(total_loss)

correct_prediction = tf.equal(tf.argmax(logit, 1),tf.argmax(true_out, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    

In [14]:
#batch_size = 500
batch_size = 100

init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init_op)

    for epoch in range(100):

        for start in range(0,len(train_image),batch_size):

            end = start + batch_size  if start + batch_size  < len(train_image)  else  len(train_image)

            image_iter = train_image[start:end]
            label_iter = train_label[start:end]

            sess.run(train_op, feed_dict={images: image_iter, true_out:label_iter})

            if end == len(train_image) :
                loss,accr,train_logit = sess.run([total_loss,accuracy,logit], feed_dict={images: image_iter, true_out:label_iter})
                print(epoch,loss,accr)

                
        
        tp= fn= fp= tn= vloss = 0.0
        
        for start in range(0,len(valid_image),batch_size):

            end = start + batch_size  if start + batch_size  < len(valid_image)  else  len(valid_image)

            image_iter = valid_image[start:end]
            label_iter = valid_label[start:end]
            
            val_loss,val_logit = sess.run([total_loss,logit], feed_dict={images: image_iter, true_out:label_iter,kprob:1.0})
            
            
            
            y_true = np.argmax(label_iter,axis=1)
            y_pred = np.argmax(val_logit,axis=1) 

            true_index =np.where(y_true == 1)
            flase_index = np.where(y_true == 0)


            tp += np.sum(y_pred[true_index])
            fn += np.sum(y_pred[true_index] == 0)
            fp += np.sum(y_pred[flase_index])
            tn += np.sum(y_pred[flase_index] == 0)
            
            vloss += loss

        recall = tp/(tp+fn+1e-7)
        precision = tp/(tp+fp+1e-7)
        f1score = 2*(recall*precision)/(recall+precision+1e-7)


        print('VALID ----- EPOCH : {}, LOSS : {}, ACCR : --'.format(epoch,vloss))
        print('RECALL :{} , PRECISION : {} , F1 SCORE :{}'.format(recall, precision, f1score ))
        print(tp+fn+fp+tn, tp+tn)

0 0.55787516 0.55
VALID ----- EPOCH : 0, LOSS : 5.578751564025879, ACCR : --
RECALL :0.3687499998847656 , PRECISION : 0.6145833330132379 , F1 SCORE :0.4609374529449511
960.0 684.0
1 0.48833308 0.725
VALID ----- EPOCH : 1, LOSS : 4.883330762386322, ACCR : --
RECALL :0.6937499997832031 , PRECISION : 0.7070063692015903 , F1 SCORE :0.7003154071968112
960.0 770.0
2 0.45540628 0.8
VALID ----- EPOCH : 2, LOSS : 4.554062783718109, ACCR : --
RECALL :0.7687499997597655 , PRECISION : 0.6473684208822714 , F1 SCORE :0.7028570930236769
960.0 752.0
3 0.40815917 0.825
VALID ----- EPOCH : 3, LOSS : 4.0815916657447815, ACCR : --
RECALL :0.7406249997685547 , PRECISION : 0.6970588233243944 , F1 SCORE :0.7181817680101045
960.0 774.0
4 0.38542658 0.8
VALID ----- EPOCH : 4, LOSS : 3.854265809059143, ACCR : --
RECALL :0.771874999758789 , PRECISION : 0.663978494445167 , F1 SCORE :0.7138727824459588
960.0 762.0
5 0.41598377 0.825
VALID ----- EPOCH : 5, LOSS : 4.1598376631736755, ACCR : --
RECALL :0.728124999772

KeyboardInterrupt: 