In [11]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_files
from keras.utils import np_utils
from glob import glob
% matplotlib inline

In [19]:
train_path = 'F:/Udacity/deep_learning/deep_learning_projects/cancer_detection/data/train'
validation_path = 'F:/Udacity/deep_learning/deep_learning_projects/cancer_detection/data/valid'
test_path = 'F:/Udacity/deep_learning/deep_learning_projects/cancer_detection/data/test'
def load_images(path):
    data = load_files(path)
    cancer_files = np.array(data['filenames'])
    cancer_targets = np_utils.to_categorical(np.array(data['target']),3)
    return cancer_files,cancer_targets
train_files,train_targets  = load_images(train_path)
valid_files,valid_targets  = load_images(validation_path)
test_files,test_targets  = load_images(test_path)

In [15]:
cancer_types = [item for item in glob('data/train/*')]
print('Number of cancer types',len(cancer_types))
print('Number of training files',len(train_files))
print('Number of validation files',len(valid_files))
print('Number of test files',len(test_files))

Number of cancer types 3
Number of training files 2000
Number of validation files 150
Number of test files 600


In [32]:
# image preprocessing
from keras.preprocessing import image
from tqdm import tqdm
def path_to_tensor(path):
    img = image.load_img(path,target_size=(512,512))
    x = image.img_to_array(img)
    return np.expand_dims(x,axis=0)
def paths_to_tensor(path):
    list_of_tensors = [path_to_tensor(img_path) for img_path in tqdm(path)]
    return np.vstack(list_of_tensors)

In [36]:
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES=True

train_tensors = paths_to_tensor(train_files).astype('float32')/255








  0%|                                                                                         | 0/2000 [00:00<?, ?it/s]





  0%|                                                                                 | 3/2000 [00:00<01:18, 25.57it/s]





  0%|▏                                                                                | 4/2000 [00:00<02:12, 15.07it/s]





  0%|▏                                                                                | 5/2000 [00:00<02:57, 11.21it/s]





  0%|▎                                                                                | 7/2000 [00:00<03:29,  9.52it/s]





  0%|▎                                                                                | 8/2000 [00:00<04:53,  6.79it/s]





  0%|▎                                                                                | 9/2000 [00:01<06:12,  5.34it/s]





  1%|▍                                                                               | 11/2000 [00:01<05:14,  6.32it/s]

In [38]:
validation_tensor = paths_to_tensor(valid_files).astype('float32')/255







  0%|                                                                                          | 0/150 [00:00<?, ?it/s]





  1%|▌                                                                                 | 1/150 [00:00<00:55,  2.71it/s]





  1%|█                                                                                 | 2/150 [00:00<00:46,  3.15it/s]





  2%|█▋                                                                                | 3/150 [00:00<00:39,  3.72it/s]





  3%|██▏                                                                               | 4/150 [00:01<01:00,  2.41it/s]





  3%|██▋                                                                               | 5/150 [00:01<00:56,  2.55it/s]





  4%|███▎                                                                              | 6/150 [00:02<01:14,  1.95it/s]





  5%|███▊                                                                              | 7/150 [00:02<00:57,  2.47it/s]

In [39]:
test_tensor = paths_to_tensor(test_files).astype('float32')/255







  0%|                                                                                          | 0/600 [00:00<?, ?it/s]





  0%|▏                                                                                 | 1/600 [00:00<04:57,  2.01it/s]





  0%|▎                                                                                 | 2/600 [00:00<04:02,  2.46it/s]





  0%|▍                                                                                 | 3/600 [00:01<05:53,  1.69it/s]





  1%|▌                                                                                 | 4/600 [00:02<05:14,  1.89it/s]





  1%|▋                                                                                 | 5/600 [00:03<06:22,  1.56it/s]





  1%|▊                                                                                 | 6/600 [00:03<05:30,  1.80it/s]





  1%|▉                                                                                 | 7/600 [00:03<04:27,  2.22it/s]

In [64]:
train_tensors[0].shape

(512, 512, 3)

In [65]:
#creating weights
def create_weights(shape):
    return tf.Variable(tf.random_normal(shape))
def create_biases(size):
    return tf.Variable(tf.constant(0.05,shape=[size]))


In [62]:
def create_convolution_layer(input,num_input_channels,conv_filter_size,num_filters):
        weights = create_weights(shape=[conv_filter_size,conv_filter_size,num_input_channels,num_filters])
        biases = create_biases(num_filters)
        layer = tf.nn.conv2d(input=input,filter=weights,strides=[1,1,1,1],padding='SAME')
        layer += biases
        
        #creating maxpooling layer
        layer = tf.nn.max_pool(value=layer,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
        layer = tf.nn.relu(layer)
        return layer
        

In [55]:
def create_fc_layer(input,num_inputs,num_outputs,use_relu=True):
    weights = create_weights(shape=[num_inputs,num_outputs])
    biases = create_biases(num_outputs)
    
    layer = tf.nn.matmul(input,weights) + biases
    
    if use_relu:
        layer = tf.nn.relu(layer)
    
    return layer

In [50]:
def create_flatten_layer(layer):
    layer_shape = layer.get_shape()
    num_features = layer_shape[1:4].num_elements()
    layer = tf.reshape(layer,[-1,num_features])
    
    return layer

In [53]:
''' # placeholders and input
x = tf.placeholder(tf.float32,shape=[None,img_size,img_size,num_channels], name='x')

y_true = tf.placeholder(tf.float32,shape=[None,num_classes],name='y_true')
y_true_cls = tf.argmax(y_ture,dimension=1)
'''

" # placeholders and input\nx = tf.placeholder(tf.float32,shape=[None,img_size,img_size,num_channels], name='x')\n\ny_true = tf.placeholder(tf.float32,shape=[None,num_classes],name='y_true')\ny_true_cls = tf.argmax(y_ture,dimension=1)\n"

In [54]:
#Network design
learning_rate = 0.01
epochs = 3
batch_size = 128
n_classes = 3
drop_out = 0.75

In [56]:
def conv_net(x,num_input_channels,conv_filter_size,num_filters,dropout):
    conv1 = create_convolution_layer(x,num_input_channels,conv_filter_size,num_filters)
    conv2 = create_convolution_layer(conv1,conv1.shape[3],conv_filter_size,64)
    conv3 = create_convolution_layer(conv2,conv2.shape[3],conv_filter_size,128)
    # layer flattening
    flat = create_flatten_layer(conv3)
    # fully connected layer
    fc1 = create_fc_layer(flat,flat.get_shape()[1:4].num_elements(),128,use_relu=False)
    fc2 = create_fc_layer(fc1,128,3,use_relu=False)
    y_pred = tf.nn.softmax(fc2,name='y_pred')
    #y_pred_cls = tf.argmax(y_pred,dimension=1)
    return y_pred
    

In [66]:
x = tf.placeholder(tf.float32,[None,512,512,3])
y = tf.placeholder(tf.float32,[None,n_classes])
keep_prob = tf.placeholder(tf.float32)

logits = conv_net(x,3,3,32,keep_prob)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=y))

optimizer = tf.train.GradientDescentOptimiser(learning_rate=learning_rate).minimize(cost)

correct_pred = tf.equal(tf.argmax(logits,1),tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred,tf.float32))

init = tf.global_variables_initializer()

with tf.Session as sess:
    sess.run(init)
    for epoch in range(epochs):
        for batch in range(train_tensors.shape[0]//batch_size):
            batch_x= train_tensors.next_batch(batch_size)
            batch_y = train_targets.next_batch(batch_size)
            sess.run(optimizer,feed_dict={x:batch_x,y:batch_y,keep_prob:dropout})
            
            loss = sess.run(cost,feed_dict={x:batch_x,y:batch_y,keep_prob:1.})
            
            valid_acc = sess.run(accuracy,feed_dict={x:validation_tensor,y:valid_targets,keep_prob:1.})
            
            print('Epoch {:>2}, Batch {:>3} -'
                  'Loss: {:>10.4f} Validation Accuracy: {:.6f}'.format(
                epoch + 1,
                batch + 1,
                loss,
                valid_acc))
            
            test_acc = sess.run(accuracy,feed_dict={x:test_tensor,y:test_targets,keep_prob:1.})
            
            print('Testing Accuracy: {}'.format(test_acc))


TypeError: Expected binary or unicode string, got 3