# Histopathologic Cancer Detection

In [1]:
from distutils.version import LooseVersion
import warnings
import tensorflow as tf

# Check TensorFlow Version
assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), 'Please use TensorFlow version 1.0 or newer.  You are using {}'.format(tf.__version__)
print('TensorFlow Version: {}'.format(tf.__version__))

# Check for a GPU
if not tf.test.gpu_device_name():
    warnings.warn('No GPU found. Please ensure you have installed TensorFlow correctly')
else:
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))

TensorFlow Version: 2.1.0
Default GPU Device: /device:GPU:0


# Libraries

In [2]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import pandas as pd 
from tqdm import tqdm

from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator

#VGG16
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input

from cfg import Config
import pickle

#from tensorflow.keras.layers import Conv2D, Flatten, MaxPooling2D, Dropout, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.nasnet import NASNetMobile
from keras.applications.xception import Xception
from keras.utils.vis_utils import plot_model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D, Average, Input, Concatenate, GlobalMaxPooling2D
from keras.models import Model
from keras.callbacks import CSVLogger, ModelCheckpoint
from keras.optimizers import Adam

Using TensorFlow backend.


# Data Visualization

In [None]:
input_dir = '/Users/ACER/Desktop/hpc_dataset/'
visual_dir = input_dir + 'train/1'
train_dir = input_dir + 'train/'
val_dir = input_dir + 'val/'

for img in os.listdir(visual_dir):
    img_array = cv2.imread(os.path.join(visual_dir,img)).astype(np.float32)
    img_array = img_array/255.0
    plt.imshow(img_array,cmap="gray")#what is cmap?
    plt.show()
    break

# Data Loading

In [None]:
# Data augmentation
training_data_generator = ImageDataGenerator( rescale=1./255,
                                             horizontal_flip=True,
                                             vertical_flip=True,
                                             rotation_range=90,
                                             zoom_range=0.2, 
                                             width_shift_range=0.1,
                                             height_shift_range=0.1,
                                             shear_range=0.05,
                                             channel_shift_range=0.1)

In [None]:
# Data augmentation
val_data_generator = ImageDataGenerator(rescale=1./255)

In [None]:
# Data generation
training_generator = training_data_generator.flow_from_directory(train_dir,
                                                                 target_size=(96,96),
                                                                 batch_size=64,
                                                                 class_mode='binary')

In [None]:
validation_generator = val_data_generator.flow_from_directory(val_dir,
                                                              target_size=(96,96),
                                                              batch_size=64,
                                                              class_mode='binary')

# Model Building and Training

In [None]:
def VGG16_model():
    base_model = VGG16(include_top=False,
                       input_shape = (96,96,3),
                       weights = 'imagenet')

    for layer in base_model.layers[:-5]:
        layer.trainable = False
    
    for layer in base_model.layers:
        print(layer,layer.trainable)
    
    model = Sequential()
    model.add(base_model)
    
    model.add(Flatten())
    model.add(Dense(1024,activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(1,activation='sigmoid'))
    #model.layers[0].trainable=False
    model.summary()
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr=0.0002),
                  metrics=['acc'] )
    return model

In [None]:
model = VGG16_model()

In [None]:
model.fit_generator(training_generator,
                        steps_per_epoch=len(training_generator), 
                        validation_data=validation_generator,
                        validation_steps=len(validation_generator),
                        epochs=1,
                        verbose=1)
# Save the best model
model_file = "models/model_v1.h5"
ModelCheckpoint(model_file, monitor='val_acc',
                verbose=1, save_best_only=True,
                mode='max')
model.save(model_file)

## Model Testing

In [3]:
from keras.models import load_model
model = load_model("models/model_v1.h5")

from skimage.io import imread
from glob import glob
from tqdm import tqdm

testing_batch_size = 1000

testing_files = glob(os.path.join('/Users/ACER/Desktop/hpc_dataset/test/','*.tif'))
submission = pd.DataFrame()
for index in tqdm(range(0, len(testing_files), testing_batch_size)):
    data_frame = pd.DataFrame({'path': testing_files[index:index+testing_batch_size]})
    data_frame['id'] = data_frame.path.map(lambda x: x.split('/')[3].split(".")[0])
    data_frame['image'] = data_frame['path'].map(imread)
    images = np.stack(data_frame.image, axis=0)
    predicted_labels = [model.predict(np.expand_dims(image/255.0, axis=0))[0][0] for image in images]
    predictions = np.array(predicted_labels)
    data_frame['label'] = predictions
    submission = pd.concat([submission, data_frame[["id", "label"]]])
submission.to_csv("submission_ar_8.csv", index=False, header=True)

100%|██████████████████████████████████████████| 58/58 [40:54<00:00, 42.33s/it]
