# Histopathologic Cancer Detection

# Libraries

In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import pandas as pd 
from tqdm import tqdm

from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D

#VGG16
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input

from cfg import Config
import pickle

#from tensorflow.keras.layers import Conv2D, Flatten, MaxPooling2D, Dropout, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.nasnet import NASNetMobile
from keras.applications.xception import Xception
from keras.utils.vis_utils import plot_model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D, Average, Input, Concatenate, GlobalMaxPooling2D
from keras.models import Model
from keras.callbacks import CSVLogger, ModelCheckpoint
from keras.optimizers import Adam

# Data Visualization

In [None]:
input_dir = '/Users/ACER/Desktop/hpc_dataset/'
training_dir = input_dir + 'train/'

# Preview the first readable image in the training directory.
for img in os.listdir(training_dir):
    img_array = cv2.imread(os.path.join(training_dir, img))
    if img_array is None:
        # cv2.imread signals an undecodable file by returning None instead of
        # raising, so skip stray non-image files rather than crash in imshow.
        continue
    # OpenCV loads colour images in BGR channel order while matplotlib expects
    # RGB; convert only for display so img_array itself stays as loaded.
    # No cmap is passed: matplotlib ignores it for 3-channel image data.
    plt.imshow(cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB))
    plt.show()
    break

In [None]:
# Report the dimensions and element type of the sample image, one per line.
print(img_array.shape, img_array.dtype, sep="\n")
# Optional division by 255 (left disabled) and a re-check of the dtype:
#img_array = img_array/255
#print(img_array.dtype)


# Data Loading

In [None]:
# Labels table, keyed by the image id column. Source: Seth Adams
df = pd.read_csv('/Users/ACER/Desktop/hpc_dataset/train_labels.csv',
                 index_col='id')


def build_training_data(data_dir=None, labels=None, limit=2000):
    """Load training images and their class labels into NumPy arrays.

    Args:
        data_dir: Directory containing the ``.tif`` images. Defaults to the
            module-level ``training_dir``.
        labels: DataFrame indexed by image id with a ``label`` column.
            Defaults to the module-level ``df``.
        limit: Maximum number of image files to consider; ``None`` means
            no limit.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X`` is built from the per-image
        pixel arrays divided by 255, ``y`` from the class index (0 or 1) of
        each image, in the same order.

    Raises:
        KeyError: If an image id has no row in ``labels``.
        ValueError: If a label is not one of the known classes.
    """
    import warnings

    if data_dir is None:
        data_dir = training_dir
    if labels is None:
        labels = df
    classes = [0, 1]

    # Keep only .tif files so stray entries (thumbnails, OS metadata) are not
    # fed to the decoder, and sort because os.listdir order is arbitrary --
    # this makes the selected subset reproducible between runs.
    filenames = sorted(
        name for name in os.listdir(data_dir) if name.lower().endswith('.tif')
    )
    if limit is not None:
        filenames = filenames[:limit]

    X = []
    y = []
    for filename in tqdm(filenames):
        img_array = cv2.imread(os.path.join(data_dir, filename))
        if img_array is None:
            # cv2.imread returns None for unreadable files instead of raising;
            # dividing None by 255 would fail with an obscure TypeError.
            warnings.warn(f"Skipping unreadable image: {filename}")
            continue
        # Strip only the extension; the remainder is the id used in `labels`.
        image_id = os.path.splitext(filename)[0]
        label = labels.at[image_id, 'label']
        X.append(img_array / 255)
        y.append(classes.index(label))

    return np.array(X), np.array(y)

In [None]:
# Load the image arrays (X) and their class labels (y) into memory.
X, y = build_training_data()

In [None]:
import pickle

# Cache the arrays on disk. The `with` blocks guarantee each file is flushed
# and closed even if pickling raises part-way through.
with open("X.pickle", "wb") as pickle_out_X:
    pickle.dump(X, pickle_out_X)

with open("y.pickle", "wb") as pickle_out_y:
    pickle.dump(y, pickle_out_y)

# Reload the arrays from the cache, closing the read handles afterwards.
# NOTE: pickle.load executes arbitrary code embedded in the file; only load
# pickles that were produced locally by this notebook.
with open("X.pickle", "rb") as pickle_in_X:
    X = pickle.load(pickle_in_X)

with open("y.pickle", "rb") as pickle_in_y:
    y = pickle.load(pickle_in_y)

# Model Building and Training

In [None]:
def initial_model():
    """Build and compile a small baseline CNN for two-class classification.

    The network is one convolution + max-pooling stage followed by a dense
    hidden layer and a 2-unit softmax output.

    Returns:
        A compiled Keras ``Sequential`` model. It expects integer class
        labels (0 or 1) as targets and outputs one probability per class.
    """
    model = Sequential()
    model.add(Conv2D(256, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
    # Without a nonlinearity this layer would be a purely linear map that
    # collapses into the output layer and adds no modelling capacity.
    model.add(Dense(64, activation='relu'))
    model.add(Dense(2, activation='softmax'))
    # A 2-unit softmax paired with integer class ids needs the sparse
    # categorical loss; binary_crossentropy expects targets shaped like the
    # model output, which integer labels of shape (N,) are not.
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model

In [None]:
def VGG16_model():
    """Build and compile a binary classifier on top of a frozen VGG16 base.

    The VGG16 network is created without its top layers, with ImageNet
    weights, average pooling and a 96x96x3 input. It is marked
    non-trainable and followed by a single sigmoid unit. The model summary
    is printed before compilation.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    backbone = VGG16(weights='imagenet',
                     include_top=False,
                     pooling='avg',
                     input_shape=(96, 96, 3))
    # Freeze the convolutional base so only the new output unit is trained.
    backbone.trainable = False

    model = Sequential([
        backbone,
        Dense(1, activation='sigmoid'),
    ])
    model.summary()
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['acc'])
    return model

In [None]:
# Train the model, keeping the best-scoring version on disk.
model = VGG16_model()

model_file = "model.h5"
# A ModelCheckpoint only has an effect when it is handed to fit() through
# `callbacks`; constructing it on its own saves nothing. The monitored key
# 'val_acc' corresponds to the 'acc' metric the model is compiled with.
checkpoint = ModelCheckpoint(model_file, monitor='val_acc',
                             verbose=1, save_best_only=True,
                             mode='max')
model.fit(X, y, epochs=3, batch_size=32, validation_split=0.1,
          callbacks=[checkpoint])
# There is deliberately no unconditional model.save(model_file) here: it would
# overwrite the best checkpoint with whatever the last epoch produced.