# Histopathologic Cancer Detection

## Libraries

In [None]:
import os
import shutil
from glob import glob

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from skimage.io import imread
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.nasnet import NASNetMobile
from keras.applications.xception import Xception
from keras.utils.vis_utils import plot_model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D, Average, Input, Concatenate, GlobalMaxPooling2D
from keras.models import Model
from keras.models import Sequential
from keras.callbacks import CSVLogger, ModelCheckpoint
from keras.optimizers import Adam
#!pip install livelossplot
from livelossplot import PlotLossesKeras

#VGG16
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input

## Data Preparation

In [None]:
input_dir = '/Users/ACER/Desktop/hpc_dataset/'
training_dir = input_dir + 'train/'

# Sanity check: display the first training image that OpenCV can decode.
for img in os.listdir(training_dir):
    img_array = cv2.imread(os.path.join(training_dir, img))
    if img_array is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded (e.g. a stray non-image file); try the next entry.
        continue
    # OpenCV loads channels as BGR while matplotlib expects RGB, so convert
    # before plotting. No cmap is passed: matplotlib only applies a colormap
    # to single-channel (2-D) data and ignores it for RGB images.
    plt.imshow(cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB))
    plt.show()
    break

In [None]:
# Output files: names of the artefacts this notebook writes.

# Model artefacts
MODEL_FILE = "model.h5"
MODEL_SUMMARY_FILE = "model_summary.txt"
MODEL_PLOT_FILE = "model_plot.png"

# Training history
TRAINING_LOGS_FILE = "training_logs.csv"
TRAINING_PLOT_FILE = "training.png"
VALIDATION_PLOT_FILE = "validation.png"

# Evaluation and submission
ROC_PLOT_FILE = "roc.png"
KAGGLE_SUBMISSION_FILE = "kaggle_submission.csv"

In [None]:
# Hyper-parameters

# Data
IMAGE_SIZE = 96              # side length in pixels; the VGG16 model below is built for 96x96x3 input
SAMPLE_COUNT = 85_000        # presumably the number of images to sample -- TODO confirm where it is used
TRAINING_RATIO = 0.9         # presumably the fraction of samples used for training -- TODO confirm

# Optimisation
EPOCHS = 10
BATCH_SIZE = 192
TESTING_BATCH_SIZE = 5_000   # presumably the batch size for test-time prediction -- TODO confirm

# Logging
VERBOSITY = 1

In [None]:
# Labels. source:Seth Adams
# Build the path from input_dir so the dataset location is defined in one place.
df = pd.read_csv(input_dir + 'train_labels.csv')
# 'fname' is the key column this cell was originally written for. The Kaggle
# Histopathologic Cancer Detection label file is believed to name its key
# column 'id' (verify against your copy), so fall back to it instead of
# failing with a KeyError when 'fname' is absent.
index_column = 'fname' if 'fname' in df.columns else 'id'
df.set_index(index_column, inplace=True)

# Sentdex
# Module-level container that build_training_data() fills with image arrays.
training_data = []

def build_training_data():
    """Load the labelled training images and their integer class indices.

    Walks ``training_dir``, looks each file up in the module-level label
    table ``df`` and decodes it with OpenCV. Files that have no label entry
    or that OpenCV cannot decode are skipped.

    Returns:
        tuple: ``(training_data, labels)``. ``training_data`` is the
        module-level list, refilled with one image array per kept file;
        ``labels`` is a list of class indices aligned with it, where the
        index is the position of the label value among the sorted distinct
        values of ``df['label']``.
    """
    # Map every distinct label value to a stable integer index.
    classes = sorted(df['label'].unique().tolist())
    class_index = {value: position for position, value in enumerate(classes)}

    images = []
    labels = []
    # Sorted so the sample order (and any later positional split) is reproducible.
    for img in sorted(os.listdir(training_dir)):
        # The label table is keyed by file name; accept either the full name
        # or the name without its extension.
        key = img if img in df.index else os.path.splitext(img)[0]
        if key not in df.index:
            continue
        img_array = cv2.imread(os.path.join(training_dir, img))
        if img_array is None:
            # cv2.imread signals an undecodable file by returning None.
            continue
        images.append(img_array)
        labels.append(class_index[df.at[key, 'label']])

    # Refill the shared list in place so repeated calls do not accumulate
    # duplicates and existing references to it stay valid.
    training_data[:] = images
    return training_data, labels

In [None]:
def VGG16_model(input_shape=(96, 96, 3), learning_rate=0.001):
    """Build and compile a two-class classifier on top of a VGG16 base.

    The ImageNet-weighted VGG16 convolutional base (without its top layers)
    is followed by Flatten, Dense(256, relu), Dropout(0.5),
    Dense(128, sigmoid) and a 2-unit softmax output. The model summary is
    printed as a side effect.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled Keras model. Because it is compiled with
        ``categorical_crossentropy``, it expects one-hot encoded targets.
    """
    base_model = VGG16(include_top=False,
                       input_shape=input_shape,
                       weights='imagenet')
    model = Sequential()
    model.add(base_model)
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation='sigmoid'))
    model.add(Dense(2, activation='softmax'))
    model.summary()
    # Use the Adam class imported at the top of the file; the bare `keras`
    # module is never imported. `lr` is kept as the keyword the original
    # call used.
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(lr=learning_rate),
                  metrics=['acc'])
    return model

In [None]:
X, y = build_training_data()

# model.fit needs a single array; a Python list of arrays would be treated
# as multiple model inputs. The images' own dtype is kept to limit memory.
# NOTE(review): pixel values are fed to the network unscaled -- confirm
# whether preprocess_input (imported above) should be applied first.
X = np.asarray(X)
y = np.asarray(y)

# Accept either integer class indices (1-D) or one-hot rows (2-D).
y_flat = np.argmax(y, axis=1) if y.ndim > 1 else y.astype(int)  # return the index value
model = VGG16_model()

# The model is compiled with categorical_crossentropy over a softmax head,
# so the targets must be one-hot rows as wide as the output layer.
y_onehot = np.eye(model.output_shape[-1], dtype='float32')[y_flat]

# Weight classes inversely to their frequency. Keras expects a
# {class_index: weight} mapping, so pair each class with its weight.
classes = np.unique(y_flat)
weights = compute_class_weight(class_weight='balanced',
                               classes=classes,
                               y=y_flat)
class_weight = dict(zip(classes.tolist(), weights.tolist()))

# Keep the model with the best validation accuracy seen so far.
checkpoint = ModelCheckpoint(MODEL_FILE, monitor='val_acc', verbose=1, mode='max',
                             save_best_only=True, save_weights_only=False)

# Use the hyper-parameters declared earlier in the file rather than
# separate hard-coded values.
history = model.fit(X, y_onehot, epochs=EPOCHS, batch_size=BATCH_SIZE,
                    shuffle=True, validation_split=1 - TRAINING_RATIO,
                    class_weight=class_weight,
                    callbacks=[checkpoint])

# NOTE(review): this writes the final-epoch model to the same path as the
# checkpoint, replacing the best-validation model saved above -- confirm
# that this is intended.
model.save(MODEL_FILE)