In [None]:
import cv2 
import imageio
import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd 
import random
import scipy.ndimage as ndi
import seaborn as sns; sns.set()
import tensorflow as tf

# keras packages
from keras import optimizers, regularizers
from keras.callbacks import ModelCheckpoint 
from keras_preprocessing.image import ImageDataGenerator
from keras.layers import Activation, BatchNormalization, Conv2D
from keras.layers import Dense, Dropout, Flatten
from keras.layers import AveragePooling2D, MaxPooling2D 
from keras.models import Sequential

In [None]:
def append_ext(fn):
    """Return `fn` with the ".tif" file extension appended."""
    extension = ".tif"
    return fn + extension

# create training dataframe, add file extensions
traindf=pd.read_csv("/kaggle/input/histopathologic-cancer-detection/train_labels.csv",
                    dtype=str)
traindf["id"]=traindf["id"].apply(append_ext)

# Training-time preprocessing: rescale pixels to [0, 1] and throw in random
# shifts and flips to make sure the model has objects in different placements.
datagen = ImageDataGenerator(rescale=1./255.,
                           validation_split=0.25,
                           width_shift_range=4.0,
                           height_shift_range=4.0,
                           horizontal_flip=True,
                           vertical_flip=True
                          )

# Validation-time preprocessing: rescale only. Random augmentation on the
# held-out images would make validation metrics noisy and non-reproducible.
# The same validation_split is used so the dataframe is partitioned exactly
# as it is for the training generator.
valid_datagen = ImageDataGenerator(rescale=1./255.,
                                   validation_split=0.25)

# used this tutorial to figure out flow_from_dataframe
# https://medium.com/@vijayabhaskar96/tutorial-on-keras-flow-from-dataframe-1fd4493d237c
train_generator=datagen.flow_from_dataframe(
                dataframe=traindf,
                directory="/kaggle/input/histopathologic-cancer-detection/train/",
                x_col="id",
                y_col="label",
                subset="training",
                batch_size=32,
                seed=42,
                shuffle=True,
                class_mode="categorical",
                target_size=(96,96))

# shuffle=False: sample order is irrelevant for evaluation, and a fixed order
# keeps validation results comparable from epoch to epoch.
valid_generator=valid_datagen.flow_from_dataframe(
                dataframe=traindf,
                directory="/kaggle/input/histopathologic-cancer-detection/train/",
                x_col="id",
                y_col="label",
                subset="validation",
                batch_size=14,
                seed=42,
                shuffle=False,
                class_mode="categorical",
                target_size=(96,96))


# create test dataframe, add file extensions
testdf=pd.read_csv("/kaggle/input/histopathologic-cancer-detection/sample_submission.csv",
                   dtype=str)
testdf["id"]=testdf["id"].apply(append_ext)

# Inference must be deterministic: only rescale the pixels. Random shifts and
# flips (and a validation split) have no place in the test pipeline.
test_datagen = ImageDataGenerator(rescale=1./255.)

# class_mode=None makes the generator yield image batches only; the "label"
# column of the sample submission is a placeholder, not a ground truth.
# shuffle=False keeps predictions aligned with test_generator.filenames.
test_generator=test_datagen.flow_from_dataframe(
               dataframe=testdf,
               directory="/kaggle/input/histopathologic-cancer-detection/test/",
               x_col="id",
               y_col=None,
               batch_size = 2,
               seed=42,
               shuffle=False,
               class_mode=None,
               target_size=(96,96))


In [None]:
# Small VGG-style CNN: three blocks of (Conv3x3 -> Conv3x3 -> MaxPool2) with
# 16, 32 and 64 filters, followed by a 512-unit dense head with dropout.
model = Sequential()

for block_index, n_filters in enumerate((16, 32, 64)):
    # Only the very first layer declares the input shape (96x96 RGB images).
    first_layer_kwargs = {'input_shape': (96,96,3)} if block_index == 0 else {}
    model.add(Conv2D(n_filters, kernel_size=3, activation='relu',
                     padding='same', **first_layer_kwargs))
    model.add(Conv2D(n_filters, kernel_size=3, activation='relu',
                     padding='same'))
    model.add(MaxPooling2D(2))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
# The generators emit one-hot targets for two mutually exclusive classes, so
# the output layer uses softmax: the two scores form a proper probability
# distribution instead of two independent sigmoids. For a two-class one-hot
# target, binary cross-entropy on softmax outputs equals categorical
# cross-entropy, so this stays compatible with a binary_crossentropy loss.
model.add(Dense(2, activation='softmax'))

model.summary()


In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, and report
# binary accuracy and AUC alongside the loss.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['binary_accuracy', 'AUC'],
)

In [None]:
batch_size = 14
# Training/validation drop the final partial batch (floor division).
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size
# Prediction must cover EVERY test image: round up so a final partial batch is
# included. With floor division the prediction array would be shorter than
# test_generator.filenames whenever n is not a multiple of the batch size.
STEP_SIZE_TEST=int(np.ceil(test_generator.n/test_generator.batch_size))

# NOTE(review): fit_generator / predict_generator are deprecated in recent
# Keras releases in favour of fit / predict; kept here to match the Keras
# version this notebook was written against.
model.fit_generator(generator=train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=valid_generator,
                    validation_steps=STEP_SIZE_VALID,
                    epochs=10
)

# Restart the test iterator from its first batch so the prediction order
# matches test_generator.filenames.
test_generator.reset()
pred=model.predict_generator(test_generator,
                             steps=STEP_SIZE_TEST,
                             verbose=2)

# Index of the highest-scoring output unit for each image.
predicted_class_indices=np.argmax(pred,axis=1)

# Invert the {label: index} mapping learned by the training generator so the
# predicted indices can be translated back into the original label strings.
labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]

filenames=test_generator.filenames

results=pd.DataFrame({"Filename":filenames,
                      "Predictions":predictions})
results.to_csv("results.csv",index=False)

In [None]:
# Evaluate on the validation generator using the VALIDATION step count; the
# test-set step count belongs to a different generator with a different size
# and batch size.
model.evaluate_generator(generator=valid_generator,
                         steps=STEP_SIZE_VALID)