In [None]:
import os
import numpy as np
import pandas as pd
import time
import keras as k 
from keras_preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.applications import vgg16
from keras.models import Model
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Dropout,InputLayer
from keras.layers import Conv2D, MaxPooling2D
from keras import regularizers, optimizers
from keras.metrics import categorical_accuracy
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

In [None]:
np.random.seed(1)

# Single root for the competition data. Every other path below is derived
# from it, so relocating the dataset means editing only this one line.
DATA_DIR = r'C:\Users\casocha\Downloads\histopathologic-cancer-detection'

train_path = os.path.join(DATA_DIR, 'train')
# Validation images are read from the same folder as the training images.
valid_path = train_path
# Trailing separator kept so that `test_path + filename` still forms a valid path.
test_path = os.path.join(DATA_DIR, 'test', '')


def append_ext(fn):
    """Return the image file name for a bare id by appending ``.tif``."""
    return fn + ".tif"


def append_ext2(fn):
    """Return the full path of a test image: ``test_path`` joined with id + ``.tif``."""
    return os.path.join(test_path, fn + ".tif")


# Training labels: ids become file names and labels become strings
# (the generators built from this frame read 'label' with class_mode='binary').
df = pd.read_csv(os.path.join(DATA_DIR, 'train_labels.csv')).assign(
    id=lambda frame: frame["id"].apply(append_ext),
    label=lambda frame: frame["label"].astype(str),
)

# Test frame: ids become full image paths; the placeholder 'label' column is dropped.
testdf = (
    pd.read_csv(os.path.join(DATA_DIR, 'sample_submission.csv'))
    .assign(id=lambda frame: frame["id"].apply(append_ext2))
    .drop(['label'], axis=1)
)

In [None]:
# Test images only have their pixel values multiplied by 1/255.
test_datagen = ImageDataGenerator(rescale=1./255)

# Unlabelled, unshuffled stream of test images in dataframe order.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=testdf,
    # testdf["id"] already holds full image paths, so no base directory is
    # supplied (the previous value was the string literal "test_path", not
    # the test_path variable).
    directory=None,
    x_col="id",
    y_col=None,
    class_mode=None,
    target_size=(96, 96),
    batch_size=64,
    shuffle=False,
    seed=1,
)

# Generator used for both the 'training' and 'validation' subsets: pixel
# values are multiplied by 1/255 and validation_split is set to 0.15.
# Augmentation options that remain switched off here: horizontal_flip,
# vertical_flip, brightness_range=[0.5, 1.5], fill_mode='reflect',
# rotation_range=15, shear_range=0.2, zoom_range=0.2.
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.15)


# Training subset of the labelled frame, read from train_path in batches of 64.
# df["id"] already carries the ".tif" extension, so the deprecated
# `has_ext=False` flag is not passed; the misspelled `use_mutiprocessing`
# keyword (not a flow_from_dataframe option) is dropped as well.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=df,
    directory=train_path,
    x_col='id',
    y_col='label',
    subset='training',
    class_mode='binary',
    target_size=(96, 96),
    batch_size=64,
    seed=1,
)

# Validation subset of the same labelled frame. Using subset='validation' on
# the generator that was configured with validation_split is what keeps the
# training and validation rows separate. Order is fixed (shuffle=False).
# df["id"] already carries the ".tif" extension, so the deprecated
# `has_ext=False` flag is not passed; the misspelled `use_mutiprocessing`
# keyword (not a flow_from_dataframe option) is dropped as well.
validation_generator = train_datagen.flow_from_dataframe(
    dataframe=df,
    directory=valid_path,
    x_col='id',
    y_col='label',
    subset='validation',
    class_mode='binary',
    target_size=(96, 96),
    batch_size=64,
    shuffle=False,
    seed=1,
)

In [None]:
# VGG16 without its top (include_top=False), ImageNet weights, 96x96x3 input.
input_shape = (96, 96, 3)

vgg_model = vgg16.VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=input_shape,
)

In [None]:
# Freeze the VGG16 base: every layer's flag and the model-level flag are set to False.
for layer in vgg_model.layers:
    layer.trainable = False
vgg_model.trainable = False

In [None]:
# Disable column-width truncation so each layer's full repr is visible.
# None is the supported "no limit" value; the old -1 sentinel is deprecated.
pd.set_option('display.max_colwidth', None)
# One row per VGG16 layer: the layer object, its name, and its trainable flag.
layers = [(layer, layer.name, layer.trainable) for layer in vgg_model.layers]
pd.DataFrame(layers, columns=['Layer Type', 'Layer Name', 'Layer Trainable'])

In [None]:
# Classifier: VGG16 base -> flatten -> 64-unit ReLU layer -> single sigmoid output.
model = Sequential([
    vgg_model,
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.summary()

In [None]:
# Binary cross-entropy loss, RMSprop with a 1e-4 step size, accuracy as the metric.
model.compile(
    optimizer=optimizers.RMSprop(lr=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Number of whole batches in each generator (integer division, so a trailing
# partial batch is not counted).
# NOTE(review): if STEP_SIZE_TEST is later used for prediction, the floor may
# leave the last few test rows unpredicted - confirm against the caller.
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = validation_generator.n // validation_generator.batch_size
STEP_SIZE_TEST = test_generator.n // test_generator.batch_size

# Both callbacks monitor validation loss: the learning rate is multiplied by
# 0.1 after 1 epoch without improvement, and training stops after 2 such
# epochs with restore_best_weights enabled.
reducel = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=1, verbose=1)
earlystopper = EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
    verbose=1,
)

start = time.time()
history = model.fit_generator(
    generator=train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=validation_generator,
    validation_steps=STEP_SIZE_VALID,
    epochs=6,
    callbacks=[reducel, earlystopper],
)
end = time.time()

# Elapsed training time, reported in minutes.
print("Training time: ", (end - start)/60, " minutes")

In [None]:
# Score the model on the validation subset. The step count is taken from the
# validation generator itself (its length is the number of batches per pass),
# so each validation batch is evaluated exactly once. The previous value,
# STEP_SIZE_TEST, was the test set's batch count and did not match this
# generator.
model.evaluate_generator(generator=validation_generator,
                         steps=len(validation_generator))

In [None]:
import matplotlib.pyplot as plt

# The accuracy metric is recorded as 'acc' by older Keras releases and as
# 'accuracy' by newer ones; use whichever key this training run produced.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 6))

# summarize history for accuracy
ax_acc.plot(history.history[acc_key])
ax_acc.plot(history.history['val_' + acc_key])
ax_acc.set_title('Transfer Learning')
ax_acc.set_ylabel('accuracy')
ax_acc.set_xlabel('epoch')
ax_acc.legend(['train', 'validation'], loc='upper left')

# summarize history for loss
ax_loss.plot(history.history['loss'])
ax_loss.plot(history.history['val_loss'])
ax_loss.set_title('Transfer Learning')
ax_loss.set_ylabel('loss')
ax_loss.set_xlabel('epoch')
ax_loss.legend(['train', 'validation'], loc='upper right')

plt.show()