In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# Running this cell (by clicking run or pressing Shift+Enter) summarises the files under the input directory.

import os

# Print a per-directory file count plus a handful of sample paths rather than
# one line per file: later cells read whole image folders from this tree, and
# listing every file in such a folder can flood the cell output.
MAX_SAMPLE_PATHS = 5
for dirname, _, filenames in os.walk('/kaggle/input'):
    if not filenames:
        continue
    print('{}: {} files'.format(dirname, len(filenames)))
    for filename in sorted(filenames)[:MAX_SAMPLE_PATHS]:
        print('    ' + os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

In [None]:
# Load the training labels, reading every column as a string.
# NOTE(review): the generators below use y_col='label' with class_mode='binary';
# Keras' flow_from_dataframe expects string class values in that case, which is
# presumably why dtype=str is forced here -- confirm against the installed Keras.
data = pd.read_csv('/kaggle/input/histopathologic-cancer-detection/train_labels.csv',dtype=str)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the labelled rows for validation.
# - random_state pins the shuffle so Restart & Run All reproduces the same split.
# - stratify keeps the label proportions the same in both parts.
train, valid = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
    stratify=data['label'],
)

In [None]:
def add_tif(val):
    """Return ``val`` with the ``.tif`` file extension appended."""
    extension = '.tif'
    return val + extension

In [None]:
def _ensure_tif(ids):
    """Return the id Series with '.tif' appended to entries that lack it.

    Skipping ids that already end in '.tif' makes this cell safe to re-run:
    a second execution no longer produces names such as 'abc.tif.tif'.
    """
    return ids.map(lambda name: name if name.endswith('.tif') else add_tif(name))


# Turn image ids into file names for the generators' x_col. `assign` builds new
# frames instead of writing into the objects returned by train_test_split,
# which avoids pandas' SettingWithCopyWarning.
train = train.assign(id=_ensure_tif(train['id']))
valid = valid.assign(id=_ensure_tif(valid['id']))

In [None]:
from keras.preprocessing.image import ImageDataGenerator
# Training pipeline: 1/255 rescaling plus random shear, zoom and horizontal flips.
augmentation_options = {
    'rescale': 1./255,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
}
train_datagen = ImageDataGenerator(**augmentation_options)

# Evaluation pipeline: the same 1/255 rescaling and no random augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Stream shuffled, augmented 96x96 training batches of 64 images, taking file
# names from the 'id' column and binary targets from the 'label' column.
train_flow_options = dict(
    directory='/kaggle/input/histopathologic-cancer-detection/train',
    x_col='id',
    y_col='label',
    target_size=(96,96),
    class_mode='binary',
    batch_size=64,
    shuffle=True,
)
train_generator = train_datagen.flow_from_dataframe(dataframe=train, **train_flow_options)

In [None]:
# Validation batches come from `test_datagen` (rescaling only), not from
# `train_datagen`: random shear/zoom/flip on the validation images would make
# val_loss noisy, and val_loss drives both EarlyStopping and ReduceLROnPlateau.
# shuffle=False keeps the evaluation order fixed from epoch to epoch.
valid_generator=test_datagen.flow_from_dataframe(
    dataframe=valid,
    directory='/kaggle/input/histopathologic-cancer-detection/train',
    x_col='id',
    y_col='label',
    batch_size=64,
    shuffle=False,
    class_mode='binary',
    target_size=(96,96))

In [None]:
from keras.applications.resnet50 import ResNet50
# Headless ResNet50 with ImageNet weights and no fixed input shape.
# NOTE(review): `res_50_model` is not referenced anywhere else in the visible
# notebook -- the model-building cell constructs its own ResNet50 instance --
# so this cell looks like leftover work that only costs load time and memory.
res_50_model = ResNet50(weights='imagenet', include_top=False)

In [None]:
from keras.applications.resnet50 import ResNet50
from keras.models import Sequential
from keras import layers
       
# Pretrained convolutional base for 96x96 RGB inputs; `add_layer` keeps a
# handle on it because the fine-tuning cell walks its layers.
add_layer = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(96,96,3),
)

# Classification head on the flattened backbone output: two bias-free
# Dense -> BatchNorm -> ReLU stages (256 then 64 units), dropout, and a single
# sigmoid unit for the binary label.
model = Sequential([
    add_layer,
    layers.Flatten(),
    layers.Dense(256, use_bias=False),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    layers.Dense(64, use_bias=False),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    layers.Dropout(0.5),
    layers.Dense(1, activation = "sigmoid"),
])

In [None]:
# The Keras attribute is lower-case `trainable`; the previous `Trainable`
# spelling only created an unrelated attribute on the backbone.
add_layer.trainable = True

# Freeze every backbone layer that comes before FIRST_TRAINABLE_LAYER and
# fine-tune that layer and everything after it.
FIRST_TRAINABLE_LAYER = 'res5a_branch2a'
set_trainable = False
for layer in add_layer.layers:
    if layer.name == FIRST_TRAINABLE_LAYER:
        set_trainable = True
    layer.trainable = set_trainable

if not set_trainable:
    # The name never matched, so the loop froze the whole backbone. Layer names
    # can differ between Keras releases; surface this instead of silently
    # training only the classification head.
    import warnings
    warnings.warn(
        "No backbone layer named {!r} was found; every ResNet50 layer is frozen. "
        "Check [l.name for l in add_layer.layers].".format(FIRST_TRAINABLE_LAYER)
    )

In [None]:
from keras import optimizers
# Binary cross-entropy pairs with the single sigmoid output unit; accuracy is
# reported alongside the loss. The optimizer is Adam with its default settings.
model.compile(
    loss="binary_crossentropy",
    optimizer='adam',
    metrics=["accuracy"],
)

In [None]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
# Stop once val_loss has failed to improve for 2 epochs, restoring the best weights.
earlystopper = EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
    verbose=1,
)
# Multiply the learning rate by 0.1 after 1 epoch without val_loss improvement.
reducel = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=1,
    verbose=1,
)

In [None]:
# Show the assembled model (ResNet50 base plus the dense head) layer by layer.
model.summary()

In [None]:
# Train for at most 16 epochs, validating after each one; the callbacks lower
# the learning rate and stop early based on val_loss.
training_callbacks = [reducel, earlystopper]
history = model.fit_generator(
    train_generator,
    epochs=16,
    validation_data=valid_generator,
    callbacks=training_callbacks,
)

In [None]:
# Directory that holds the unlabeled competition test images (*.tif).
img_test_path = '/kaggle/input/histopathologic-cancer-detection/test'


In [None]:
from glob import glob 

# One row per test image; sorting makes the row order independent of the
# arbitrary order in which glob returns files.
test = pd.DataFrame({'path': sorted(glob(os.path.join(img_test_path, '*.tif')))})

# The id is the file name without its extension. The previous
# x.split('/')[3] picked a fixed path component, which for img_test_path
# ('/kaggle/input/histopathologic-cancer-detection/test') is the dataset
# directory name -- every row got the same id. basename/splitext is
# independent of how deep the directory is.
test['id'] = test['path'].map(lambda p: os.path.splitext(os.path.basename(p))[0])

In [None]:
from skimage.io import imread
# Decode every test image straight into one stacked array. The decoded arrays
# are no longer also stored in a `test['image']` column: nothing else in the
# notebook reads that column, and keeping it held a second full copy of the
# test set in memory next to `test_images`.
test_images = np.stack([imread(path) for path in test['path']], axis = 0)
test_images.shape

In [None]:
import gc
# Force a garbage-collection pass before running inference on the stacked test images.
gc.collect()

In [None]:
# Score the test images in batches instead of one predict() call per image.
# Each batch is scaled by 1/255 to match the `rescale=1./255` preprocessing the
# model was trained and validated with; the previous code fed the arrays from
# imread to the model unscaled. Converting one batch at a time avoids holding a
# float copy of the whole test set.
PREDICT_BATCH_SIZE = 256
predicted_labels = []
for start in range(0, len(test_images), PREDICT_BATCH_SIZE):
    batch = test_images[start:start + PREDICT_BATCH_SIZE].astype('float32') / 255.
    # predict returns one sigmoid output per image, shape (batch, 1).
    predicted_labels.extend(model.predict(batch)[:, 0])

In [None]:
# Attach the predicted scores to the test frame and keep only the two
# columns that go into the submission.
predictions = np.asarray(predicted_labels)
test['label'] = predictions
submission = test.loc[:, ["id", "label"]]


In [None]:
from IPython.display import HTML
import pandas as pd
import numpy as np
import base64

# Builds a clickable link that downloads a dataframe as CSV
# (will only work for files < 2MB or so)
def create_download_link(df, title = "Download CSV file", filename = "data.csv"):
    """Return an IPython ``HTML`` anchor that downloads ``df`` as a CSV file.

    The frame is serialised with ``df.to_csv()`` (index included), base64
    encoded, and embedded in a ``data:text/csv`` URI.

    Parameters
    ----------
    df : DataFrame to export.
    title : link text shown to the user.
    filename : name used for the ``download`` attribute of the link.
    """
    encoded = base64.b64encode(df.to_csv().encode()).decode()
    template = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    return HTML(template.format(payload=encoded, title=title, filename=filename))

# Save the submission in the working directory so it is kept with the
# notebook's output files; index=False keeps the file to the id and label columns.
submission.to_csv('submission.csv', index=False)

# Also offer a download link. create_download_link serialises with
# df.to_csv(), which writes the index, so 'id' is made the index here to get
# the same two-column CSV instead of an extra unnamed row-number column.
# (The leftover random sample dataframe that was built here has been dropped;
# it was never used.)
create_download_link(submission.set_index('id'), filename='submission.csv')