In [None]:
import os
import shutil
import warnings
warnings.filterwarnings('ignore')

import cv2
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np
from numpy.random import seed
seed(123)

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten, Activation
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
tf.random.set_seed(123)

In [None]:
# Show what the competition dataset directory contains at its top level.
dataset_root = '../input/histopathologic-cancer-detection'
os.listdir(dataset_root)

In [None]:
# Report how many files sit in the train folder and then in the test folder.
for image_dir in ('../input/histopathologic-cancer-detection/train',
                  '../input/histopathologic-cancer-detection/test'):
    n_files = len(os.listdir(image_dir))
    print(n_files)

In [None]:
# Load the training labels and the sample submission, then print the
# (rows, columns) shape of the label table.
labels_csv = '../input/histopathologic-cancer-detection/train_labels.csv'
sample_submission_csv = '../input/histopathologic-cancer-detection/sample_submission.csv'

df_train = pd.read_csv(labels_csv)
df_sample_submission = pd.read_csv(sample_submission_csv)
print(df_train.shape)

In [None]:
# Peek at the first five rows of the label table.
df_train.head(n=5)

In [None]:
# Preview two training images.
# cv2.imread returns pixels in BGR channel order, whereas plt.imshow interprets
# a 3-channel array as RGB. Convert before plotting so the red and blue
# channels are not swapped in the rendered picture.
for sample_path in (
    '../input/histopathologic-cancer-detection/train/f38a6374c348f90b587e046aac6079959adf3835.tif',
    '../input/histopathologic-cancer-detection/train/c18f2d887b7ae4f6742ee445113fa1aef383ed77.tif',
):
    sample_bgr = cv2.imread(sample_path)
    if sample_bgr is None:
        # cv2.imread does not raise on a missing/unreadable file; it returns None.
        raise FileNotFoundError(f'Could not read image: {sample_path}')
    plt.imshow(cv2.cvtColor(sample_bgr, cv2.COLOR_BGR2RGB))
    plt.show()

In [None]:
# Count how many rows carry each label value.
label_column = df_train['label']
label_column.value_counts()

In [None]:
# Build a small class-balanced working set: the same number of randomly chosen
# rows from label 0 and from label 1, concatenated and then shuffled.
#
# Explicit random_state values make the draw depend only on the contents of
# df_train, not on how far the global NumPy RNG has already advanced, so
# re-executing this cell picks the same rows again.
# NOTE: df_train is rebound by the train/validation split cell further down;
# re-run the CSV-loading cell before re-running this one.
SAMPLES_PER_CLASS = 500
SAMPLE_SEED = 123

df0 = df_train[df_train['label']==0].sample(SAMPLES_PER_CLASS, random_state=SAMPLE_SEED)
df1 = df_train[df_train['label']==1].sample(SAMPLES_PER_CLASS, random_state=SAMPLE_SEED)
df_data = pd.concat([df0, df1], axis=0).reset_index(drop=True)

# Mix the two classes so rows are no longer grouped by label.
df_data = shuffle(df_data, random_state=SAMPLE_SEED)


# Sanity check: both labels should appear SAMPLES_PER_CLASS times.
df_data['label'].value_counts()

In [None]:
# Split the balanced sample 80/20 into training and validation frames.
# stratify=y keeps the label proportions equal in both parts, and the fixed
# random_state makes the split identical each time the cell is executed.
# NOTE: this rebinds df_train (previously the full label table) to the
# training part of the split.
y = df_data['label']

df_train, df_val = train_test_split(df_data, test_size=0.20, stratify=y,
                                    random_state=123)

print(df_train.shape)
print(df_val.shape)

In [None]:
# Create the staging tree read by flow_from_directory:
#   base/{train,val}/{0,1}
# Any 'base' tree left over from an earlier run is removed first, so the cell
# can be re-executed without FileExistsError and without stale images from a
# previous sample lingering in the class folders.
if os.path.isdir('base'):
    shutil.rmtree('base')

for split_name in ('train', 'val'):
    for class_name in ('0', '1'):
        os.makedirs(os.path.join('base', split_name, class_name), exist_ok=True)

In [None]:
# Copy every selected image into base/<split>/<label>/ so that
# flow_from_directory can infer the class from the folder name.
# One loop over (split, label) replaces four copy-pasted loops, and paths are
# assembled with os.path.join instead of string concatenation.
source_dir = '../input/histopathologic-cancer-detection/train'

for split_name, split_df in (('train', df_train), ('val', df_val)):
    for class_label in (0, 1):
        target_dir = os.path.join('base', split_name, str(class_label))
        # 'id' holds the file name without its .tif extension.
        for image in split_df.loc[split_df['label'] == class_label, 'id']:
            file_name = image + '.tif'
            shutil.copyfile(os.path.join(source_dir, file_name),
                            os.path.join(target_dir, file_name))

In [None]:
# Print the number of staged files per class folder, in the order
# train/0, train/1, val/0, val/1.
for staged_dir in ('base/train/0', 'base/train/1', 'base/val/0', 'base/val/1'):
    staged_count = len(os.listdir(staged_dir))
    print(staged_count)

In [None]:
# Set up the generators
# Directories the image generators read from.
train_path = 'base/train'
valid_path = 'base/val'
test_path = '../input/histopathologic-cancer-detection/test'

# Sample counts and batch sizes for the training and validation splits.
num_train_samples = len(df_train)
num_val_samples = len(df_val)
train_batch_size = 10
val_batch_size = 10


# Batches needed to see every sample once. True division is required here:
# with floor division the value is already rounded down before np.ceil runs,
# which drops the final partial batch whenever the sample count is not a
# multiple of the batch size.
train_steps = int(np.ceil(num_train_samples / train_batch_size))
val_steps = int(np.ceil(num_val_samples / val_batch_size))

In [None]:
# A single generator is shared by all three flows; its only preprocessing is
# multiplying pixel values by 1/255.
datagen = ImageDataGenerator(rescale=1.0/255)

image_size = (96, 96)

# Training images, labelled by sub-folder and one-hot encoded.
train_gen = datagen.flow_from_directory(
    directory=train_path,
    target_size=image_size,
    batch_size=train_batch_size,
    class_mode='categorical',
)

# Validation images, configured the same way as the training flow.
val_gen = datagen.flow_from_directory(
    directory=valid_path,
    target_size=image_size,
    batch_size=val_batch_size,
    class_mode='categorical',
)

# The test images live in a single folder, so point the flow at the dataset
# root and select 'test' as the only class. shuffle=False keeps the batches in
# the same order as test_gen.filenames, which is needed to match predictions
# to files later on.
test_gen = datagen.flow_from_directory(
    directory='../input/histopathologic-cancer-detection',
    target_size=image_size,
    batch_size=1,
    classes=['test'],
    shuffle=False,
)

In [None]:
# Hyper-parameters of the convolutional network.
kernel_size = (3,3)
pool_size= (2,2)
first_filters = 64
second_filters = 128
third_filters = 256
fourth_filters = 512

dropout_conv = 0.5
dropout_dense = 0.5

# Convolutional stages as (filters, number of conv layers, pool afterwards?).
# The last stage feeds straight into Flatten without a pooling layer.
conv_stages = [
    (first_filters, 2, True),
    (second_filters, 2, True),
    (third_filters, 3, True),
    (fourth_filters, 3, False),
]

model = Sequential()
for stage_index, (n_filters, n_convs, pool_after) in enumerate(conv_stages):
    for conv_index in range(n_convs):
        conv_kwargs = {'activation': 'relu', 'padding': 'same'}
        if stage_index == 0 and conv_index == 0:
            # Only the very first layer declares the input image shape.
            conv_kwargs['input_shape'] = (96, 96, 3)
        model.add(Conv2D(n_filters, kernel_size, **conv_kwargs))
    if pool_after:
        model.add(MaxPooling2D(pool_size = pool_size))

# Classifier head: two wide dense layers, each followed by dropout, then a
# two-way softmax output.
model.add(Flatten())
for _ in range(2):
    model.add(Dense(4096, activation = "relu"))
    model.add(Dropout(dropout_dense))
model.add(Dense(2, activation = "softmax"))

model.summary()

In [None]:
# Compile with Adam and track AUC during training.
# The metric is created explicitly with name='AUC' so that the history keys
# are always 'AUC' / 'val_AUC'. When the metric is given as the bare string
# 'AUC', the resulting key differs between Keras versions ('auc' vs 'AUC'),
# which breaks the later history.history['AUC'] lookup.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy',
              metrics=[keras.metrics.AUC(name='AUC')])

In [None]:
# Show which folder name maps to which output index of the network.
class_to_index = val_gen.class_indices
print(class_to_index)

In [None]:
# Train for ten epochs, evaluating on the validation generator after each one.
# No callbacks are used. A checkpoint / reduce-LR-on-plateau pair was sketched
# here earlier but monitored 'val_accuracy', which this model does not log
# (it is compiled with AUC as its only metric).
fit_options = dict(validation_data=val_gen, epochs=10, verbose=1)
history = model.fit(train_gen, **fit_options)

In [None]:
# Plot training vs. validation AUC per epoch.
# The variable names say "acc" but the values are AUC, the only metric the
# model was compiled with, so the figure is labelled as AUC.
tr_acc = history.history['AUC']
val_acc = history.history['val_AUC']

# Epochs are numbered from 1 on the x axis.
epoc = range(1, len(tr_acc) + 1)

plt.plot(epoc, tr_acc, label='Training AUC')
plt.plot(epoc, val_acc, label='Validation AUC')
plt.title('AUC per epoch')
plt.xlabel('Epoch')
plt.ylabel('AUC')
plt.legend()
plt.show()

In [None]:
# Run the trained network over every test image; test_gen is unshuffled, so
# the rows of `predictions` follow the order of test_gen.filenames.
predictions = model.predict(x=test_gen, verbose=1)

In [None]:
# Display the raw model output returned by model.predict for the test set.
predictions

In [None]:
# Wrap the raw predictions in a DataFrame with one column per output unit,
# named '0' and '1', and preview the first rows.
class_columns = ['0', '1']
df_preds = pd.DataFrame(data=predictions, columns=class_columns)

df_preds.head()

In [None]:
# Rows where the score in column '1' exceeds 0.5.
is_positive = df_preds['1'].gt(0.5)
df_preds[is_positive]

In [None]:
# Attach the generator's file list to the predictions. This relies on
# test_gen having been created with shuffle=False so that the file order
# matches the prediction order.
df_preds['file_names'] = test_gen.filenames

In [None]:
# Derive the image id from each file name by dropping the first five
# characters and the last four (the 'test' folder prefix plus separator, and
# the '.tif' extension).
df_preds['id'] = df_preds['file_names'].str.slice(5, -4)

# The submission uses the score from column '1' as the label.
submission = df_preds[['id','1']].rename(columns={'1':'label'})
submission.to_csv('submission.csv', columns=['id','label'], index=False)

In [None]:
# Read the written file back to confirm its contents.
submission_path = 'submission.csv'
pd.read_csv(submission_path)

In [None]:
# Remove the temporary staging tree that was created for the generators.
staging_dir = 'base'
shutil.rmtree(staging_dir)