In [None]:
import os
from datetime import datetime
import numpy as np
import pandas as pd

from keras_preprocessing.image import ImageDataGenerator
from keras.models import Sequential, Model
from keras import backend as K

from keras.layers import Input, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D
from keras.layers import AveragePooling2D, MaxPooling2D, Dropout, GlobalMaxPooling2D, GlobalAveragePooling2D
from keras.preprocessing import image
from keras.utils import layer_utils
from keras.utils.data_utils import get_file
from keras.applications.imagenet_utils import preprocess_input
from keras import optimizers
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model

import keras.backend as K
K.set_image_data_format('channels_last')
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow

import shutil

%matplotlib inline

In [None]:
# All input files live under the "../data/" directory.
print(os.listdir("../data"))

# Directory handles for later cells; the validation directory is the same folder as training.
train_dir = "../data/train/"
test_dir = "../data/test"
valid_dir = "../data/train/"

# Training images are counted across the two class sub-folders "0" and "1".
n_train_examples = sum(len(os.listdir('../data/train/' + class_folder)) for class_folder in ('0', '1'))
print("Number of training examples: ", n_train_examples)

n_test_examples = len(os.listdir('../data/test/images'))
print("Number of test examples: ", n_test_examples)



In [None]:
# Load the training labels (every column as str) and show a preview plus the label balance.
df_train = pd.read_csv('../data/train_labels.csv', dtype=str)
train_preview = df_train.head()
print(train_preview)
label_counts = df_train['label'].value_counts()
print("Labels' value distribution:\n", label_counts)

# The sample submission supplies the ids that are later used to locate test images.
df_test = pd.read_csv("../data/sample_submission.csv", dtype=str)

# add extension to image filenames 
def append_ext(fn):
    """Return ``fn`` with a ``.tif`` extension, adding it only when it is missing.

    The function is idempotent, so re-running the notebook cell that applies it
    to the id columns does not produce ``name.tif.tif``.

    Parameters
    ----------
    fn : str
        Image id or file name.

    Returns
    -------
    str
        ``fn`` unchanged if it already ends with ``.tif``, otherwise ``fn + ".tif"``.
    """
    return fn if fn.endswith(".tif") else fn + ".tif"
# Rewrite the id column of both frames through append_ext so ids match the image file names.
for labels_frame in (df_train, df_test):
    labels_frame["id"] = labels_frame["id"].map(append_ext)

print(df_train.head())

In [None]:
# Print the number of directory entries in each class folder, the training root and the test image folder.
for folder in ('../data/train/0', '../data/train/1', '../data/train', '../data/test/images'):
    print(len(os.listdir(folder)))

In [None]:
# image shape
# Training images are stored in one sub-folder per label, so the path is built
# from the row's own label; hard-coding folder "0" fails whenever the first
# row of the labels file belongs to class "1".
first_row = df_train.iloc[0]
img = plt.imread("../data/train/" + first_row['label'] + "/" + first_row['id'])
print('Images shape', img.shape)

In [None]:
# visualize some images from test dataset
# Rows of df_train are unrelated to these test files, so printing a df_train
# label next to a test image was misleading; the test image id is printed instead.
for i in range(min(3, len(df_test))):
    test_id = df_test.iloc[i]['id']
    img = plt.imread("../data/test/images/" + test_id)
    print(test_id)
    plt.imshow(img)
    plt.show()

In [None]:
# Rich preview of the first rows of the training labels frame.
df_train.head()

In [None]:
# Fraction of every class folder that is held out for validation.
VALIDATION_SPLIT = 0.15

# Training generator: random augmentation plus rescaling of pixel values to [0, 1].
datagen = ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=[0.5, 1.5],
    fill_mode='reflect',
    rotation_range=15,
    rescale=1./255,  # normalize image vectors
    shear_range=0.2,
    zoom_range=0.2,
    validation_split=VALIDATION_SPLIT
)

# Validation generator: rescaling only. Drawing the validation subset from the
# augmenting generator would evaluate the model on randomly distorted images and
# make val_loss / val_accuracy noisy. The same validation_split is used so that
# both generators partition the files of each class folder identically.
valid_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT
)

# Options shared by the training and validation iterators.
flow_options = dict(
    target_size=(96, 96),
    classes=['0', '1'],
    batch_size=64,
    class_mode='binary'
)

train_data = datagen.flow_from_directory(
    '../data/train/',
    shuffle=True,
    subset='training',
    **flow_options
)
# shuffle=False keeps the iteration order aligned with validation_data.classes.
validation_data = valid_datagen.flow_from_directory(
    '../data/train/',
    shuffle=False,
    subset='validation',
    **flow_options
)

# Test-time generator: rescaling only, matching the validation preprocessing.
test_datagen = ImageDataGenerator(rescale=1./255)


In [None]:
# Let's see what the image tensor looks like: display item 1 of the validation iterator.
# NOTE(review): presumably an (images, labels) batch of up to 64 samples — confirm.
validation_data[1]

In [None]:
# model definition
# Four convolutional stages (16, 32, 64, 128 filters) followed by a dense head
# ending in a single sigmoid unit.
my_kernel_size = (3, 3)
my_pool_size = (2, 2)
dropout_conv = 0.3
dropout_dense = 0.3

model = Sequential()

# Stem: the only convolution with a bias and a fused ReLU; it also fixes the input shape.
model.add(Conv2D(filters=16, kernel_size=my_kernel_size, padding='same',
                 activation='relu', input_shape=(96, 96, 3)))

# (filters, number of Conv -> BatchNorm -> ReLU units) for each stage. The first
# stage has two units because the stem above already provides its first convolution.
conv_stages = [(16, 2), (32, 3), (64, 3), (128, 3)]
for stage_filters, stage_units in conv_stages:
    for unit_index in range(stage_units):
        # The bias is omitted because BatchNormalization follows immediately.
        model.add(Conv2D(filters=stage_filters, kernel_size=my_kernel_size,
                         padding='same', use_bias=False))
        model.add(BatchNormalization())
        model.add(Activation("relu"))
    # Every stage ends with pooling and dropout.
    model.add(MaxPooling2D(pool_size=my_pool_size))
    model.add(Dropout(dropout_conv))

# Classification head.
model.add(Flatten())
model.add(Dense(128, use_bias=False))
model.add(BatchNormalization())
model.add(Activation("relu"))
model.add(Dropout(dropout_dense))
model.add(Dense(1, activation='sigmoid'))


model.summary()

In [None]:
# Configure the model for training: Adam optimizer, binary cross-entropy loss, accuracy metric.
adam_optimizer = optimizers.Adam(learning_rate=0.01)
model.compile(
    optimizer=adam_optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Get the labels that are associated with each index
# (the class_indices attribute of the validation iterator).
print(validation_data.class_indices)

In [None]:
# Timestamp of this run (day, month, year, underscore, hour, minute, second);
# it tags the checkpoint file name built below.
now = datetime.now()
dt_string = format(now, "%d%m%Y_%H%M%S")
print("date and time =", dt_string)

# Checkpoint file name for this run.
filepath = "model_{}.h5".format(dt_string)
print(filepath)

In [None]:
# Write the model to `filepath`, keeping only the best epoch as judged by val_accuracy.
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    mode='auto',
)

# Scale the learning rate by 0.3 when val_loss stalls (patience of 2 epochs), never below 1e-5.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.3,
    patience=2,
    verbose=1,
    mode='auto',
    min_lr=0.00001,
)

callbacks_list = [checkpoint, reduce_lr]


In [None]:

# Number of whole batches per epoch; the integer division leaves out a trailing partial batch.
STEP_SIZE_TRAIN = train_data.n // train_data.batch_size
STEP_SIZE_VALID = validation_data.n // validation_data.batch_size

# Training options collected in one place.
fit_options = dict(
    steps_per_epoch=STEP_SIZE_TRAIN,
    epochs=15,
    validation_data=validation_data,
    validation_steps=STEP_SIZE_VALID,
    verbose=1,
    callbacks=callbacks_list,
)
history = model.fit(train_data, **fit_options)

In [None]:
# Display the object returned by model.fit.
# NOTE(review): per-epoch metrics are presumably available under history.history — confirm.
history


In [None]:
# Names of the values reported by the compiled model (loss and metrics).
model.metrics_names

In [None]:
# Display the model's weight variables.
# NOTE(review): this output can be very long for a network of this size — consider clearing it before sharing.
model.weights

In [None]:
# Iterator over the test folder: one image per batch, in fixed (unshuffled) order.
test_flow_options = {
    'target_size': (96, 96),
    'batch_size': 1,
    'class_mode': 'binary',
    'shuffle': False,
}
test_data = test_datagen.flow_from_directory('../data/test/', **test_flow_options)


In [None]:
from glob import glob
from skimage.io import imread

# Every .tif file of the test image folder; max_idx is their count.
test_image_pattern = os.path.join(test_dir + '/images', '*.tif')
test_files = glob(test_image_pattern)
max_idx = len(test_files)

# Chunk size used when scoring the files, and the (initially empty) result frame.
file_batch = 5000
submission = pd.DataFrame()


In [None]:
# Sanity check of the id parsing on the second test path: keep the last path
# component (for either "/" or "\\" separators) and cut at the first dot.
test_files[1].split('/')[-1].split('\\')[-1].split(".")[0]

In [None]:
# Score the test images in chunks of `file_batch` files and collect the results.
#
# Changes with respect to the previous version of this cell:
# * Pixels are rescaled by 1/255, the preprocessing the model was trained with
#   (rescale=1./255 in the generators). Standardising each chunk by its own
#   mean/std fed the network inputs on a different scale than it was trained on.
# * Per-chunk frames are gathered in a list and concatenated once, instead of
#   growing `submission` inside the loop; re-running the cell no longer
#   duplicates rows.
# * The (N, 1) prediction array is flattened before it is stored as a column.
batch_frames = []
for idx in range(0, max_idx, file_batch):
    batch_paths = test_files[idx:idx + file_batch]
    print("Indexes: %i - %i" % (idx, idx + len(batch_paths)))

    test_df = pd.DataFrame({'path': batch_paths})
    # Image id = last path component (either separator) up to the first dot.
    test_df['id'] = test_df.path.map(lambda x: x.split('/')[-1].split('\\')[-1].split(".")[0])

    # Load the chunk as one array and apply the training-time rescaling.
    K_test = np.stack([imread(path) for path in batch_paths])
    K_test = K_test.astype("float32") / 255.0

    predictions = model.predict(K_test)
    test_df['label'] = np.asarray(predictions).ravel()
    batch_frames.append(test_df[["id", "label"]])

# Build the submission frame in one step; fall back to an empty, correctly
# labelled frame when no test files were found.
if batch_frames:
    submission = pd.concat(batch_frames, ignore_index=True)
else:
    submission = pd.DataFrame(columns=["id", "label"])
submission.head()

In [None]:
# Last rows of the submission frame.
submission.tail()

In [None]:
# Largest value in the submission's label column (quick sanity check of the predictions).
submission.label.max()

In [None]:
# Write the submission file, tagged with this run's timestamp.
# The output folder is created first: to_csv does not create missing
# directories and would otherwise fail on a fresh checkout.
submission_dir = "../submission"
os.makedirs(submission_dir, exist_ok=True)
submission.to_csv(submission_dir + "/submission_" + dt_string + ".csv", index=False, header=True)

In [None]:

# Evaluate on the validation iterator; the result unpacks into loss and accuracy.
validation_scores = model.evaluate(validation_data)
val_loss, val_acc = validation_scores

for score_name, score_value in zip(('val_loss:', 'val_acc:'), (val_loss, val_acc)):
    print(score_name, score_value)

In [None]:
# Predict on the directory-based test iterator. This rebinds `predictions`,
# a name the chunked scoring loop earlier in the notebook also assigns.
predictions = model.predict(test_data, verbose=1)

In [None]:
# Shape of the prediction array and the class mapping of the test iterator.
print(predictions.shape)
print(test_data.class_indices)

In [None]:
# Display the raw prediction array.
predictions

In [None]:
# Class index of every file found by the test iterator.
# NOTE(review): the iterator reads '../data/test/', whose only sub-folder referenced
# in this notebook is 'images', so these are presumably placeholder values rather
# than real labels — confirm before using them as ground truth.
test_labels = test_data.classes

In [None]:
from numpy.random import seed
seed(101)
#from tensorflow import set_random_seed
#set_random_seed(101)

import pandas as pd
import numpy as np


import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Dense, Dropout, Flatten, Activation
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.optimizers import Adam

import os
import cv2

from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
import itertools
import shutil
import matplotlib.pyplot as plt
%matplotlib inline
cm = confusion_matrix(test_labels, predictions.argmax(axis=1))
cm_plot_labels = ['no_tumor_tissue', 'has_tumor_tissue']

plot_confusion_matrix(cm, cm_plot_labels, title='Confusion Matrix')
