In [1]:
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
import cv2
from tqdm import tqdm
from IPython.display import display, Markdown

%pylab inline

Populating the interactive namespace from numpy and matplotlib


### Patient data preprocessing and preparation

In [3]:
# Image folders: <current working directory>/dataset/cancer and .../dataset/fibro.
data_dir = os.path.join(os.getcwd(), 'dataset')
cancer_dir, fibro_dir = (os.path.join(data_dir, label) for label in ('cancer', 'fibro'))

In [4]:
import shutil
    
def classdir_prepare(class_pathlist, class_destdir, class_name='Class None'):
    """Recreate ``class_destdir`` from scratch and copy the listed files into it.

    An already existing ``class_destdir`` is deleted together with its
    contents before it is created again. Entries of ``class_pathlist`` that do
    not exist on disk are skipped without any message.

    Parameters
    ----------
    class_pathlist : iterable of str
        Paths of the files to copy.
    class_destdir : str
        Directory that receives the copies (files keep their base name).
    class_name : str
        Description shown next to the tqdm progress bar.
    """
    # Always start from an empty destination directory.
    if os.path.exists(class_destdir):
        shutil.rmtree(class_destdir)
    os.makedirs(class_destdir)

    progress = tqdm(class_pathlist, ascii=True, desc=class_name)
    for src_path in progress:
        if os.path.exists(src_path):
            target_path = os.path.join(class_destdir, os.path.basename(src_path))
            shutil.copyfile(src_path, target_path)

### Telegram notifications

In [52]:
import telepot

# Telegram credentials are read from the environment so that the bot token is
# never stored in the notebook or committed to version control.
# NOTE(review): the token that used to be hard-coded here must be considered
# leaked and should be revoked.
bot_token = os.environ.get('TELEGRAM_BOT_TOKEN')
# Chat that receives the notifications; overridable through the environment.
user_id = int(os.environ.get('TELEGRAM_USER_ID', 77680768))

# Without a token the notifications are disabled. Every sendMessage call in
# this notebook is wrapped in try/except, so a ``None`` bot only means that
# the messages are not delivered.
telebot = telepot.Bot(bot_token) if bot_token else None
if telebot is None:
    print('TELEGRAM_BOT_TOKEN is not set; Telegram notifications are disabled.',
          file=sys.stderr)

### Learning with ConvNets using Keras framework

In [11]:
from keras import backend as KBackend

from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.layers import Conv2D, MaxPooling2D, Flatten, ZeroPadding2D
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD

from keras.preprocessing.image import array_to_img, img_to_array, load_img
from keras.models import model_from_json
from keras.callbacks import Callback, ModelCheckpoint, TensorBoard

from sklearn.model_selection import StratifiedKFold

# Print the active Keras backend and image data format.
print('Keras backend:', KBackend.backend())
print('Keras image format:', KBackend.image_data_format(), '\n')

Keras backend: tensorflow
Keras image format: channels_last 



In [67]:
# Seed used for every source of randomness configured in this notebook.
random_seed = 27297
# Seed NumPy's global random generator. (Assigning to ``np.random_seed`` would
# merely create a new attribute on the numpy module and seed nothing.)
np.random.seed(random_seed)
# Number of images per batch.
batch_size = 16

# Training images are augmented with random horizontal/vertical flips and
# rotations (rotation_range=90); the test generator is created without options.
train_data_generator = ImageDataGenerator(
    rotation_range=90,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)
test_data_generator = ImageDataGenerator()

# Stratified, shuffled 5-fold splitter driven by the notebook seed.
kfold = StratifiedKFold(shuffle=True, n_splits=5, random_state=random_seed)

# os.listdir() returns the entries in arbitrary order. Sort them so that the
# truncation below and the seeded K-fold split pick the same files on every run.
cancer_list = np.array(sorted(os.path.join(cancer_dir, name) for name in os.listdir(cancer_dir)))
fibro_list = np.array(sorted(os.path.join(fibro_dir, name) for name in os.listdir(fibro_dir)))
# Keep at most 300 more cancer images than there are fibro images.
cancer_list = cancer_list[:fibro_list.shape[0] + 300]

# X holds the file paths, y the matching labels: 0 for cancer, 1 for fibro.
X = np.concatenate((cancer_list, fibro_list), axis=0)
y = np.concatenate((np.zeros(cancer_list.shape), np.ones(fibro_list.shape)), axis=0)

In [68]:
class TelegramTrainingLog(Callback):
    """Keras callback that sends the metrics of each finished epoch to Telegram.

    The message goes to ``user_id`` through the module-level ``telebot``.
    Delivery is best-effort: a failure is reported on stderr and training
    continues.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Send loss/accuracy and their validation counterparts for ``epoch``.

        Parameters
        ----------
        epoch : int
            Index of the epoch that just ended.
        logs : dict or None
            Metric values reported by Keras; missing metrics are sent as ``None``.
        """
        # Avoid a mutable default argument.
        logs = logs or {}
        # Notifications are switched off when no bot object is available.
        if telebot is None:
            return
        # Depending on the Keras version the accuracy metric is reported as
        # 'acc' or 'accuracy' (same for the 'val_' variants).
        msg = 'epoch: {}\nloss: {}\nacc: {}\nval_loss: {}\nval_acc: {}'.format(
            epoch,
            logs.get('loss'),
            logs.get('acc', logs.get('accuracy')),
            logs.get('val_loss'),
            logs.get('val_acc', logs.get('val_accuracy')))
        try:
            telebot.sendMessage(user_id, msg)
        except Exception as exc:
            # A notification problem must not abort training, but it should be visible.
            print('Telegram notification failed: {!r}'.format(exc), file=sys.stderr)

class TextTrainingLog(Callback):
    """Keras callback that appends the metrics of each finished epoch to ``cnn.log``."""

    def on_epoch_end(self, epoch, logs=None):
        """Append one formatted entry for ``epoch`` to ``cnn.log``.

        Parameters
        ----------
        epoch : int
            Index of the epoch that just ended.
        logs : dict or None
            Metric values reported by Keras.
        """
        # Avoid a mutable default argument.
        logs = logs or {}

        def fmt(*keys):
            # The first key that is present wins; a missing metric is written
            # as 'n/a' instead of crashing the '{:.6f}' format on None.
            for key in keys:
                value = logs.get(key)
                if value is not None:
                    return '{:.6f}'.format(value)
            return 'n/a'

        msg = ' - epoch: {0}\n  + loss   : {1}, acc:   {2}\n  - val_loss: {3}, val_acc: {4}\n'.format(
            epoch,
            fmt('loss'),
            fmt('acc', 'accuracy'),
            fmt('val_loss'),
            fmt('val_acc', 'val_accuracy'))
        # Open in append mode: the default read mode makes write() raise, and
        # 'a' keeps the entries of earlier epochs.
        with open('cnn.log', 'a') as f:
            f.write(msg)


def cnn_generator(input_shape=(160, 160, 3)) -> tuple:
    """Build and compile the binary-classification convolutional network.

    Architecture, in order:

    * three convolutional stages, each made of two ``Conv2D`` + ReLU pairs
      followed by 2x2 max-pooling, with 32 3x3, 48 3x3 and 64 5x5 filters;
    * ``Flatten``;
    * two ``Dense(32)`` + ReLU + ``Dropout(0.3)`` blocks;
    * ``Dense(1)`` with a sigmoid activation.

    The model is compiled with binary cross-entropy loss, the ``rmsprop``
    optimizer and the accuracy metric.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input image, passed to the first convolutional layer.

    Returns
    -------
    tuple
        ``(description, model)``: a short architecture tag (a fixed string)
        and the compiled ``Sequential`` model.
    """
    model = Sequential()

    # (number of filters, kernel size) of each convolutional stage.
    conv_stages = ((32, (3, 3)), (48, (3, 3)), (64, (5, 5)))
    for stage_no, (filters, kernel_size) in enumerate(conv_stages):
        for layer_no in range(2):
            conv_kwargs = dict(filters=filters, kernel_size=kernel_size, strides=1)
            # Only the very first layer declares the input shape.
            if stage_no == 0 and layer_no == 0:
                conv_kwargs['input_shape'] = input_shape
            model.add(Conv2D(**conv_kwargs))
            model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())

    # Two identical fully connected blocks.
    for _block in range(2):
        model.add(Dense(32))
        model.add(Activation('relu'))
        model.add(Dropout(0.3))

    # Single sigmoid unit for the two-class output.
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

    description = '32C3x2-48C3x2-64C5x2-F32-F32'
    return description, model


# Build the network once to print its layer summary; the description string
# returned alongside the model is discarded.
_, model = cnn_generator()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_315 (Conv2D)          (None, 158, 158, 32)      896       
_________________________________________________________________
activation_450 (Activation)  (None, 158, 158, 32)      0         
_________________________________________________________________
conv2d_316 (Conv2D)          (None, 156, 156, 32)      9248      
_________________________________________________________________
activation_451 (Activation)  (None, 156, 156, 32)      0         
_________________________________________________________________
max_pooling2d_168 (MaxPoolin (None, 78, 78, 32)        0         
_________________________________________________________________
conv2d_317 (Conv2D)          (None, 76, 76, 48)        13872     
_________________________________________________________________
activation_452 (Activation)  (None, 76, 76, 48)        0         
__________

In [69]:
# Set to True to rebuild the on-disk 'train' and 'test' class directories from
# the current fold before training. Rebuilding deletes and re-copies those
# directories, so it is off by default and the directories are used as found.
# NOTE(review): confirm that the directories on disk match the current split.
rebuild_split_dirs = False

# Checkpoints and final weights are written here; make sure it exists.
os.makedirs('log_cnn', exist_ok=True)

kfold_iteration = 0
for train_index, test_index in kfold.split(X, y):
    kfold_iteration += 1
    # Only the first fold is trained.
    if kfold_iteration > 1:
        break

    display(Markdown('**KFold iteration #{}**'.format(kfold_iteration)))

    if rebuild_split_dirs:
        # Label 0 marks cancer images, label 1 fibro images.
        cancer_trainlist = (X[train_index])[np.nonzero(1 - y[train_index])]
        fibro_trainlist = (X[train_index])[np.nonzero(y[train_index])]
        cancer_traindest = os.path.join(os.getcwd(), 'train', 'cancer')
        fibro_traindest = os.path.join(os.getcwd(), 'train', 'fibro')
        classdir_prepare(cancer_trainlist, cancer_traindest, '[TRAIN] generating cancer')
        classdir_prepare(fibro_trainlist, fibro_traindest, '[TRAIN] generating fibro ')

        cancer_testlist = (X[test_index])[np.nonzero(1 - y[test_index])]
        fibro_testlist = (X[test_index])[np.nonzero(y[test_index])]
        cancer_testdest = os.path.join(os.getcwd(), 'test', 'cancer')
        fibro_testdest = os.path.join(os.getcwd(), 'test', 'fibro')
        classdir_prepare(cancer_testlist, cancer_testdest, '[TEST ] generating cancer')
        classdir_prepare(fibro_testlist, fibro_testdest, '[TEST ] generating fibro ')

    train_generator = train_data_generator.flow_from_directory(
            directory=os.path.join(os.getcwd(), 'train'),
            target_size=(160, 160),
            classes=['cancer', 'fibro'],
            class_mode='binary',
            color_mode='rgb',
            seed=random_seed,
            batch_size=batch_size)

    # Validation images must not be augmented, so they come from the plain
    # test generator rather than from the augmenting training generator.
    test_generator = test_data_generator.flow_from_directory(
            os.path.join(os.getcwd(), 'test'),
            target_size=(160, 160),
            classes=['cancer', 'fibro'],
            class_mode='binary',
            color_mode='rgb',
            seed=random_seed,
            batch_size=batch_size)

    desc, model = cnn_generator()
    telegram_log = TelegramTrainingLog()
    tensorboard_log = TensorBoard(log_dir='log_tb',
                                  histogram_freq=1,
                                  write_graph=True)

    # One checkpoint per epoch; the file name carries the architecture tag,
    # fold number, epoch, validation loss and validation accuracy.
    checkpoint_fpath = 'log_cnn/' + desc + \
        str(kfold_iteration) + '_E{epoch:02d}_L{val_loss:.2f}_A{val_acc:.2f}.hdf5'

    checkpoint = ModelCheckpoint(
        filepath=checkpoint_fpath,
        monitor='val_loss',
        verbose=0,
        save_best_only=False,
        save_weights_only=False,
        mode='auto',
        period=1
    )

    # Announce the start of the run on Telegram (best-effort).
    try:
        telebot.sendMessage(user_id, '{}\n{}\n{}{}\n{}'.format('**********************************',
                                                               desc,
                                                               'Iteration: ', kfold_iteration,
                                                               '**********************************'))
    except Exception as exc:
        # A notification problem must not abort training, but it should be visible.
        print('Telegram notification failed: {!r}'.format(exc), file=sys.stderr)

    model.fit_generator(
            generator=train_generator,
            steps_per_epoch=train_index.shape[0] // (batch_size),
            epochs=80,
            callbacks=[telegram_log, checkpoint, tensorboard_log],
            validation_data=test_generator,
            validation_steps=test_index.shape[0] // (batch_size))

    model.save_weights('log_cnn/{}_it{}.h5'.format(desc, kfold_iteration))

**KFold iteration #1**

Found 3024 images belonging to 2 classes.
Found 758 images belonging to 2 classes.
INFO:tensorflow:Summary name conv2d_321/kernel:0 is illegal; using conv2d_321/kernel_0 instead.
INFO:tensorflow:Summary name conv2d_321/bias:0 is illegal; using conv2d_321/bias_0 instead.
INFO:tensorflow:Summary name conv2d_322/kernel:0 is illegal; using conv2d_322/kernel_0 instead.
INFO:tensorflow:Summary name conv2d_322/bias:0 is illegal; using conv2d_322/bias_0 instead.
INFO:tensorflow:Summary name conv2d_323/kernel:0 is illegal; using conv2d_323/kernel_0 instead.
INFO:tensorflow:Summary name conv2d_323/bias:0 is illegal; using conv2d_323/bias_0 instead.
INFO:tensorflow:Summary name conv2d_324/kernel:0 is illegal; using conv2d_324/kernel_0 instead.
INFO:tensorflow:Summary name conv2d_324/bias:0 is illegal; using conv2d_324/bias_0 instead.
INFO:tensorflow:Summary name conv2d_325/kernel:0 is illegal; using conv2d_325/kernel_0 instead.
INFO:tensorflow:Summary name conv2d_325/bias:0 is illegal; using conv

KeyboardInterrupt: 