##### Auteur: Antoine Cadiou

# Warning: to run this notebook, first download the data from these links:
### images: https://www.kaggle.com/arroqc/siic-isic-224x224-images
### labels: https://www.kaggle.com/antocad/labels-siim-isic-224
#### then update the data paths: the `pd.read_csv(...)` call in the 4th cell (originally its line 2) and the image directory used by `cross_val` in the 6th cell (originally its lines 34 & 44)

In [None]:
import os, cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator, load_img, array_to_img, img_to_array
from tensorflow.keras import datasets
from keras.models import Sequential, Model
from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout
from keras.layers.normalization import BatchNormalization
from tensorflow.keras.callbacks import ReduceLROnPlateau
from keras.regularizers import l2
from keras import optimizers, layers, regularizers

!pip install -U efficientnet
!pip install tensor-dash
import efficientnet.keras as efn
from tensordash.tensordash import Tensordash

In [None]:
# Experiment configuration shared by the cells below.
CFG = {
    # Requested accelerator; compared against "TPU" / "GPU" during strategy set-up.
    'DEVICE': 'GPU',

    # NOTE(review): train_size is not read anywhere in the visible cells.
    'train_size': 0.8,
    # Seed for NumPy's global RNG; either None or an int.
    'random_seed': 42,

    # Images are fed to the network as img_size x img_size.
    'img_size': 224,
    'epochs': 100,
    'batch_size': 32,

    # Learning-rate schedule parameters consumed by getLearnRateCallback.
    'lr_start': 0.000006,
    'lr_max': 0.00000145,
    'lr_min': 0.000001,
    'lr_rampup': 5,
    'lr_sustain': 0,
    'lr_decay': 0.85,
    'optimizer': 'adam',
    'label_smooth_fac': 0.05,

    # NOTE(review): net_count / tta_steps are not read in the visible cells.
    'net_count': 1,
    'tta_steps': 1,
}

In [None]:
# Pick the tf.distribute strategy that matches CFG['DEVICE'].
# Any failure to reach or initialise the TPU downgrades CFG['DEVICE'] to "GPU"
# so that `strategy` is always defined by the time it is used below.
if CFG['DEVICE'] == "TPU":
    print("connecting to TPU...")
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU ', tpu.master())
    except ValueError:
        print("Could not connect to TPU")
        tpu = None

    if tpu:
        try:
            print("initializing  TPU ...")
            tf.config.experimental_connect_to_cluster(tpu)
            tf.tpu.experimental.initialize_tpu_system(tpu)
            strategy = tf.distribute.experimental.TPUStrategy(tpu)
            print("TPU initialized")
        except Exception as err:
            # The previous `except _:` named an undefined exception class, so a
            # failure here raised instead of being handled. Report the cause and
            # fall back to the default strategy.
            print("failed to initialize TPU")
            print(repr(err))
            CFG['DEVICE'] = "GPU"
    else:
        CFG['DEVICE'] = "GPU"

if CFG['DEVICE'] != "TPU":
    print("Using default strategy for CPU and single GPU")
    strategy = tf.distribute.get_strategy()

if CFG['DEVICE'] == "GPU":
    print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))


AUTO     = tf.data.experimental.AUTOTUNE
# Replica count of the chosen strategy; the learning-rate schedule scales by it.
REPLICAS = strategy.num_replicas_in_sync
print(f'REPLICAS: {REPLICAS}')

In [None]:
# Read the CSV labels; every column is loaded as str, so targets are '0' / '1'.
train_original = pd.read_csv('../input/labels-siim-isic-224/train.csv', dtype=str)
# Seed NumPy's global RNG before any random draw so the whole cell (and the
# later np.random calls) is reproducible when a seed is configured.
seed = CFG['random_seed']
if seed is not None:
    np.random.seed(seed)
# Build a smaller, more balanced frame: every positive row plus (at most)
# three times as many negative rows.
part_true = train_original[train_original["target"] == '1']
negatives = train_original[train_original["target"] == '0']
# random_state ties the down-sampling to the configured seed; the min() keeps
# sample() from raising when fewer than 3x negatives are available.
part_false = negatives.sample(min(len(negatives), len(part_true) * 3), random_state=seed)
train_balanced = pd.concat([part_true, part_false])
# Shuffle the rows and renumber the index from 0.
cols = train_balanced.columns
idshuffle = np.random.permutation(len(train_balanced))
df = train_balanced.iloc[idshuffle].reset_index(drop=True)
# Build the filename column, use the textual class as the target, and keep
# only the two columns the image generators need.
df['filename'] = df['image_name'] + '.png'
df['target'] = df['benign_malignant']
df = df[['filename', 'target']]
df.info()
df.head()

In [None]:
def view_images_crop(img, sigmaX=10):
    """Apply a centred circular mask to a 3-D image, then a blur-subtraction filter.

    The mask is the largest disc centred on the image that fits inside it.
    The masked image is then combined with its Gaussian blur as
    ``4 * masked - 4 * blur + 128``.

    Args:
        img: image array whose shape unpacks as (height, width, depth).
        sigmaX: standard deviation handed to ``cv2.GaussianBlur``.

    Returns:
        The filtered image produced by ``cv2.addWeighted``.
    """
    rows, cols, _ = img.shape
    centre = (int(cols / 2), int(rows / 2))
    radius = int(np.amin(centre))

    # Single-channel mask: non-zero inside the disc, zero elsewhere.
    disc = np.zeros((rows, cols), np.uint8)
    cv2.circle(disc, centre, radius, 1, thickness=-1)

    masked = cv2.bitwise_and(img, img, mask=disc)
    blurred = cv2.GaussianBlur(masked, (0, 0), sigmaX)
    return cv2.addWeighted(masked, 4, blurred, -4, 128)

def view_images_bengraham(image):
    """Convert an RGB image to HSV and apply a blur-subtraction filter.

    The result is ``4 * hsv - 4 * blur(hsv) + 128`` with a Gaussian sigma
    of 256/10.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    smoothed = cv2.GaussianBlur(hsv, (0, 0), 256/10)
    return cv2.addWeighted(hsv, 4, smoothed, -4, 128)

def view_images_neuronengineer(image):
    """Apply a blur-subtraction filter: ``4 * image - 4 * blur(image) + 128`` (sigma 10)."""
    smoothed = cv2.GaussianBlur(image, (0, 0), 10)
    return cv2.addWeighted(image, 4, smoothed, -4, 128)

def dullrazor(img, lowbound=20, filterstruc=7, inpaintmat=3):
    """Inpaint the regions selected by a thresholded black-hat transform.

    Per the original author's note, the mask is meant to mark hair (255)
    against skin (0).

    Args:
        img: RGB image to clean.
        lowbound: threshold applied to the black-hat response.
        filterstruc: side length of the square structuring element.
        inpaintmat: inpainting radius passed to ``cv2.inpaint``.

    Returns:
        The image returned by ``cv2.inpaint`` (Telea method).
    """
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # Black-hat with a square structuring element.
    square = cv2.getStructuringElement(cv2.MORPH_RECT, (filterstruc, filterstruc))
    blackhat = cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT, square)

    # Binary mask: 255 where the response exceeds lowbound, 0 elsewhere.
    _, hair_mask = cv2.threshold(blackhat, lowbound, 255, cv2.THRESH_BINARY)

    return cv2.inpaint(img, hair_mask, inpaintmat, cv2.INPAINT_TELEA)

def preprocess(input_img):
    """Run the preprocessing chain on one image.

    Steps, in order: cast to uint8, per-channel white balancing, hair removal
    (``dullrazor``) and circular crop / filter (``view_images_crop``).

    Args:
        input_img: image array; it is copied, never modified in place.

    Returns:
        The processed image converted with ``img_to_array``.
    """
    def wb(channel, perc=0.05):
        """Stretch one channel so its [perc, 100 - perc] percentiles span 0..255."""
        lo = np.percentile(channel, perc)
        hi = np.percentile(channel, 100.0 - perc)
        if hi <= lo:
            # Flat channel: stretching would divide by zero and produce NaNs
            # before the uint8 cast, so leave the channel as it is.
            return np.uint8(channel)
        return np.uint8(np.clip((channel - lo) * 255.0 / (hi - lo), 0, 255))

    # astype() returns a new array, so the caller's image is left untouched.
    img = input_img.astype('uint8')
    # White balancing, channel by channel.
    img = np.dstack([wb(channel, 0.05) for channel in cv2.split(img)])
    # Hair removal.
    img = dullrazor(img)
    # Circular crop and filter.
    img = view_images_crop(img)
    return img_to_array(img)

def focal_loss(alpha=0.25, gamma=2.0):
    """Build a focal-loss function usable as a Keras loss.

    Args:
        alpha: weight applied where ``y_true`` is 1; ``1 - alpha`` is applied
            where it is 0.
        gamma: exponent of the modulating factor ``(1 - p_t) ** gamma``.

    Returns:
        A function ``focal_crossentropy(y_true, y_pred)`` that returns the
        weighted binary cross-entropy averaged over the last axis.
    """
    def focal_crossentropy(y_true, y_pred):
        # The Keras backend is imported here because the module never bound
        # the name `K`, which made every call fail with a NameError.
        from tensorflow.keras import backend as K

        bce = K.binary_crossentropy(y_true, y_pred)

        # Clip predictions away from 0 and 1 before computing p_t.
        y_pred = K.clip(y_pred, K.epsilon(), 1. - K.epsilon())
        # Predicted probability assigned to the true class.
        p_t = (y_true * y_pred) + ((1 - y_true) * (1 - y_pred))

        alpha_factor = y_true * alpha + ((1 - alpha) * (1 - y_true))
        modulating_factor = K.pow((1 - p_t), gamma)

        # Compute the final loss and return it.
        return K.mean(alpha_factor * modulating_factor * bce, axis=-1)
    return focal_crossentropy

def getLearnRateCallback(cfg):
    """Create a LearningRateScheduler callback with ramp-up, sustain and decay phases.

    The peak rate is ``cfg['lr_max']`` multiplied by the replica count of the
    module-level ``strategy`` and by ``cfg['batch_size']``.

    Args:
        cfg: mapping providing ``lr_start``, ``lr_max``, ``lr_min``,
            ``lr_rampup``, ``lr_sustain``, ``lr_decay`` and ``batch_size``.

    Returns:
        A verbose ``tf.keras.callbacks.LearningRateScheduler``.
    """
    peak_lr = cfg['lr_max'] * strategy.num_replicas_in_sync * cfg['batch_size']

    def lrfn(epoch):
        warmup = cfg['lr_rampup']
        hold = cfg['lr_sustain']
        # Linear ramp from lr_start up to the peak rate.
        if epoch < warmup:
            return (peak_lr - cfg['lr_start']) / warmup * epoch + cfg['lr_start']
        # Flat section at the peak rate.
        if epoch < warmup + hold:
            return peak_lr
        # Exponential decay from the peak towards lr_min.
        return (peak_lr - cfg['lr_min']) * cfg['lr_decay']**(epoch - warmup - hold) + cfg['lr_min']

    return tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=True)

In [None]:
def cross_val(CFG, df, batch_size, cv=5, name='model',
              image_dir="../input/siic-isic-224x224-images/train"):
    """Train one EfficientNet-B0 classifier per cross-validation fold.

    The rows of ``df`` are shuffled with NumPy's global RNG and cut into
    ``cv`` folds of ``len(df) // cv`` rows. Each fold serves once as the
    validation set; all remaining rows (including any remainder rows that
    belong to no fold) form the training set.

    Args:
        CFG: configuration mapping; reads ``img_size``, ``epochs``,
            ``optimizer``, ``label_smooth_fac`` and the learning-rate keys
            used by ``getLearnRateCallback``.
        df: DataFrame with a ``filename`` and a ``target`` column.
        batch_size: batch size of both data generators.
        cv: number of folds.
        name: prefix of the per-fold weight files (``<name>_<fold>.h5``).
        image_dir: directory containing the image files named in ``df``.

    Returns:
        A tuple ``(scores, histories, models)``: ``scores`` is an array holding
        each fold's last-epoch ``val_auc``, ``histories`` the Keras History
        objects and ``models`` the trained models as they stand after the
        final epoch (the best ``val_auc`` weights are the ones saved to disk).
    """
    n_rows = len(df)
    idx = np.arange(n_rows)
    np.random.shuffle(idx)
    sizeFold = n_rows // cv

    # The generator configuration does not depend on the fold: build it once.
    # `preprocess` can be plugged in through `preprocessing_function` if wanted.
    train_datagen = ImageDataGenerator(
        rotation_range=170,
        zoom_range=0.1,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
        vertical_flip=True,
        rescale=1./255.,
    )
    val_datagen = ImageDataGenerator(rescale=1./255.)
    flow_options = dict(
        directory=image_dir,
        x_col="filename",
        y_col="target",
        class_mode="categorical",
        target_size=(CFG['img_size'], CFG['img_size']),
        batch_size=batch_size,
        validate_filenames=False,
    )

    scores = []
    histories = []
    models = []
    for i in range(cv):
        # Build the folds. The training rows are everything in the shuffled
        # order outside the validation slice. The previous code applied a
        # row-indexed boolean mask positionally to the shuffled index array,
        # which leaked validation rows into the training set.
        start, stop = i * sizeFold, (i + 1) * sizeFold
        idxVal = idx[start:stop]
        idxTrain = np.concatenate([idx[:start], idx[stop:]])
        df_train = df.iloc[idxTrain, :]
        df_val = df.iloc[idxVal, :]

        # Data generators (they read the images from disk).
        train_generator = train_datagen.flow_from_dataframe(dataframe=df_train, **flow_options)
        val_generator = val_datagen.flow_from_dataframe(dataframe=df_val, **flow_options)

        # Build and compile a fresh model for this fold.
        base_model = efn.EfficientNetB0(
            weights='noisy-student',
            include_top=False,
            input_shape=(CFG['img_size'], CFG['img_size'], 3)
        )
        base_model.trainable = True
        model = Sequential()
        model.add(base_model)
        model.add(layers.GlobalAveragePooling2D())
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.2))
        model.add(BatchNormalization())
        model.add(Dense(2, activation='softmax'))

        # The model has a single output, so it takes exactly one loss. The
        # previous two-element list also contained the un-called `focal_loss`
        # factory. `focal_loss()` can be substituted here if desired.
        loss = tf.keras.losses.BinaryCrossentropy(label_smoothing=CFG['label_smooth_fac'])
        callbacks = [
            getLearnRateCallback(CFG),
            tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5),
            tf.keras.callbacks.ModelCheckpoint(name+"_"+str(i)+".h5", save_best_only=True, monitor='val_auc', mode='max', save_weights_only=True),
        ]
        model.compile(optimizer=CFG['optimizer'],
                      loss=loss,
                      metrics=['accuracy', keras.metrics.AUC(name='auc')])

        # Fit the model.
        history = model.fit_generator(
            train_generator,
            validation_data=val_generator,
            epochs=CFG['epochs'],
            validation_steps=len(df_val) // batch_size,
            callbacks=callbacks,
        )
        histories.append(history)
        models.append(model)
        scores.append(history.history['val_auc'][-1])
    return np.array(scores), histories, models

In [None]:
# Run 5-fold cross-validation. The batch size is taken from CFG so it always
# matches the value getLearnRateCallback scales the learning rate by.
scores, H, M = cross_val(CFG, df, batch_size=CFG['batch_size'], cv=5)

In [None]:
# Plot the training curves of the fold with the highest validation AUC score.
best = np.argmax(scores)
history = H[best]


def _plot_metric_pair(hist, metric, title, train_color, val_color, legend_loc):
    """Plot a training metric and its validation counterpart, annotating every epoch.

    Args:
        hist: the ``history`` dict of a Keras History object.
        metric: key of the training series; the validation series is read
            from ``'val_' + metric``.
        title: figure title.
        train_color: line colour of the training series.
        val_color: line colour of the validation series.
        legend_loc: matplotlib legend location.
    """
    train_vals = hist[metric]
    val_vals = hist['val_' + metric]
    plt.figure(figsize=(20, 8))
    plt.plot(train_vals, color=train_color, label=metric, marker='.', linestyle='--')
    plt.plot(val_vals, color=val_color, label='validation ' + metric, marker='.', linestyle='--')
    plt.legend(loc=legend_loc, fancybox=True, framealpha=1, shadow=True, borderpad=1)
    plt.title(label=title)
    plt.grid()
    # Write each value, rounded to 3 decimals, just above and right of its marker.
    for epoch, (train_v, val_v) in enumerate(zip(train_vals, val_vals)):
        plt.text(epoch + 0.03, train_v + 0.01, "{:.3f}".format(train_v))
        plt.text(epoch + 0.03, val_v + 0.01, "{:.3f}".format(val_v))
    plt.show()


# loss
_plot_metric_pair(history.history, 'loss', "Loss & Validation Loss",
                  "#F7728B", "#3CA3EC", "upper right")
# accuracy
_plot_metric_pair(history.history, 'accuracy', "Accuracy & Validation Accuracy",
                  "#51B232", "#CF8F32", 'lower right')
# AUC
_plot_metric_pair(history.history, 'auc', "AUC & Validation AUC",
                  "#51B232", "#CF8F32", 'lower right')