In [1]:
!pip install -U efficientnet

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

from API import *

#!pip install tensor-dash
#from tensordash.tensordash import Tensordash

# Notebook-wide configuration; the same dict is handed to the API helpers
# (read_tfrecord, get_model, ...) further down.
CFG = {
    # --- Execution parameters ---
    'DEVICE': 'GPU',
    # The next three entries are placeholders: they are overwritten by the
    # device-setup code that follows, so leave them as None here.
    'AUTOTUNE': None,
    'REPLICAS': None,
    'strategy': None,

    # --- Augmentation parameters ---
    'rot': 170.,
    'shr': 2.,
    'hzoom': 8.0,
    'wzoom': 8.0,
    'hshift': 8.0,
    'wshift': 8.0,

    # --- Learning_rate parameters ---
    # NOTE(review): lr_start (6e-6) is larger than lr_max (1.45e-6). If the
    # schedule is meant to ramp up from lr_start to lr_max, these two values
    # look inverted or mistyped — confirm against the schedule in API.
    'lr_start': 6e-6,
    'lr_max': 1.45e-6,
    'lr_min': 1e-6,
    'lr_rampup': 5,
    'lr_sustain': 0,
    'lr_decay': 0.85,

    # --- Model parameters ---
    'img_size': 224,
    'tabular_size': 19,
    'epochs': 100,            # deliberately high: an early-stopper ends training
    'batch_size': 32,
    'net_count': 5,           # 1: effnetB0, X: effnetB0->X-1
    'optimizer': 'adam',
    'label_smooth_fac': 0.05,  # used by the loss
}

# Select the tf.distribute strategy for the requested device (CPU, GPU or TPU)
# and store the resulting AUTOTUNE / REPLICAS / strategy values back into CFG.
if CFG['DEVICE'] == "TPU":
    print("connecting to TPU...")
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU ', tpu.master())
    except ValueError:
        print("Could not connect to TPU")
        tpu = None

    if tpu:
        try:
            print("initializing  TPU ...")
            tf.config.experimental_connect_to_cluster(tpu)
            tf.tpu.experimental.initialize_tpu_system(tpu)
            strategy = tf.distribute.experimental.TPUStrategy(tpu)
            print("TPU initialized")
        except Exception as err:
            # Catch any initialization failure and fall back to the default
            # strategy; otherwise `strategy` would be undefined further down.
            print("failed to initialize TPU")
            print(f"TPU initialization error: {err!r}")
            CFG['DEVICE'] = "GPU"
    else:
        CFG['DEVICE'] = "GPU"
if CFG['DEVICE'] != "TPU":
    print("Using default strategy for CPU and single GPU")
    strategy = tf.distribute.get_strategy()
if CFG['DEVICE'] == "GPU":
    print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
AUTOTUNE = tf.data.experimental.AUTOTUNE
REPLICAS = strategy.num_replicas_in_sync
print(f'REPLICAS: {REPLICAS}')
# Publish the resolved execution settings so the API helpers can read them from CFG.
CFG['AUTOTUNE'] = AUTOTUNE
CFG['REPLICAS'] = REPLICAS
CFG['strategy'] = strategy

Using default strategy for CPU and single GPU
Num GPUs Available:  1
REPLICAS: 1


# Balancing / reducing the dataset
The dataset is subsampled so that 1/6 of the images are malignant and 5/6 are benign.

In [2]:
# Disabled one-off preparation: builds the balanced label table from
# data/labels.csv and writes it to data/df_clean.csv.
# df = pd.read_csv('data/labels.csv')
# #cleaning data
# df.image_name = df.image_name + ".png"
# df['target'] = df['benign_malignant']
# df = df[['image_name', 'target']]
# df.columns = ['filename', 'target']
# #Reducing dataset size: 1/6 = malignant and 5/6 = benign
# malignants = df[df.target=='malignant']
# nbMalignant = len(malignants)
# benign = df[df.target=='benign']
# benign2 = benign.sample(nbMalignant*5, replace=False)
# #concatenating both parts + shuffle
# df = pd.concat([malignants,benign2])
# df = df.sample(frac=1).reset_index(drop=True)  # was `newdf.sample(...)`, but `newdf` is never defined

# df.to_csv("data/df_clean.csv", index=False)
# Load the previously prepared label table and preview its first rows.
# NOTE(review): the displayed frame contains an "Unnamed: 0" column, which
# suggests the CSV on disk was written with its index — confirm.
df = pd.read_csv("data/df_clean.csv")
df.head()

Unnamed: 0,filename,target
0,ISIC_9515811.png,benign
1,ISIC_2476876.png,benign
2,ISIC_1706912.png,malignant
3,ISIC_7130929.png,malignant
4,ISIC_9115484.png,benign


In [3]:
# Disabled alternative data source: builds `df` (filename, target) from
# data/tb3labels.csv, keeping only the rows whose file is present in
# data/tb3images and whose file name splits into exactly two parts on '_'.
# files = os.listdir("data/tb3images")

# dftmp = []
# for i in range(len(files)):
#     if len(files[i].split('_'))==2:
#         dftmp.append(files[i])
# dftmp = pd.DataFrame(dftmp)
# dftmp.columns=['filename']

# df = pd.read_csv('data/tb3labels.csv')
# df.image_id = df.image_id + ".jpg"
# df = df[['image_id', 'melanoma']]
# df.columns = ['filename', 'target']
# tmp = df.copy()
# NOTE(review): the two assignments below overwrite every column of the
# matching rows of `tmp` (not only `target`); only tmp['target'] is read afterwards.
# tmp[tmp.target==0] = 'benign'
# tmp[tmp.target==1] = 'malignant'
# df['target'] = tmp['target']

# idx = [np.array(df['filename'])[i] in np.array(dftmp['filename']) for i in range(len(df))]
# df = df[idx]
# df

# Create Segmentations
This step can be skipped if segmentation masks are already provided as input.

In [4]:
#get_segmentations(CFG, df, "data/images/", "data/masks/")

# Create Dataframe with tabular features

In [5]:
# Disabled: recompute the tabular-feature table with get_tabular_dataframe
# (given the image and mask folders) and cache it as data/df_tabular.csv.
# df = get_tabular_dataframe(df, "data/images/", "data/masks/")
# df.to_csv("data/df_tabular.csv", index=False)
# Load the cached tabular-feature table instead and preview its first rows.
df = pd.read_csv("data/df_tabular.csv")
df.head()

1000
2000
3000


Unnamed: 0,filename,target,extent,solidity,d/D,4A/(pi*d^2),pi*d/P,4*pi*A/P^2,P/(pi*D),A1,...,B,F1,F2,F3,F4,F5,F6,F7,F8,F9
0,ISIC_9515811.png,benign,0.610266,0.944248,0.472005,0.003007,0.375608,0.111424,0.040737,0.302545,...,0.067995,0.861756,0.938873,0.812158,0.916008,0.931556,0.780384,0.209291,0.663173,0.711402
1,ISIC_2476876.png,benign,0.810011,0.951936,0.602981,0.003692,0.42822,0.119233,0.043821,0.13669,...,0.045367,0.873701,0.910631,0.704706,0.927349,0.896892,0.71713,0.205941,0.674485,0.710345
2,ISIC_1706912.png,malignant,0.684407,0.959819,0.392455,0.002557,0.334098,0.099989,0.039962,0.085542,...,0.086952,0.653675,0.474381,0.36834,0.740163,0.615185,0.616598,0.241312,0.704781,0.649807
3,ISIC_7130929.png,malignant,0.724099,0.896918,0.312285,0.002029,0.297994,0.08959,0.037972,0.145998,...,0.108981,0.811402,0.767166,0.543182,0.869015,0.80477,0.675182,0.225115,0.690675,0.67764
4,ISIC_9115484.png,benign,0.610257,0.816092,0.64573,0.005052,0.759458,0.448323,0.019013,0.197023,...,0.023681,0.796599,0.793255,0.744079,0.852521,0.846222,0.821233,0.235022,0.702246,0.659205


# Create TFRecord

In [None]:
#write_tfrecord(CFG, df, "data/images/", "data/tfrecord_labeled.tfrec", True)

# Read TFRecord

In [None]:
# Disabled variants (both reference a `tfrec` path that is not defined in this notebook):
#dataset_train = read_tfrecord(CFG, tfrec, augment=True, repeat=True, shuffle=True)
# Evaluation variant: no augmentation, shuffling or repetition.
# Normally this dataset holds only one image.
#dataset_test = read_tfrecord(CFG, tfrec, augment=False, repeat=False, shuffle=False, ordered=True)

# Read the labeled TFRecord with augmentation, repetition and shuffling all
# switched off and ordering requested — presumably so that predictions line
# up with the rows of `df`; verify against read_tfrecord.
dataset = read_tfrecord(
    CFG,
    "data/tfrecord_labeled.tfrec",
    augment=False,
    repeat=False,
    shuffle=False,
    ordered=True,
    labeled=True,
)

# Create Model

In [None]:
# Build the model with fine-tuning disabled, passing the path of the saved weights file.
m = get_model(
    CFG,
    fine_tune=False,
    model_weights="models/model_effnetB0-4_noisy-student_transfer-learning.h5",
)
#m.summary()

## fit the model

In [None]:
# Disabled training step, kept for reference.
# NOTE(review): it relies on `dataset_train`, which is only created by a
# commented-out line in the "Read TFRecord" cell.
# NOTE(review): the callbacks monitor 'val_loss' / 'val_auc', but the fit call
# below passes no validation data — confirm those metrics would be available.
# callbacks = [tf.keras.callbacks.ReduceLROnPlateau(),
#              tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5),
#              tf.keras.callbacks.ModelCheckpoint("models/best.h5", save_best_only=True, monitor='val_auc', mode='max', save_weights_only=True)]

# Number of batches per epoch, rounded up to include the last partial batch.
# stepstrain = len(df)//CFG['batch_size'] 
# if len(df)%CFG['batch_size'] > 0: stepstrain+=1

# m.fit(
#     dataset_train,
#     steps_per_epoch=stepstrain,
#     epochs=CFG['epochs'],
#     callbacks=callbacks,
# )

## predict

In [None]:
# Run inference over the whole dataset.
# `steps` has to be a whole number of batches: round the batch count up so the
# last, possibly partial, batch is included (true division gave a float).
preds = m.predict(
        dataset,
        steps = int(np.ceil(len(df) / CFG['batch_size'])),
)

In [None]:
# Ground truth as integers: 1 where the target is "malignant", 0 otherwise.
is_malignant = df['target'] == "malignant"
ytrue = is_malignant.to_numpy().astype('int64')
# Predicted class = index of the highest score in each row of `preds`.
ypreds = preds.argmax(axis=1)

In [None]:
# `plot_confusion_matrix` is no longer imported: it was unused here and has been
# removed from recent scikit-learn releases, where importing it fails.
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Rows are the true classes, columns are our predictions.
# labels=[0, 1] keeps the matrix 2x2 even if one class is absent.
cm = confusion_matrix(ytrue, ypreds, labels=[0, 1])
disp = ConfusionMatrixDisplay(cm, display_labels=["benign", "malignant"])
disp.plot(cmap=plt.cm.Blues)
disp.ax_.set_title("Confusion Matrix, threshold: 0.5")

# Malignant (label 1) is the positive class, so precision and recall are
# computed from cm[1, 1] (true positives), not from the benign cell cm[0, 0].
true_pos = cm[1, 1]
predicted_pos = cm[:, 1].sum()  # everything predicted malignant
actual_pos = cm[1, :].sum()     # everything that really is malignant

accuracy = np.mean(ytrue==ypreds)
# Guard the ratios so an empty row/column yields 0.0 instead of NaN and a warning.
precision = true_pos/predicted_pos if predicted_pos else 0.0
recall = true_pos/actual_pos if actual_pos else 0.0
F1score = 2*(recall*precision)/(recall+precision) if (recall+precision) else 0.0
print("accuracy:",accuracy)
print("precision:",precision)
print("recall:",recall)
print("F1score:",F1score)

In [None]:
# Probability of the malignant class above which a sample is called malignant.
ismalignant = 0.70
ypreds2 = (preds[:,1]>ismalignant).astype('int')

# Rows are the true classes, columns are the predictions.
# labels=[0, 1] keeps the matrix 2x2 even if one class is absent.
cm = confusion_matrix(ytrue, ypreds2, labels=[0, 1])
disp = ConfusionMatrixDisplay(cm, display_labels=["benign", "malignant"])
disp.plot(cmap=plt.cm.Blues)
# Build the title from the threshold actually used (it used to say 0.35).
disp.ax_.set_title(f"Confusion Matrix, threshold: {ismalignant:.2f}")

# Malignant (label 1) is the positive class, so precision and recall are
# computed from cm[1, 1] (true positives), not from the benign cell cm[0, 0].
true_pos = cm[1, 1]
predicted_pos = cm[:, 1].sum()  # everything predicted malignant
actual_pos = cm[1, :].sum()     # everything that really is malignant

accuracy = np.mean(ytrue==ypreds2)
# Guard the ratios so an empty row/column yields 0.0 instead of NaN and a warning.
precision = true_pos/predicted_pos if predicted_pos else 0.0
recall = true_pos/actual_pos if actual_pos else 0.0
F1score = 2*(recall*precision)/(recall+precision) if (recall+precision) else 0.0
print("accuracy:",accuracy)
print("precision:",precision)
print("recall:",recall)
print("F1score:",F1score)