In [1]:
from tensorflow.keras.layers import (Conv2D, UpSampling2D, Conv2DTranspose, concatenate, MaxPooling2D, 
                                     Activation, Dropout, Cropping2D, Flatten, Dense, BatchNormalization)
from tensorflow.keras.models import Model
from tensorflow.keras import Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder

Using TensorFlow backend.


In [2]:
IMG_HEIGHT = 300
IMG_WIDTH = 300
TRAIN_IMAGE_PATH = "jpeg/train/"
TEST_IMAGE_PATH = "jpeg/test/"
BATCH_SIZE = 8
AUTO = tf.data.experimental.AUTOTUNE
REG = 0.0005
EPOCHS=40
METRICS = [
    tf.keras.metrics.TruePositives(name='tp'),
    tf.keras.metrics.FalsePositives(name='fp'),
    tf.keras.metrics.TrueNegatives(name='tn'),
    tf.keras.metrics.FalseNegatives(name='fn'), 
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
    tf.keras.metrics.AUC(name='auc'),
    tf.keras.metrics.binary_crossentropy,
]

In [3]:
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

In [4]:
train_df["image_name"] = train_df["image_name"].apply(lambda x: TRAIN_IMAGE_PATH + x + ".jpg")
test_df["image_name"] = test_df["image_name"].apply(lambda x: TEST_IMAGE_PATH + x + ".jpg")

In [5]:
train_df_target_1 = train_df[train_df["target"] == 1]
train_df_target_0 = train_df[train_df["target"] == 0]

In [6]:
del train_df

In [7]:
random_target_0 = np.random.randint(low=1, high=train_df_target_0.shape[0], 
                                    size=2 * train_df_target_1.shape[0])

In [8]:
train_df_d = pd.concat([train_df_target_0.iloc[random_target_0], train_df_target_1])

In [9]:
def decode_image(filename, label=None, image_size=(IMG_WIDTH, IMG_HEIGHT)):
    bits = tf.io.read_file(filename)
    image = tf.image.decode_jpeg(bits, channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, size=image_size)
    
    if label is None:
        return image
    else:
        return image, label
#image = tf.cast(image, tf.float32)/255.0
    
def data_augment(image, label=None):
    r_crop = np.random.uniform(low = 0.4, high = 1.0)
    r_rsize = np.random.uniform(low = 0.8, high = 1.2)
    image = tf.image.random_crop(image, (int(r_crop*IMG_HEIGHT), int(r_crop*IMG_WIDTH), 3))
    image = tf.image.rot90(image)
    image = tf.keras.preprocessing.image.random_shear(image, 20)
    image = tf.image.resize(image, (int(r_rsize*IMG_HEIGHT), int(r_rsize*IMG_WIDTH), 3), preserve_aspect_ratio=True)
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)
    image = tf.image.random_saturation(image, lower=0.7, upper=1.3)
    image = tf.image.random_contrast(image, lower=0.7, upper=1.3)
    image = tf.image.random_brightness(image, lower=0.7, upper=1.3)
    image = tf.image.rgb_to_hsv(image)
    if label is None:
        return image
    else:
        return image, label
#     image = tf.image.rgb_to_hsv(image)
#     image = tf.image.random_flip_left_right(image)
#     image = tf.image.adjust_brightness(image, 0.2)
#     image = tf.image.rot90(image)
#     image = tf.image.central_crop(image, central_fraction=0.3)

In [10]:
x_train = train_df_d[["image_name"]]
y_train = train_df_d["target"].astype(np.float32).values
x_train.reset_index(drop=True, inplace=True)

In [11]:
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=45)

In [12]:
x_train.shape, x_val.shape, y_train.shape, y_val.shape

((1401, 1), (351, 1), (1401,), (351,))

In [13]:
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)

In [14]:
class_weights

array([0.74600639, 1.51623377])

In [15]:
train_dataset = (tf.data.Dataset
                 .from_tensor_slices((x_train["image_name"].values, y_train))
                 .map(decode_image, num_parallel_calls=AUTO)
                 .repeat()
                 .shuffle(512)
                 .batch(BATCH_SIZE)
                 .prefetch(AUTO)
                )

In [16]:
val_dataset = (tf.data.Dataset
               .from_tensor_slices((x_val["image_name"].values, y_val))
               .map(decode_image, num_parallel_calls=AUTO)
               .repeat()
               .shuffle(512)
               .batch(BATCH_SIZE)
               .prefetch(AUTO))

In [17]:
test_dataset = (tf.data.Dataset.from_tensor_slices((test_df.image_name))
                .map(decode_image, num_parallel_calls=AUTO)
                .cache()
                .batch(BATCH_SIZE))

In [18]:
lr = ReduceLROnPlateau(
    monitor="val_auc",
    patience=3,
    min_lr=0.000001,
    factor=0.5,
    verbose=1
)

In [19]:
es = EarlyStopping(monitor="val_loss", patience=10)

In [20]:
model_chkpt = ModelCheckpoint(filepath="best_model.h5")

In [21]:

def get_crop_shape(target, refer):
        # width, the 3rd dimension
        print("target: {} {}, refer: {} {}".format(target, target.get_shape(), refer, refer.get_shape()))
        cw = (target.get_shape()[2] - refer.get_shape()[2])
        assert (cw >= 0)
        if cw % 2 != 0:
            cw1, cw2 = int(cw/2), int(cw/2) + 1
        else:
            cw1, cw2 = int(cw/2), int(cw/2)
        # height, the 2nd dimension
        ch = (target.get_shape()[1] - refer.get_shape()[1])
        assert (ch >= 0)
        if ch % 2 != 0:
            ch1, ch2 = int(ch/2), int(ch/2) + 1
        else:
            ch1, ch2 = int(ch/2), int(ch/2)

        return (ch1, ch2), (cw1, cw2)

In [22]:
def dice_loss(y_true, y_pred):
    numerator = 2 * tf.reduce_sum(y_true * y_pred, axis=-1)
    denominator = tf.reduce_sum(y_true + y_pred, axis=-1)
    return 1 - (numerator + 1) / (denominator + 1)

In [23]:
def model_unet(input_layer, expansion_filters=64, expansion_kernel=(3,3), expansion_pool_size=(2,2),
          contract_filters=64, contract_kernel=(3,3), contract_pool_size=(2,2)):
    
    #64
    lvl_1 = Conv2D(filters=expansion_filters, kernel_size=expansion_kernel, activation="relu", padding="same")(input_layer)
    lvl_1 = Conv2D(filters=expansion_filters, kernel_size=expansion_kernel, activation="relu", padding="same")(lvl_1)
    mp_lvl_1 = MaxPooling2D(expansion_pool_size)(lvl_1)
    mp_lvl_1 = Dropout(0.25)(mp_lvl_1)
    
    #128
    lvl_2 = Conv2D(filters=expansion_filters*2, kernel_size=expansion_kernel, activation="relu", padding="same")(mp_lvl_1)
    lvl_2 = Conv2D(filters=expansion_filters*2, kernel_size=expansion_kernel, activation="relu", padding="same")(lvl_2)
    mp_lvl_2 = MaxPooling2D(expansion_pool_size)(lvl_2)
    mp_lvl_2 = Dropout(0.25)(mp_lvl_2)
    
    #256
    lvl_3 = Conv2D(filters=expansion_filters*3, kernel_size=expansion_kernel, activation="relu", padding="same")(mp_lvl_2)
    lvl_3 = Conv2D(filters=expansion_filters*3, kernel_size=expansion_kernel, activation="relu", padding="same")(lvl_3)
    mp_lvl_3 = MaxPooling2D(expansion_pool_size)(lvl_3)
    mp_lvl_3 = Dropout(0.25)(mp_lvl_3)
    
    #512
    lvl_4 = Conv2D(filters=expansion_filters*4, kernel_size=expansion_kernel, activation="relu", padding="same")(mp_lvl_3)
    lvl_4 = Conv2D(filters=expansion_filters*4, kernel_size=expansion_kernel, activation="relu", padding="same")(lvl_4)
    mp_lvl_4 = MaxPooling2D(expansion_pool_size)(lvl_4)
    mp_lvl_4 = Dropout(0.25)(mp_lvl_4)
    
    #1024
    lvl_5 = Conv2D(filters=expansion_filters*5, kernel_size=expansion_kernel, activation="relu", padding="same")(mp_lvl_4)
    lvl_5 = Conv2D(filters=expansion_filters*5, kernel_size=expansion_kernel, activation="relu", padding="same")(lvl_5)
    
    #d_lvl_4 = Conv2DTranspose(filters=contract_filters*4, kernel_size=contract_kernel, activation="relu", padding="same")(lvl_5)
    d_lvl_4 = UpSampling2D(size=contract_pool_size, data_format="channels_last")(lvl_5)
    ch, cw = get_crop_shape(lvl_4, d_lvl_4)
    ccon_4 = Cropping2D(cropping=(ch, cw), data_format="channels_last")(lvl_4)
    ucon_4 = concatenate([d_lvl_4, ccon_4])
    ucon_4 = Dropout(0.25)(ucon_4)
    ucon_4 = Conv2D(filters=contract_filters*4, kernel_size=contract_kernel, activation="relu", padding="same")(ucon_4)
    ucon_4 = Conv2D(filters=contract_filters*4, kernel_size=contract_kernel, activation="relu", padding="same")(ucon_4)
    
    #d_lvl_3 = Conv2DTranspose(filters=contract_filters*3, kernel_size=contract_kernel, activation="relu", padding="same")(ucon_4)
    d_lvl_3 = UpSampling2D(size=contract_pool_size, data_format="channels_last")(ucon_4)
    ch, cw = get_crop_shape(lvl_3, d_lvl_3)
    ccon_3 = Cropping2D(cropping=(ch, cw), data_format="channels_last")(lvl_3)
    ucon_3 = concatenate([d_lvl_3, ccon_3])
    ucon_3 = Dropout(0.25)(ucon_3)
    ucon_3 = Conv2D(filters=contract_filters*3, kernel_size=contract_kernel, activation="relu", padding="same")(ucon_3)
    ucon_3 = Conv2D(filters=contract_filters*3, kernel_size=contract_kernel, activation="relu", padding="same")(ucon_3)
    
    #d_lvl_2 = Conv2DTranspose(filters=contract_filters*2, kernel_size=contract_kernel, activation="relu", padding="same")(ucon_3)
    d_lvl_2 = UpSampling2D(size=contract_pool_size, data_format="channels_last")(ucon_3)
    ch, cw = get_crop_shape(lvl_2, d_lvl_2)
    ccon_2 = Cropping2D(cropping=(ch, cw), data_format="channels_last")(lvl_2)
    ucon_2 = concatenate([d_lvl_2, ccon_2])
    ucon_2 = Dropout(0.25)(ucon_2)
    ucon_2 = Conv2D(filters=contract_filters*3, kernel_size=contract_kernel, activation="relu", padding="same")(ucon_2)
    ucon_2 = Conv2D(filters=contract_filters*3, kernel_size=contract_kernel, activation="relu", padding="same")(ucon_2)
    
    #d_lvl_1 = Conv2DTranspose(filters=contract_filters*1, kernel_size=contract_kernel, activation="relu", padding="same")(ucon_2)
    d_lvl_1 = UpSampling2D(size=contract_pool_size, data_format="channels_last")(ucon_2)
    ch, cw = get_crop_shape(lvl_1, d_lvl_1)
    ccon_1 = Cropping2D(cropping=(ch, cw), data_format="channels_last")(lvl_1)
    ucon_1 = concatenate([d_lvl_1, ccon_1])
    ucon_1 = Dropout(0.25)(ucon_1)
    ucon_1 = Conv2D(filters=contract_filters*1, kernel_size=contract_kernel, activation="relu", padding="same")(ucon_1)
    ucon_1 = Conv2D(filters=contract_filters*1, kernel_size=contract_kernel, activation="relu", padding="same")(ucon_1)
    
    output = Conv2D(filters=1, kernel_size=(1,1), activation="relu", padding="same")(ucon_1)
    
    flatten = Flatten()(output)
    dense4 = Dense(512, activation='relu')(flatten)
    bn4 = BatchNormalization()(dense4)
    dense3 = Dense(256, activation='relu')(bn4)
    bn3 = BatchNormalization()(dense3)
    dense2 = Dense(128, activation='relu')(bn3)
    bn2 = BatchNormalization()(dense2)
    dense1 = Dense(1, activation="sigmoid")(bn2)
    model = Model(inputs=input_layer, outputs=dense1)
    return model
    
    
     

input_layer = Input((IMG_HEIGHT, IMG_WIDTH, 3))
model = model_unet(input_layer, expansion_filters=24)
model.summary()

target: Tensor("conv2d_7/Identity:0", shape=(None, 37, 37, 96), dtype=float32) (None, 37, 37, 96), refer: Tensor("up_sampling2d/Identity:0", shape=(None, 36, 36, 120), dtype=float32) (None, 36, 36, 120)
target: Tensor("conv2d_5/Identity:0", shape=(None, 75, 75, 72), dtype=float32) (None, 75, 75, 72), refer: Tensor("up_sampling2d_1/Identity:0", shape=(None, 72, 72, 256), dtype=float32) (None, 72, 72, 256)
target: Tensor("conv2d_3/Identity:0", shape=(None, 150, 150, 48), dtype=float32) (None, 150, 150, 48), refer: Tensor("up_sampling2d_2/Identity:0", shape=(None, 144, 144, 192), dtype=float32) (None, 144, 144, 192)
target: Tensor("conv2d_1/Identity:0", shape=(None, 300, 300, 24), dtype=float32) (None, 300, 300, 24), refer: Tensor("up_sampling2d_3/Identity:0", shape=(None, 288, 288, 192), dtype=float32) (None, 288, 288, 192)
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape        

In [24]:
model

<tensorflow.python.keras.engine.training.Model at 0x1fdfa4e9048>

In [25]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss=dice_loss, metrics=METRICS)

In [26]:
history = model.fit(train_dataset, epochs=EPOCHS, callbacks=[lr, es, model_chkpt],
                        steps_per_epoch=x_train.shape[0]//BATCH_SIZE, validation_data=val_dataset,
                        validation_steps=x_val.shape[0]//BATCH_SIZE,
                        class_weight = class_weights)

Train for 175 steps, validate for 43 steps
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 00008: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 00011: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 00014: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 15/40
Epoch 16/40


Epoch 17/40
Epoch 00017: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-06.
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 00020: ReduceLROnPlateau reducing learning rate to 3.12499992105586e-06.
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 00023: ReduceLROnPlateau reducing learning rate to 1.56249996052793e-06.
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 00026: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 27/40
Epoch 28/40
Epoch 29/40


In [27]:
history

<tensorflow.python.keras.callbacks.History at 0x1fdfa72ea48>

In [28]:
pred = model.predict(test_dataset)

In [29]:
pred

array([[0.00797546],
       [0.00262979],
       [0.00218161],
       ...,
       [0.9999999 ],
       [0.00299781],
       [0.17615633]], dtype=float32)

In [30]:
sam = pd.read_csv("sample_submission.csv")

In [31]:
sam.head()


Unnamed: 0,image_name,target
0,ISIC_0052060,0
1,ISIC_0052349,0
2,ISIC_0058510,0
3,ISIC_0073313,0
4,ISIC_0073502,0


In [32]:
sam["target"] = pred

In [33]:
sam

Unnamed: 0,image_name,target
0,ISIC_0052060,0.007975
1,ISIC_0052349,0.002630
2,ISIC_0058510,0.002182
3,ISIC_0073313,0.001730
4,ISIC_0073502,0.058733
...,...,...
10977,ISIC_9992485,0.001006
10978,ISIC_9996992,0.866623
10979,ISIC_9997917,1.000000
10980,ISIC_9998234,0.002998


In [34]:
sam.to_csv("dice_loss_unet_2d_barzil_researcher_image_augmentation_exp.csv", index=False)

In [None]:
history

In [None]:
history.__dict__

In [None]:
import matplotlib.pyplot as plt

In [None]:
loss_ = history.history["loss"]
val_loss_ = history.history["val_loss"]
epochs = [i for i in range(11)]