In [22]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import GlobalAveragePooling2D, MaxPooling2D
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras.layers import Conv2D, BatchNormalization, GlobalAveragePooling2D, Flatten, Dropout, Dense
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Model

In [2]:
TRAIN_IMAGE_PATH = "jpeg/train/"
TEST_IMAGE_PATH = "jpeg/test/"
IMG_HEIGHT = 500
IMG_WIDTH = 500
BATCH_SIZE = 64
AUTO = tf.data.experimental.AUTOTUNE
REG = 0.0005
EPOCHS=20
METRICS = [
    tf.keras.metrics.TruePositives(name='tp'),
      tf.keras.metrics.FalsePositives(name='fp'),
      tf.keras.metrics.TrueNegatives(name='tn'),
      tf.keras.metrics.FalseNegatives(name='fn'), 
      tf.keras.metrics.BinaryAccuracy(name='accuracy'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall'),
      tf.keras.metrics.AUC(name='auc'),
]

In [3]:
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

In [4]:
train_df["image_name"] = train_df["image_name"].apply(lambda x: TRAIN_IMAGE_PATH + x + ".jpg")
test_df["image_name"] = test_df["image_name"].apply(lambda x: TEST_IMAGE_PATH + x + ".jpg")

In [5]:
# train_df, val_df = train_test_split(train_df, test_size=0.1, random_state=45, shuffle=True)

In [6]:
# train_df.shape, val_df.shape

In [7]:
def decode_image(filename, label=None, image_size=(IMG_WIDTH, IMG_HEIGHT)):
    bits = tf.io.read_file(filename)
    image = tf.image.decode_jpeg(bits, channels=3)
    image = tf.cast(image, tf.float32)/255.0
    image = tf.image.resize(image, size=image_size)
    
    if label is None:
        return image
    else:
        return image, label

In [8]:
def data_augment(image, label=None):
    image = tf.image.random_flip_left_right(image)
    if label is None:
        return image
    else:
        return image, label

In [9]:
# def compute_class_weights(labels):
#     total_labels = labels.shape[0]
    
#     positive_labels = K.sum(labels, axis=0)/total_labels
#     negative_labels = 1 - positive_labels
#     return {0:positive_labels, 1:negative_labels}

In [10]:
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
lb = LabelEncoder()
image_names = train_df["image_name"].values
train_df["image_name"] = lb.fit_transform(train_df["image_name"].values)
train_df["target"] = train_df["target"].astype("int")
train_df.head()
map_name_no = dict(zip(train_df["image_name"], image_names))
y_train = train_df["target"]
x_train = train_df[["image_name"]]


over = SMOTE(random_state=45, sampling_strategy=0.1)
under = RandomUnderSampler(sampling_strategy=0.5)
steps = [('o', over), ('u', under)]
ppl = Pipeline(steps=steps)

x_train, y_train = ppl.fit_resample(x_train, y_train)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=45)
x_train["image_name"] = x_train["image_name"].apply(lambda x: map_name_no[x])
x_val["image_name"] = x_val["image_name"].apply(lambda x: map_name_no[x])

Using TensorFlow backend.


In [11]:
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)

In [12]:
class_weights = dict(enumerate(class_weights))

In [13]:
class_weights

{0: 0.752022342064715, 1: 1.491975544516622}

In [14]:
x_train.shape, y_train.shape, x_val.shape, y_val.shape

((7809, 1), (7809,), (1953, 1), (1953,))

In [15]:
sum(y_train), sum(y_val)

(2617, 637)

In [16]:
train_dataset = (tf.data.Dataset
                 .from_tensor_slices((x_train["image_name"].values, y_train))
                 .map(decode_image, num_parallel_calls=AUTO)
                 .map(data_augment, num_parallel_calls=AUTO)
                 .repeat()
                 .shuffle(512)
                 .batch(BATCH_SIZE)
                 .prefetch(AUTO)
                )

In [17]:
val_dataset = (tf.data.Dataset
               .from_tensor_slices((x_val["image_name"].values, y_val))
               .map(decode_image, num_parallel_calls=AUTO)
               .map(data_augment, num_parallel_calls=AUTO)
               .repeat()
               .shuffle(512)
               .batch(BATCH_SIZE)
               .prefetch(AUTO))

In [18]:
test_dataset = (tf.data.Dataset.from_tensor_slices((test_df.image_name))
                .map(decode_image, num_parallel_calls=AUTO)
                .batch(BATCH_SIZE))

In [19]:
lr = ReduceLROnPlateau(
    monitor="val_loss",
    patience=3,
    min_lr=0.000001,
    factor=0.5,
    verbose=1
)

In [20]:
es = EarlyStopping(monitor="val_loss", patience=10)

In [23]:
model_d201 = DenseNet201(include_top=False, weights="imagenet", input_shape=(IMG_WIDTH, IMG_HEIGHT,3))
for layers in model_d201.layers[:-10]:
    layers.trainable = False
model_d201.trainable=False

model = Conv2D(filters=32, kernel_size=(3,3), data_format="channels_last", activation="relu", kernel_regularizer=l2(REG))(model_d201.output)
model = BatchNormalization(axis=-1, center=True, scale=False)(model)
model = Conv2D(filters=32, kernel_size=(3,3), activation="relu", kernel_regularizer=l2(REG))(model)
model = BatchNormalization(axis=-1, center=True, scale=False)(model)
model = MaxPooling2D()(model)
model = Dropout(0.25)(model)

model = Conv2D(filters=64, kernel_size=(5,5), data_format="channels_last", activation="relu", kernel_regularizer=l2(REG))(model_d201.output)
model = BatchNormalization(axis=-1, center=True, scale=False)(model)
model = Conv2D(filters=64, kernel_size=(5,5), activation="relu", kernel_regularizer=l2(REG))(model)
model = BatchNormalization(axis=-1, center=True, scale=False)(model)
model = MaxPooling2D()(model)
model = Dropout(0.25)(model)


model = Flatten()(model)
model = Dense(256, activation="relu")(model)
model = BatchNormalization(axis=-1, center=True, scale=False)(model)
model = Dropout(0.5)(model)
model = Dense(64, activation="relu")(model)
model = BatchNormalization(axis=-1, center=True, scale=False)(model)

output = Dense(1, activation="sigmoid")(model)
model_d201 = Model(inputs=model_d201.input, outputs=output)
model_d201.summary()
model_d201.compile(optimizer=tf.keras.optimizers.RMSprop(lr=1e-4), loss="binary_crossentropy", metrics=METRICS)

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 500, 500, 3) 0                                            
__________________________________________________________________________________________________
zero_padding2d_2 (ZeroPadding2D (None, 506, 506, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
conv1/conv (Conv2D)             (None, 250, 250, 64) 9408        zero_padding2d_2[0][0]           
__________________________________________________________________________________________________
conv1/bn (BatchNormalization)   (None, 250, 250, 64) 256         conv1/conv[0][0]                 
______________________________________________________________________________________________

conv5_block3_0_relu (Activation (None, 15, 15, 960)  0           conv5_block3_0_bn[0][0]          
__________________________________________________________________________________________________
conv5_block3_1_conv (Conv2D)    (None, 15, 15, 128)  122880      conv5_block3_0_relu[0][0]        
__________________________________________________________________________________________________
conv5_block3_1_bn (BatchNormali (None, 15, 15, 128)  512         conv5_block3_1_conv[0][0]        
__________________________________________________________________________________________________
conv5_block3_1_relu (Activation (None, 15, 15, 128)  0           conv5_block3_1_bn[0][0]          
__________________________________________________________________________________________________
conv5_block3_2_conv (Conv2D)    (None, 15, 15, 32)   36864       conv5_block3_1_relu[0][0]        
__________________________________________________________________________________________________
conv5_bloc

In [None]:
x_val.shape

In [26]:
history = model_d201.fit(train_dataset, epochs=EPOCHS, callbacks=[lr, es],
                        steps_per_epoch=x_train.shape[0]//BATCH_SIZE, validation_data=val_dataset,
                        validation_steps=x_val.shape[0]//BATCH_SIZE,
                        class_weight = class_weights)

Train for 122 steps, validate for 30 steps
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 00005: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 00008: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 00016: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 17/20
Epoch 18/20
Epoch 19/20


Epoch 00019: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-06.
Epoch 20/20


In [27]:
probs = model_d201.predict(test_dataset, verbose = 1)



In [28]:
sam = pd.read_csv("sample_submission.csv")

In [29]:
sam.head()


Unnamed: 0,image_name,target
0,ISIC_0052060,0
1,ISIC_0052349,0
2,ISIC_0058510,0
3,ISIC_0073313,0
4,ISIC_0073502,0


In [30]:
sam["target"] = probs

In [31]:
sam

Unnamed: 0,image_name,target
0,ISIC_0052060,0.219937
1,ISIC_0052349,0.489566
2,ISIC_0058510,0.491473
3,ISIC_0073313,0.418109
4,ISIC_0073502,0.183890
...,...,...
10977,ISIC_9992485,0.359625
10978,ISIC_9996992,0.524004
10979,ISIC_9997917,0.378495
10980,ISIC_9998234,0.267345


In [32]:
sam.to_csv("model_d201_smote_oversampling_undersampling.csv", index=False)