In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras.layers import Conv2D, BatchNormalization, GlobalAveragePooling2D, Flatten, Dropout, Dense, MaxPooling2D
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Model

In [2]:
TRAIN_IMAGE_PATH = "jpeg/train/"
TEST_IMAGE_PATH = "jpeg/test/"
IMG_HEIGHT = 500
IMG_WIDTH = 500
BATCH_SIZE=64
AUTO = tf.data.experimental.AUTOTUNE
REG = 0.0005
EPOCHS=20

In [3]:
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

In [4]:
train_df["image_name"] = train_df["image_name"].apply(lambda x: TRAIN_IMAGE_PATH + x + ".jpg")
test_df["image_name"] = test_df["image_name"].apply(lambda x: TEST_IMAGE_PATH + x + ".jpg")

In [5]:
train_df, val_df = train_test_split(train_df, test_size=0.1, random_state=45, shuffle=True)

In [6]:
train_df.shape, val_df.shape

((29813, 8), (3313, 8))

In [7]:
def decode_image(filename, label=None, image_size=(IMG_WIDTH, IMG_HEIGHT)):
    bits = tf.io.read_file(filename)
    image = tf.image.decode_jpeg(bits, channels=3)
    image = tf.cast(image, tf.float32)/255.0
    image = tf.image.resize(image, size=image_size)
    
    if label is None:
        return image
    else:
        return image, label

In [8]:
def data_augment(image, label=None):
    image = tf.image.random_flip_left_right(image)
    if label is None:
        return image
    else:
        return image, label

In [9]:
def compute_class_weights(labels):
    total_labels = labels.shape[0]
    
    positive_labels = K.sum(labels, axis=0)/total_labels
    negative_labels = 1 - positive_labels
    return positive_labels, negative_labels

In [10]:
train_dataset = (tf.data.Dataset
                 .from_tensor_slices((train_df.image_name, train_df.target))
                 .map(decode_image, num_parallel_calls=AUTO)
                 .map(data_augment, num_parallel_calls=AUTO)
                 .repeat()
                 .shuffle(2048)
                 .batch(BATCH_SIZE)
                 .prefetch(AUTO)
                )

In [11]:
val_dataset = (tf.data.Dataset
               .from_tensor_slices((val_df.image_name, val_df.target))
               .map(decode_image, num_parallel_calls=AUTO)
               .map(data_augment, num_parallel_calls=AUTO)
               .repeat()
               .shuffle(2048)
               .batch(BATCH_SIZE)
               .prefetch(AUTO))

In [12]:
test_dataset = (tf.data.Dataset.from_tensor_slices((test_df.image_name))
                .map(decode_image, num_parallel_calls=AUTO)
                .batch(BATCH_SIZE))

In [13]:
lr = ReduceLROnPlateau(
    monitor="val_loss",
    patience=4,
    min_lr=0.000001,
    factor=0.5,
    verbose=1
)

In [14]:
es = EarlyStopping(monitor="val_loss", patience=10)

In [15]:
model_d201 = DenseNet201(include_top=False, weights="imagenet", input_shape=(IMG_WIDTH, IMG_HEIGHT,3))
model_d201.trainable=False

model = Conv2D(filters=32, kernel_size=(3,3), data_format="channels_last", activation="relu", kernel_regularizer=l2(REG))(model_d201.output)
model = BatchNormalization(axis=-1, center=True, scale=False)(model)
model = Conv2D(filters=32, kernel_size=(3,3), activation="relu", kernel_regularizer=l2(REG))(model)
model = BatchNormalization(axis=-1, center=True, scale=False)(model)
model = MaxPooling2D()(model)
model = Dropout(0.25)(model)

model = Conv2D(filters=64, kernel_size=(3,3), data_format="channels_last", activation="relu", kernel_regularizer=l2(REG))(model_d201.output)
model = BatchNormalization(axis=-1, center=True, scale=False)(model)
model = Conv2D(filters=64, kernel_size=(3,3), activation="relu", kernel_regularizer=l2(REG))(model)
model = BatchNormalization(axis=-1, center=True, scale=False)(model)
model = MaxPooling2D()(model)
model = Dropout(0.25)(model)


model = Flatten()(model)
model = Dense(256, activation="relu")(model)
model = BatchNormalization(axis=-1, center=True, scale=False)(model)
model = Dropout(0.5)(model)
model = Dense(64, activation="relu")(model)
model = BatchNormalization(axis=-1, center=True, scale=False)(model)

output = Dense(1, activation="sigmoid")(model)
model_d201 = Model(inputs=model_d201.input, outputs=output)
model_d201.summary()
model_d201.compile(optimizer="adam", loss="binary_crossentropy", metrics=[tf.keras.metrics.AUC()])

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 500, 500, 3) 0                                            
__________________________________________________________________________________________________
zero_padding2d (ZeroPadding2D)  (None, 506, 506, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1/conv (Conv2D)             (None, 250, 250, 64) 9408        zero_padding2d[0][0]             
__________________________________________________________________________________________________
conv1/bn (BatchNormalization)   (None, 250, 250, 64) 256         conv1/conv[0][0]                 
______________________________________________________________________________________________

conv5_block24_2_conv (Conv2D)   (None, 15, 15, 32)   36864       conv5_block24_1_relu[0][0]       
__________________________________________________________________________________________________
conv5_block24_concat (Concatena (None, 15, 15, 1664) 0           conv5_block23_concat[0][0]       
                                                                 conv5_block24_2_conv[0][0]       
__________________________________________________________________________________________________
conv5_block25_0_bn (BatchNormal (None, 15, 15, 1664) 6656        conv5_block24_concat[0][0]       
__________________________________________________________________________________________________
conv5_block25_0_relu (Activatio (None, 15, 15, 1664) 0           conv5_block25_0_bn[0][0]         
__________________________________________________________________________________________________
conv5_block25_1_conv (Conv2D)   (None, 15, 15, 128)  212992      conv5_block25_0_relu[0][0]       
__________

In [16]:
compute_class_weights(train_df.target.values)

(<tf.Tensor: shape=(), dtype=float64, numpy=0.01801227652366417>,
 <tf.Tensor: shape=(), dtype=float64, numpy=0.9819877234763358>)

In [None]:
history = model_d201.fit(train_dataset, epochs=EPOCHS, callbacks=[lr, es],
                        steps_per_epoch=train_df.shape[0]//BATCH_SIZE, validation_data=val_dataset,
                        validation_steps=val_df.shape[0]//BATCH_SIZE,
                        class_weight=compute_class_weights(train_df.target.values),)

Train for 465 steps, validate for 51 steps
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 10/20

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



In [None]:
probs = model_d201.predict(test_dataset, verbose = 1)

In [None]:
probs

In [None]:
sam = pd.read_csv("sample_submission.csv")

In [None]:
sam.head()


In [None]:
sam["target"] = probs

In [None]:
sam

In [None]:
sam.to_csv("res_50.csv", index=False)