In [None]:
from tensorflow.keras.layers import (Conv2D, UpSampling2D, Conv2DTranspose, concatenate, MaxPooling2D, 
                                     Activation, Dropout, Cropping2D, Flatten, Dense, BatchNormalization)
from tensorflow.keras.models import Model
from tensorflow.keras import Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.losses import BinaryCrossentropy, CategoricalCrossentropy
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder

In [None]:
# Target spatial size (in pixels) images are resized to and the model input expects.
IMG_HEIGHT = 200
IMG_WIDTH = 200
# Directory prefixes prepended to the image names read from the CSV files.
TRAIN_IMAGE_PATH = "jpeg/train/"
TEST_IMAGE_PATH = "jpeg/test/"
BATCH_SIZE = 8
# Lets tf.data choose parallelism / prefetch buffer sizes dynamically.
AUTO = tf.data.experimental.AUTOTUNE
# NOTE(review): REG is not referenced in the cells shown here - presumably a
# regularisation strength; confirm whether it is still needed.
REG = 0.0005
EPOCHS=40
# Metrics reported during training/validation for the binary classifier.
METRICS = [
    tf.keras.metrics.TruePositives(name='tp'),
    tf.keras.metrics.FalsePositives(name='fp'),
    tf.keras.metrics.TrueNegatives(name='tn'),
    tf.keras.metrics.FalseNegatives(name='fn'), 
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
    tf.keras.metrics.AUC(name='auc'),
    # Plain function (not a Metric instance).
    tf.keras.metrics.binary_crossentropy,
]

In [None]:
# Read the train / test metadata tables from the working directory.
train_df, test_df = (pd.read_csv(csv_path) for csv_path in ("train.csv", "test.csv"))

In [None]:
# Turn bare image names into relative file paths: <dir>/<name>.jpg
# (element-wise string concatenation on the whole column).
train_df["image_name"] = TRAIN_IMAGE_PATH + train_df["image_name"] + ".jpg"
test_df["image_name"] = TEST_IMAGE_PATH + test_df["image_name"] + ".jpg"

In [None]:
# Separate the rows by label so the majority class can be down-sampled.
train_df_target_1 = train_df.loc[train_df["target"].eq(1)]
train_df_target_0 = train_df.loc[train_df["target"].eq(0)]

In [None]:
# Drop the reference to the full frame; the cells shown below only use the
# per-class subsets. Note that the cells above cannot be re-run after this
# without reloading the CSV.
del train_df

In [None]:
# Positional indices of the target==0 rows to keep: twice as many as there are
# target==1 rows (capped at the number of available rows).
# Drawn WITHOUT replacement so no image is duplicated (duplicates could end up in
# both the training and the validation split), over the full range starting at
# row 0, and from a seeded generator so the subset is reproducible.
_n_negative = train_df_target_0.shape[0]
_n_keep = min(2 * train_df_target_1.shape[0], _n_negative)
random_target_0 = np.random.RandomState(45).choice(_n_negative, size=_n_keep, replace=False)

In [None]:
# Down-sampled training table: the selected target==0 rows followed by all target==1 rows.
sampled_target_0 = train_df_target_0.iloc[random_target_0]
train_df_d = pd.concat([sampled_target_0, train_df_target_1])

In [None]:
def decode_image(filename, label=None, image_size=(IMG_HEIGHT, IMG_WIDTH)):
    """Read a JPEG file and return it as a resized float32 RGB image.

    Args:
        filename: Path of the JPEG file to read.
        label: Optional label passed through unchanged so the function can be
            mapped over ``(filename, label)`` datasets.
        image_size: Target size as ``(height, width)`` - the order expected by
            ``tf.image.resize``.

    Returns:
        The image tensor, or ``(image, label)`` when a label is given.
    """
    bits = tf.io.read_file(filename)
    image = tf.image.decode_jpeg(bits, channels=3)
    # convert_image_dtype rescales the decoded uint8 pixels to [0, 1] floats,
    # so no explicit division by 255 is needed.
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, size=image_size)

    if label is None:
        return image
    return image, label
    
def data_augment(image, label=None):
    """Apply a fixed chain of image transforms and pass the label through.

    Steps, in order: RGB -> HSV conversion, random horizontal flip, brightness
    adjustment with a constant delta of 0.2, ``rot90`` with its default
    arguments, and a central crop with ``central_fraction=0.3``.

    NOTE(review): only the flip is random; the brightness delta and rotation
    are applied to every image, the brightness delta is added after the HSV
    conversion, and the crop changes the spatial size of the output. Confirm
    this is intended before wiring it into a pipeline.

    Returns:
        The transformed image, or ``(image, label)`` when a label is given.
    """
    hsv = tf.image.rgb_to_hsv(image)
    flipped = tf.image.random_flip_left_right(hsv)
    brightened = tf.image.adjust_brightness(flipped, 0.2)
    rotated = tf.image.rot90(brightened)
    cropped = tf.image.central_crop(rotated, central_fraction=0.3)

    if label is not None:
        return cropped, label
    return cropped

In [None]:
# lb = LabelEncoder()
# image_names = train_df["image_name"].values
# train_df["image_name"] = lb.fit_transform(train_df["image_name"].values)
# train_df["target"] = train_df["target"].astype("int")
# train_df.head()
# map_name_no = dict(zip(train_df["image_name"], image_names))
# y_train = train_df["target"]
# x_train = train_df[["image_name"]]


# over = SMOTE(random_state=45, sampling_strategy=0.1)
# under = RandomUnderSampler(sampling_strategy=0.5)
# steps = [('o', over), ('u', under)]
# ppl = Pipeline(steps=steps)
# x_train, y_train = ppl.fit_resample(x_train, y_train)

In [None]:
# Features: a one-column frame of image paths with a fresh 0..n-1 index.
x_train = train_df_d[["image_name"]].reset_index(drop=True)
# Labels: float32 array, in the same row order as x_train.
y_train = train_df_d["target"].astype(np.float32).values

In [None]:
# Hold out 20% for validation. Stratify on the label so the (imbalanced) class
# ratio is the same in the training and validation parts.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.2, random_state=45, stratify=y_train
)
# x_train["image_name"] = x_train["image_name"].apply(lambda x: map_name_no[x])
# x_val["image_name"] = x_val["image_name"].apply(lambda x: map_name_no[x])

In [None]:
# Shapes of the four split parts, in order: x_train, x_val, y_train, y_val.
tuple(part.shape for part in (x_train, x_val, y_train, y_val))

In [None]:
from sklearn.utils import class_weight

# 'balanced' weights, inversely proportional to class frequency in y_train.
# - `classes` and `y` are passed by keyword: current scikit-learn releases
#   reject them as positional arguments.
# - Keras' `fit(class_weight=...)` expects a dict {class index: weight}, not an
#   array, so the result is converted (labels 0.0 / 1.0 -> keys 0 / 1).
_classes = np.unique(y_train)
_balanced_weights = class_weight.compute_class_weight(
    class_weight='balanced', classes=_classes, y=y_train
)
class_weights = {int(label): float(weight) for label, weight in zip(_classes, _balanced_weights)}

In [None]:
# Display the computed class weights.
class_weights

In [None]:
train_dataset = (tf.data.Dataset
                 .from_tensor_slices((x_train["image_name"].values, y_train))
                 .map(decode_image, num_parallel_calls=AUTO)
                 .repeat()
                 .shuffle(512)
                 .batch(BATCH_SIZE)
                 .prefetch(AUTO)
                )

In [None]:
# Validation input pipeline.
# No shuffling: validation should score samples in a fixed order so that
# val_loss (which drives ReduceLROnPlateau / EarlyStopping) is comparable
# between epochs. repeat() is kept so the iterator is never exhausted when
# `validation_steps` is used.
val_dataset = (tf.data.Dataset
               .from_tensor_slices((x_val["image_name"].values, y_val))
               .map(decode_image, num_parallel_calls=AUTO)
               .repeat()
               .batch(BATCH_SIZE)
               .prefetch(AUTO))

In [None]:
# Test input pipeline: unlabeled images, decoded and batched in file order.
test_dataset = (
    tf.data.Dataset
    .from_tensor_slices(test_df["image_name"].values)
    .map(decode_image, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
)

In [None]:
# Halve the learning rate when val_loss has not improved for 4 epochs,
# never going below 1e-6.
lr = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=4, min_lr=1e-6, verbose=1)

In [None]:
# Stop training once val_loss has gone 10 epochs without improving.
es = EarlyStopping(
    monitor="val_loss",
    patience=10,
)

In [None]:
# Only overwrite the checkpoint when val_loss improves. With the default
# save_best_only=False the file is rewritten after every epoch, so
# "best_model.h5" would hold the LAST model rather than the best one.
model_chkpt = ModelCheckpoint(
    filepath="best_model.h5",
    monitor="val_loss",
    save_best_only=True,
)

In [None]:
def get_crfn(input_layer):
    """Build the convolutional binary classifier on top of ``input_layer``.

    The network is a plain stack of 16 same-padded ReLU convolutions named
    ``conv1`` ... ``conv16`` (no pooling layers), with dropout after ``conv14``
    and ``conv15``, followed by Flatten and a single sigmoid unit.

    Args:
        input_layer: Keras ``Input`` tensor the model starts from.

    Returns:
        A ``Model`` mapping ``input_layer`` to the sigmoid output.
    """

    def _conv(tensor, index, filters, kernel):
        # Same-padded ReLU convolution with a square kernel, named "conv<index>".
        return Conv2D(filters=filters, kernel_size=(kernel, kernel), activation="relu",
                      padding="same", name="conv{}".format(index))(tensor)

    # Blocks 1-5: (filter exponent, number of 3x3 convolutions); filters = 16 * 2**exponent.
    block_specs = ((0, 2), (1, 2), (2, 3), (3, 3), (3, 3))

    features = input_layer
    layer_index = 0
    for exponent, depth in block_specs:
        for _ in range(depth):
            layer_index += 1
            features = _conv(features, layer_index, 16 * (2 ** exponent), 3)

    # Block 6: 7x7 and 1x1 convolutions with dropout, then a 2-channel 1x1 convolution.
    features = _conv(features, 14, 16 * (2 ** 4), 7)
    features = Dropout(0.5)(features)
    features = _conv(features, 15, 16 * (2 ** 4), 1)
    features = Dropout(0.5)(features)
    features = _conv(features, 16, 2, 1)

    # Classification head: flatten the feature map into one sigmoid unit.
    flattened = Flatten()(features)
    probability = Dense(1, activation="sigmoid")(flattened)
    return Model(inputs=input_layer, outputs=probability)

In [None]:
# Instantiate the network for 3-channel images of the configured size.
input_layer = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3))
model_crfn = get_crfn(input_layer)

In [None]:
# Print the layer-by-layer summary of the model.
model_crfn.summary()

In [None]:
# The model's last layer is Dense(1, activation="sigmoid"), so it outputs
# probabilities, not logits. The loss must therefore use from_logits=False;
# with from_logits=True the sigmoid would effectively be applied twice.
model_crfn.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss=BinaryCrossentropy(from_logits=False),
    metrics=METRICS,
)

In [None]:
# Both datasets repeat indefinitely, so the number of batches per epoch /
# per validation pass is given explicitly (whole batches only).
train_steps = x_train.shape[0] // BATCH_SIZE
val_steps = x_val.shape[0] // BATCH_SIZE

history = model_crfn.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=EPOCHS,
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    class_weight=class_weights,
    callbacks=[lr, es, model_chkpt],
)