In [18]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import numpy as np
import pandas as pd
import cv2
from glob import glob
import scipy.io
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping, CSVLogger
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Conv2DTranspose, Concatenate, Input
from tensorflow.keras.models import Model
from tensorflow.keras.applications import MobileNetV2

In [25]:
def conv_block(input, num_filters):
    x = Conv2D(num_filters, 3, padding="same")(input)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    x = Conv2D(num_filters, 3, padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    return x

def decoder_block(input, skip_features, num_filters):
    x = Conv2DTranspose(num_filters, (2, 2), strides=2, padding="same")(input)
    x = Concatenate()([x, skip_features])
    x = conv_block(x, num_filters)
    return x

def build_unet(input_shape, num_classes):
    inputs = Input(input_shape)
    
    # Load MobileNetV2 as the encoder
    mobile_net = MobileNetV2(include_top=False, weights='imagenet', input_tensor=inputs)
    
    # Define skip connections from MobileNetV2
    skip_connections = [
        mobile_net.get_layer("block_1_expand_relu").output,  # 64 filters
        mobile_net.get_layer("block_3_expand_relu").output,  # 96 filters
        mobile_net.get_layer("block_6_expand_relu").output,  # 144 filters
        mobile_net.get_layer("block_13_expand_relu").output  # 384 filters
    ]

    # Bottom of the U-Net
    bottom = mobile_net.get_layer("block_16_project").output  # 1280 filters

    # Decoder
    d1 = decoder_block(bottom, skip_connections[3], 512)
    d2 = decoder_block(d1, skip_connections[2], 256)
    d3 = decoder_block(d2, skip_connections[1], 128)
    d4 = decoder_block(d3, skip_connections[0], 64)

    # Adjust the output layer to ensure the final output shape matches the input shape
    x = Conv2DTranspose(32, (2, 2), strides=2, padding="same")(d4)
    x = conv_block(x, 32)

    outputs = Conv2D(num_classes, 1, padding="same", activation="softmax")(x)

    model = Model(inputs, outputs, name="U-Net_MobileNetV2")
    return model

In [4]:
global IMG_H
global IMG_W
global NUM_CLASSES
global CLASSES
global COLORMAP

In [5]:
def create_dir(path):
    if not os.path.exists(path):
        os.makedirs(path)

""" Load and split the dataset """
def load_dataset(path, split=0.2):
    train_x = sorted(glob(os.path.join(path, "Training", "Images", "*")))[:10000]
    train_y = sorted(glob(os.path.join(path, "Training", "Categories", "*")))[:10000]

    split_size = int(split * len(train_x))

    train_x, valid_x = train_test_split(train_x, test_size=split_size, random_state=42)
    train_y, valid_y = train_test_split(train_y, test_size=split_size, random_state=42)

    train_x, test_x = train_test_split(train_x, test_size=split_size, random_state=42)
    train_y, test_y = train_test_split(train_y, test_size=split_size, random_state=42)

    return (train_x, train_y), (valid_x, valid_y), (test_x, test_y)

In [6]:
def get_colormap(path):
    mat_path = os.path.join(path, "human_colormap.mat")
    colormap = scipy.io.loadmat(mat_path)["colormap"]
    colormap = colormap * 256
    colormap = colormap.astype(np.uint8)
    colormap = [[c[2], c[1], c[0]] for c in colormap]

    classes = [
        "Background",
        "Hat",
        "Hair",
        "Glove",
        "Sunglasses",
        "UpperClothes",
        "Dress",
        "Coat",
        "Socks",
        "Pants",
        "Torso-skin",
        "Scarf",
        "Skirt",
        "Face",
        "Left-arm",
        "Right-arm",
        "Left-leg",
        "Right-leg",
        "Left-shoe",
        "Right-shoe"
    ]

    return classes, colormap

In [7]:
def read_image_mask(x, y):
    """ Reading """
    x = cv2.imread(x, cv2.IMREAD_COLOR)
    y = cv2.imread(y, cv2.IMREAD_COLOR)
    assert x.shape == y.shape

    """ Resizing """
    x = cv2.resize(x, (IMG_W, IMG_H))
    y = cv2.resize(y, (IMG_W, IMG_H))

    """ Image processing """
    x = x / 255.0
    x = x.astype(np.float32)

    """ Mask processing """
    output = []
    for color in COLORMAP:
        cmap = np.all(np.equal(y, color), axis=-1)
        output.append(cmap)
    output = np.stack(output, axis=-1)
    output = output.astype(np.uint8)

    return x, output

In [8]:
def preprocess(x, y):
    def f(x, y):
        x = x.decode()
        y = y.decode()
        image, mask = read_image_mask(x, y)
        return image, mask

    image, mask = tf.numpy_function(f, [x, y], [tf.float32, tf.uint8])
    image.set_shape([IMG_H, IMG_W, 3])
    mask.set_shape([IMG_H, IMG_W, NUM_CLASSES])

    return image, mask

def tf_dataset(x, y, batch=8):
    dataset = tf.data.Dataset.from_tensor_slices((x, y))
    dataset = dataset.shuffle(buffer_size=5000)
    dataset = dataset.map(preprocess)
    dataset = dataset.batch(batch)
    dataset = dataset.prefetch(2)
    return dataset

In [9]:
np.random.seed(42)
tf.random.set_seed(42)

create_dir("/kaggle/working/files")

In [10]:
IMG_H = 320
IMG_W = 416
NUM_CLASSES = 20
input_shape = (IMG_H, IMG_W, 3)

batch_size = 16
lr = 1e-4
num_epochs = 100

In [20]:
dataset_path = "/kaggle/input/instance-image-segmentation/instance-level_human_parsing/instance-level_human_parsing"

model_path = os.path.join("/kaggle/working/files", "model.keras")
csv_path = os.path.join("/kaggle/working/files", "data.csv")

In [21]:
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = load_dataset(dataset_path)
print(f"Train: {len(train_x)}/{len(train_y)} - Valid: {len(valid_x)}/{len(valid_y)} - Test: {len(test_x)}/{len(test_x)}")
print("")

CLASSES, COLORMAP = get_colormap(dataset_path)

train_dataset = tf_dataset(train_x, train_y, batch=batch_size)
valid_dataset = tf_dataset(valid_x, valid_y, batch=batch_size)

Train: 6000/6000 - Valid: 2000/2000 - Test: 2000/2000



In [26]:
model = build_unet(input_shape, NUM_CLASSES)
# model.load_weights(model_path)
model.compile(
    loss="categorical_crossentropy",
    optimizer=tf.keras.optimizers.Adam(lr),
    metrics=['accuracy']
)

  mobile_net = MobileNetV2(include_top=False, weights='imagenet', input_tensor=inputs)


In [28]:
callbacks = [
        ModelCheckpoint(model_path, verbose=1, save_best_only=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-7, verbose=1),
        CSVLogger(csv_path, append=True),
        EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=False)
    ]

model.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=num_epochs,
    callbacks=callbacks
)

Epoch 1/100


2024-07-19 14:29:30.779401: E external/local_xla/xla/service/slow_operation_alarm.cc:65] Trying algorithm eng0{} for conv (f32[128,272,3,3]{3,2,1,0}, u8[0]{0}) custom-call(f32[16,272,80,104]{3,2,1,0}, f32[16,128,80,104]{3,2,1,0}), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBackwardFilter", backend_config={"conv_result_scale":1,"activation_mode":"kNone","side_input_scale":0,"leakyrelu_alpha":0} is taking a while...
2024-07-19 14:29:31.372194: E external/local_xla/xla/service/slow_operation_alarm.cc:133] The operation took 1.592914342s
Trying algorithm eng0{} for conv (f32[128,272,3,3]{3,2,1,0}, u8[0]{0}) custom-call(f32[16,272,80,104]{3,2,1,0}, f32[16,128,80,104]{3,2,1,0}), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBackwardFilter", backend_config={"conv_result_scale":1,"activation_mode":"kNone","side_input_scale":0,"leakyrelu_alpha":0} is taking a while...
2024-07-19 14:29:32.932836: E exter

[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.4442 - loss: 2.0942
Epoch 1: val_loss improved from inf to 1.35768, saving model to /kaggle/working/files/model.keras
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m811s[0m 2s/step - accuracy: 0.4447 - loss: 2.0931 - val_accuracy: 0.6786 - val_loss: 1.3577 - learning_rate: 1.0000e-04
Epoch 2/100
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.7539 - loss: 1.0384
Epoch 2: val_loss improved from 1.35768 to 1.10712, saving model to /kaggle/working/files/model.keras
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m664s[0m 2s/step - accuracy: 0.7539 - loss: 1.0382 - val_accuracy: 0.7103 - val_loss: 1.1071 - learning_rate: 1.0000e-04
Epoch 3/100
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.7984 - loss: 0.7745
Epoch 3: val_loss improved from 1.10712 to 0.84696, saving model to /kaggle/working/files/mo

<keras.src.callbacks.history.History at 0x7b91e083fa00>

In [29]:
tf.saved_model.save(
  model, '/kaggle/working/')

In [None]:
# Convert the model
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir) # path to the SavedModel directory
tflite_model = converter.convert()

# Save the model.
with open('model.tflite', 'wb') as f:
  f.write(tflite_model)


In [30]:
from sklearn.metrics import accuracy_score, f1_score, jaccard_score, precision_score, recall_score

In [31]:
def grayscale_to_rgb(mask, classes, colormap):
    h, w, _ = mask.shape
    mask = mask.astype(np.int32)
    output = []

    for i, pixel in enumerate(mask.flatten()):
        output.append(colormap[pixel])

    output = np.reshape(output, (h, w, 3))
    return output

def save_results(image, mask, pred, save_image_path):
    h, w, _ = image.shape
    line = np.ones((h, 10, 3)) * 255

    pred = np.expand_dims(pred, axis=-1)
    pred = grayscale_to_rgb(pred, CLASSES, COLORMAP)

    cat_images = np.concatenate([image, line, mask, line, pred], axis=1)
    cv2.imwrite(save_image_path, cat_images)

In [15]:
model_path = os.path.join("/kaggle/input/instance-degmentation/tensorflow2/default/1", "model.keras")
model = tf.keras.models.load_model(model_path)

In [None]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the model.
with open('/kaggle/working/model.tflite', 'wb') as f:
    f.write(tflite_model)

In [34]:
SCORE = []
for x, y in zip(test_x, test_y):
    name = x.split("/")[-1].split(".")[0]

    image = cv2.imread(x, cv2.IMREAD_COLOR)
    image = cv2.resize(image, (IMG_W, IMG_H))
    image_x = image
    image = image/255.0
    image = np.expand_dims(image, axis=0)

    mask = cv2.imread(y, cv2.IMREAD_COLOR)
    mask = cv2.resize(mask, (IMG_W, IMG_H))
    mask_x = mask
    onehot_mask = []
    for color in COLORMAP:
        cmap = np.all(np.equal(mask, color), axis=-1)
        onehot_mask.append(cmap)
    onehot_mask = np.stack(onehot_mask, axis=-1)
    onehot_mask = np.argmax(onehot_mask, axis=-1)
    onehot_mask = onehot_mask.astype(np.int32)

    pred = model.predict(image, verbose=0)[0]
    pred = np.argmax(pred, axis=-1)
    pred = pred.astype(np.float32)
    
#     save_image_path = f"/kaggle/working/files/{name}.png"
#     save_results(image_x, mask_x, pred, save_image_path)
    

    onehot_mask = onehot_mask.flatten()
    pred = pred.flatten()

    labels = [i for i in range(NUM_CLASSES)]

    f1_value = f1_score(onehot_mask, pred, labels=labels, average=None, zero_division=0)
    jac_value = jaccard_score(onehot_mask, pred, labels=labels, average=None, zero_division=0)

    SCORE.append([f1_value, jac_value])

In [35]:
score = np.array(SCORE)
score = np.mean(score, axis=0)

f = open("/kaggle/working/files/score.csv", "w")
f.write("Class,F1,Jaccard\n")

l = ["Class", "F1", "Jaccard"]
print(f"{l[0]:15s} {l[1]:10s} {l[2]:10s}")
print("-"*35)

for i in range(score.shape[1]):
    class_name = CLASSES[i]
    f1 = score[0, i]
    jac = score[1, i]
    dstr = f"{class_name:15s}: {f1:1.5f} - {jac:1.5f}"
    print(dstr)
    f.write(f"{class_name:15s},{f1:1.5f},{jac:1.5f}\n")

print("-"*35)
class_mean = np.mean(score, axis=-1)
class_name = "Mean"
f1 = class_mean[0]
jac = class_mean[1]
dstr = f"{class_name:15s}: {f1:1.5f} - {jac:1.5f}"
print(dstr)
f.write(f"{class_name:15s},{f1:1.5f},{jac:1.5f}\n")

f.close()

Class           F1         Jaccard   
-----------------------------------
Background     : 0.91726 - 0.85489
Hat            : 0.17345 - 0.14142
Hair           : 0.70944 - 0.60189
Glove          : 0.00000 - 0.00000
Sunglasses     : 0.00000 - 0.00000
UpperClothes   : 0.54907 - 0.44535
Dress          : 0.04839 - 0.03812
Coat           : 0.29541 - 0.24208
Socks          : 0.00000 - 0.00000
Pants          : 0.40370 - 0.33231
Torso-skin     : 0.58930 - 0.46888
Scarf          : 0.00258 - 0.00158
Skirt          : 0.02384 - 0.01836
Face           : 0.80208 - 0.70738
Left-arm       : 0.35982 - 0.27104
Right-arm      : 0.40433 - 0.30608
Left-leg       : 0.06772 - 0.04761
Right-leg      : 0.05562 - 0.03821
Left-shoe      : 0.05771 - 0.03415
Right-shoe     : 0.07405 - 0.04564
-----------------------------------
Mean           : 0.27669 - 0.22975
