In [None]:
# !wget https://web.archive.org/web/20150520175645/http://137.189.35.203/WebUI/CatDatabase/Data/CAT_DATASET_01.zip
# !unzip CAT_DATASET_01.zip
# !wget https://web.archive.org/web/20150520175645/http://137.189.35.203/WebUI/CatDatabase/Data/CAT_DATASET_02.zip
# !unzip CAT_DATASET_02.zip
# !wget https://web.archive.org/web/20150520175645/http://137.189.35.203/WebUI/CatDatabase/Data/00000003_015.jpg.cat
# !mv 00000003_015.jpg.cat "./CAT_00"
# !mv CAT_* "/content/drive/MyDrive/AdvancedComputerVision/BBR/images"

In [1]:
%cd "/content/drive/MyDrive/CAT DATABASE"

/content/drive/MyDrive/CAT DATABASE


In [2]:
!ls .

 00000003_015.jpg.cat   CAT_DATASET_02	   dataset.py	 Untitled0.ipynb
 CAT_DATASET_01        'Colab Notebooks'   __pycache__


In [3]:
import sys
import os
# Remember the working directory and make it importable so that the local
# dataset.py module can be found. The membership check keeps this cell
# idempotent: re-running it does not append duplicate entries to sys.path.
CURRENT_DIR = os.getcwd()
if CURRENT_DIR not in sys.path:
    sys.path.append(CURRENT_DIR)

In [4]:
import sys
from dataset import Dataset
import numpy as np
import argparse
import random
from scipy import misc
from skimage import draw
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.optimizers import Adam
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.callbacks import ModelCheckpoint

# Seed NumPy's and Python's global random number generators so that any
# randomness drawn from them is repeatable between runs.
# NOTE(review): the deep-learning backend's own RNG is not seeded here, so
# weight initialisation may still vary between runs -- confirm if needed.
np.random.seed(42)
random.seed(42)

In [5]:
# Resolution (in pixels) that every image is resized to before being fed to the model.
MODEL_IMAGE_HEIGHT = 128
MODEL_IMAGE_WIDTH = 128
# Upper bound on the number of images loaded from the dataset.
NB_LOAD_IMAGES = 9500
SPLIT = 0.1 # Fraction held out for validation (train : validation = 9 : 1)
# Training hyper-parameters.
EPOCHS = 150
BATCH_SIZE = 64
# Output paths: final weights and the best checkpoint seen during training.
SAVE_WEIGHTS_FILEPATH = os.path.join(CURRENT_DIR, "cat_face_locator.weights")
SAVE_WEIGHTS_CHECKPOINT_FILEPATH = os.path.join(CURRENT_DIR, "cat_face_locator.best.weights")
# Whether to write validation images with the predicted rectangle drawn on
# them, and the directory they are written to.
SAVE_PREDICTIONS = True
SAVE_PREDICTIONS_DIR = os.path.join(CURRENT_DIR, "predictions")
# NOTE(review): equals CURRENT_DIR as long as the working directory has not
# changed since it was captured; not referenced by the other visible cells.
DATASET_DIR = os.getcwd()

In [6]:
# Collect the image sub-directories of both dataset archives (paths are kept
# relative to the working directory, e.g. "CAT_DATASET_01/<subdir>").
# os.listdir() returns entries in arbitrary order, so they are sorted to make
# the order handed to Dataset reproducible between runs. Entries that are not
# directories (stray files) are skipped.
subdir_names = []

for i in (1, 2):
  cat_dataset = f'CAT_DATASET_0{i}'
  dataset_root = os.path.join(os.getcwd(), cat_dataset)
  for subdir in sorted(os.listdir(dataset_root)):
    if os.path.isdir(os.path.join(dataset_root, subdir)):
      subdir_names.append(os.path.join(cat_dataset, subdir))

dataset = Dataset(subdir_names)

In [7]:
def load_xy(dataset, nb_load):
    """Loads X and y (examples with labels) for the dataset.

    Examples are images resized to MODEL_IMAGE_HEIGHT x MODEL_IMAGE_WIDTH and
    scaled by 1/255. Labels describe the face rectangle as
    [center y, center x, half height, half width], taken from the rectangle
    after it has been normalized with respect to the image.

    Args:
        dataset            The Dataset object (must provide `fps` and
                           `get_images()`).
        nb_load            Intended number of images to load. Values <= 0
                           yield empty arrays.
    Returns:
        X (numpy array of shape (N, height, width, 3)),
        y (numpy array of shape (N, 4))
        where N is the number of images actually loaded, i.e. at most
        min(nb_load, len(dataset.fps)).
    """
    # Never allocate a negative number of rows.
    nb_images = max(0, min(nb_load, len(dataset.fps)))
    X = np.zeros((nb_images, MODEL_IMAGE_HEIGHT, MODEL_IMAGE_WIDTH, 3), dtype=np.float32)
    y = np.zeros((nb_images, 4), dtype=np.float32)

    # Nothing requested / nothing available: do not touch the image generator,
    # otherwise the first assignment below would index into an empty array.
    if nb_images == 0:
        return X, y

    nb_loaded = 0
    for image in dataset.get_images():
        if nb_loaded % 100 == 0:
            print("Loading image %d of %d..." % (nb_loaded+1, nb_images))
        image.resize(MODEL_IMAGE_HEIGHT, MODEL_IMAGE_WIDTH)
        # Norm image
        X[nb_loaded] = image.to_array() / 255.0
        # Prepare groundtruth
        face_rect = image.keypoints.get_rectangle(image)
        face_rect.normalize(image)
        center = face_rect.get_center()
        width = face_rect.get_width() / 2
        height = face_rect.get_height() / 2
        y[nb_loaded] = [center.y, center.x, height, width]

        nb_loaded += 1
        if nb_loaded >= nb_images:
            break

    # If the generator yielded fewer images than were allocated, drop the
    # unused all-zero rows so they cannot end up in the training data.
    return X[:nb_loaded], y[:nb_loaded]

def unnormalize_prediction(y, x, half_height, half_width, \
                           img_height=MODEL_IMAGE_HEIGHT, img_width=MODEL_IMAGE_WIDTH):
    """Converts a normalized prediction (center y, center x, half-height,
    half-width) into pixel coordinates of the rectangle's corners.
    Args:
        y: Normalized y coordinate of rectangle center.
        x: Normalized x coordinate of rectangle center.
        half_height: Half of the rectangle's height (normalized).
        half_width: Half of the rectangle's width (normalized).
        img_height: Height of the image to use while unnormalizing.
        img_width: Width of the image to use while unnormalizing.
    Returns:
        (top left y in px, top left x in px, bottom right y in px,
        bottom right x in px)
    """
    def _pixel_span(center, half_extent, size):
        # Scale both edges to pixels and keep them inside the image; the
        # lower edge is limited to size-2 so there is room for the upper one.
        low = clip(0, int((center - half_extent) * size), size-2)
        high = clip(0, int((center + half_extent) * size), size-1)

        # The lower edge must not lie beyond the upper edge.
        if low > high:
            low, high = high, low

        # Guarantee an extent of at least 1px. Prefer moving the lower edge,
        # unless that would push it outside of the image.
        if low == high:
            if low == 0:
                high += 1
            else:
                low -= 1

        return low, high

    tl_y, br_y = _pixel_span(y, half_height, img_height)
    tl_x, br_x = _pixel_span(x, half_width, img_width)
    return tl_y, tl_x, br_y, br_x

def draw_predicted_rectangle(image_arr, y, x, half_height, half_width):
    """Draws a predicted rectangle onto a copy of the image.

    The image values are multiplied by 255 before drawing; the input array
    itself is left untouched.
    Args:
        image_arr: Numpy array of the image, indexed as (row, col, channel).
        y: y-coordinate of the rectangle center (normalized to 0-1).
        x: x-coordinate of the rectangle center (normalized to 0-1).
        half_height: Half of the height of the rectangle (normalized to 0-1).
        half_width: Half of the width of the rectangle (normalized to 0-1).
    Returns:
        Modified image (numpy array)
    """
    img_height = image_arr.shape[0]
    img_width = image_arr.shape[1]
    # Corner order: top left y, top left x, bottom right y, bottom right x.
    corners = unnormalize_prediction(y, x, half_height, half_width, \
                                     img_height=img_height, img_width=img_width)
    scaled = np.copy(image_arr) * 255
    return draw_rectangle(scaled, *corners)

def draw_rectangle(img, tl_y, tl_x, br_y, br_x):
    """Draws the outline of a rectangle onto a copy of an image.

    The outline is drawn with anti-aliased lines and written into channel 0
    only; the other channels keep their values.
    Args:
        img: The image as a numpy array of shape (row, col, channel).
        tl_y: Top left y coordinate as pixel.
        tl_x: Top left x coordinate as pixel.
        br_y: Bottom right y coordinate as pixel.
        br_x: Bottom right x coordinate as pixel.
    Returns:
        image with rectangle
    """
    assert img.shape[2] == 3, img.shape[2]
    canvas = np.copy(img)
    # Corners in drawing order: top left, top right, bottom right, bottom left.
    corners = [(tl_y, tl_x), (tl_y, br_x), (br_y, br_x), (br_y, tl_x)]
    # Connect every corner with its successor; the last one closes the outline
    # by connecting back to the first.
    for (y0, x0), (y1, x1) in zip(corners, corners[1:] + corners[:1]):
        rows, cols, intensity = draw.line_aa(y0, x0, y1, x1)
        canvas[rows, cols, 0] = intensity * 255

    return canvas

def clip(lower, val, upper):
    """Restricts a value to the closed interval [lower, upper].

    The lower bound is checked first, so it wins if the value is below it.
    Args:
        lower: Lower boundary (including)
        val: The value to clip
        upper: Upper boundary (including)
    Returns:
        value within bounds
    """
    if val < lower:
        return lower
    return upper if val > upper else val



In [13]:
from keras.applications import VGG16

def create_model(image_height, image_width, loss, optimizer):
    """Builds and compiles the cat face locator model.

    The network is a VGG16 convolutional base (ImageNet weights, without its
    classification head) followed by a bounding box regression head with
    4 outputs.

    Args:
        image_height: The height of the input images.
        image_width: The width of the input images.
        loss: Keras loss function (name or object), e.g. "mse".
        optimizer: Keras optimizer to use, e.g. Adam() or "sgd".
    Returns:
        Sequential
    """
    # Convolutional base: VGG16 pretrained on ImageNet, created without the
    # fully connected layers (include_top=False).
    backbone = VGG16(weights='imagenet', include_top=False,
                     input_shape=(image_height, image_width, 3))

    # Freeze every layer of the base, so only the layers added below are
    # trainable.
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    # Regression head: flatten the feature maps, then
    # 4096 neurons -> ReLU -> 4096 neurons -> ReLU -> 4 neurons (no activation).
    stack = [
        backbone,
        Flatten(),
        Dense(4096, activation='relu'),
        Dense(4096, activation='relu'),
        Dense(4),
    ]

    model = Sequential()
    for layer in stack:
        model.add(layer)

    # Compile with the loss and optimizer chosen by the caller
    print("Compiling...")
    model.compile(loss=loss, optimizer=optimizer)

    return model


In [9]:
# Load the images (X) together with their face rectangle labels (y).
print("Loading images...")
X, y = load_xy(dataset, NB_LOAD_IMAGES)

# Split in loading order: the leading (1 - SPLIT) share of the examples is used
# for training, the remaining tail for validation.
nb_images = X.shape[0]
nb_train = int(nb_images * (1 - SPLIT))
X_train, X_val = X[:nb_train], X[nb_train:]
y_train, y_val = y[:nb_train], y[nb_train:]

Loading images...
Loading image 1 of 9500...
Loading image 101 of 9500...
Loading image 201 of 9500...
Loading image 301 of 9500...
Loading image 401 of 9500...
Loading image 501 of 9500...
Loading image 601 of 9500...
Loading image 701 of 9500...
Loading image 801 of 9500...
Loading image 901 of 9500...
Loading image 1001 of 9500...
Loading image 1101 of 9500...
Loading image 1201 of 9500...
Loading image 1301 of 9500...
Loading image 1401 of 9500...
Loading image 1501 of 9500...
Loading image 1601 of 9500...
Loading image 1701 of 9500...
Loading image 1801 of 9500...
Loading image 1901 of 9500...
Loading image 2001 of 9500...
Loading image 2101 of 9500...
Loading image 2201 of 9500...
Loading image 2301 of 9500...
Loading image 2401 of 9500...
Loading image 2501 of 9500...
Loading image 2601 of 9500...
Loading image 2701 of 9500...
Loading image 2801 of 9500...
Loading image 2901 of 9500...
Loading image 3001 of 9500...
Loading image 3101 of 9500...
Loading image 3201 of 9500...
Load

In [14]:
# STEP 2: build the model with the Regression Head only (no Classification Head).
# Loss and optimizer are free choices and may be replaced.
print("Creating model...")
model = create_model(
    image_height=MODEL_IMAGE_HEIGHT,
    image_width=MODEL_IMAGE_WIDTH,
    loss="mse",
    optimizer=Adam(),
)


Creating model...
Compiling...


In [15]:
import cv2

# STEP 3: Train with Regression Head
# The checkpoint callback keeps the model with the best validation loss so far.
# NOTE(review): with these arguments the checkpoint stores the whole model, not
# only its weights, although the target path is named "*.weights" -- confirm
# whether save_weights_only=True was intended before using load_weights() on it.
checkpoint_cb = ModelCheckpoint(SAVE_WEIGHTS_CHECKPOINT_FILEPATH, verbose=1, \
                                save_best_only=True)
model.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_split=0.0,
          validation_data=(X_val, y_val),
          callbacks=[checkpoint_cb])

# save weights of the final epoch
print("Saving weights...")
model.save_weights(SAVE_WEIGHTS_FILEPATH, overwrite=True)

#model.load_weights(SAVE_WEIGHTS_CHECKPOINT_FILEPATH)

# save predictions on val set
if SAVE_PREDICTIONS:
    print("Saving example predictions...")
    # cv2.imwrite() does not create missing directories; it just returns False.
    os.makedirs(SAVE_PREDICTIONS_DIR, exist_ok=True)
    y_preds = model.predict(X_val, batch_size=BATCH_SIZE)
    # The loop variables carry a "pred_" prefix so that they do not overwrite
    # the global label array `y` created by the loading cell.
    for img_idx, (pred_y, pred_x, pred_half_height, pred_half_width) in enumerate(y_preds):
        img_arr = draw_predicted_rectangle(X_val[img_idx], pred_y, pred_x,
                                           pred_half_height, pred_half_width)
        filepath = os.path.join(SAVE_PREDICTIONS_DIR, "%d.png" % (img_idx,))
        # NOTE(review): cv2 interprets channels as BGR -- confirm the channel
        # order of the loaded images if the colors of the saved files matter.
        if not cv2.imwrite(filepath, np.squeeze(img_arr)):
            print("Could not write prediction image to %s" % (filepath,))


Epoch 1/150

Epoch 00001: val_loss improved from inf to 0.00782, saving model to /content/drive/My Drive/CAT DATABASE/cat_face_locator.best.weights
INFO:tensorflow:Assets written to: /content/drive/My Drive/CAT DATABASE/cat_face_locator.best.weights/assets
Epoch 2/150

Epoch 00002: val_loss improved from 0.00782 to 0.00440, saving model to /content/drive/My Drive/CAT DATABASE/cat_face_locator.best.weights
INFO:tensorflow:Assets written to: /content/drive/My Drive/CAT DATABASE/cat_face_locator.best.weights/assets
Epoch 3/150

Epoch 00003: val_loss did not improve from 0.00440
Epoch 4/150

Epoch 00004: val_loss did not improve from 0.00440
Epoch 5/150

Epoch 00005: val_loss improved from 0.00440 to 0.00373, saving model to /content/drive/My Drive/CAT DATABASE/cat_face_locator.best.weights
INFO:tensorflow:Assets written to: /content/drive/My Drive/CAT DATABASE/cat_face_locator.best.weights/assets
Epoch 6/150

Epoch 00006: val_loss did not improve from 0.00373
Epoch 7/150

Epoch 00007: val

In [None]:
# Step 4
# CODE: Apply Classification Head and Regression Head for a full system of localization

Hmm... Chỗ này mình chưa hiểu lắm: áp dụng classification trong khi dữ liệu chỉ toàn là mèo thì phân biệt với cái gì? Nhãn phân lớp cho dữ liệu cũng chưa có.

Hơn nữa, để thiết kế mô hình có nhiều output thì cần sử dụng Functional API, trong khi hàm `create_model` lại sử dụng Sequential API.