# Hourglass Network
## For keypoint extraction

In [3]:
import os
import cv2
import numpy as np
import keras.models as km
import keras.layers as kl


### Path to data

In [4]:
# Directory to scan and the filename suffix selecting the images to use.
DATA_PATH = "YCB-Video_data/data/0010"
DATA_EXT = "-color.png"

# os.listdir returns entries in arbitrary order; sort so that IMAGES (and
# therefore the sample frame picked below) is the same on every run.
IMAGES = sorted(f for f in os.listdir(DATA_PATH) if f.endswith(DATA_EXT))
if not IMAGES:
    raise FileNotFoundError(
        f"No files ending in {DATA_EXT!r} found in {DATA_PATH!r}")

# cv2.imread returns None instead of raising when the file cannot be read,
# so fail here with a clear message rather than on `.shape` below.
sample = cv2.imread(f"{DATA_PATH}/{IMAGES[0]}")
if sample is None:
    raise OSError(f"Could not read image {IMAGES[0]!r} from {DATA_PATH!r}")

# Array shape of the sample frame; later passed to kl.Input as the input shape.
DIM = sample.shape
# cv2.imshow("", sample)
# cv2.waitKey()
# cv2.destroyAllWindows()

In [5]:
def ResidualBottleneck(num_filters: int):
    """Return a callable that applies a residual bottleneck to a tensor.

    Each call of the returned function creates fresh Keras layers:

    * a shortcut branch: one 1x1 ReLU convolution with ``num_filters``
      channels;
    * a main branch: 1x1 -> 3x3 -> 1x1 ReLU convolutions with
      ``num_filters // 2``, ``num_filters // 2`` and ``num_filters``
      channels, each followed by batch normalization.

    The outputs of both branches are summed element-wise.
    """

    def conv_bn(tensor, filters, kernel_size):
        # "same"-padded ReLU convolution followed by batch normalization.
        convolved = kl.Conv2D(filters,
                              kernel_size=kernel_size,
                              padding="same",
                              activation="relu")(tensor)
        return kl.BatchNormalization()(convolved)

    def apply(x):
        # Shortcut: a learned 1x1 projection (not an identity mapping),
        # without batch normalization.
        shortcut = kl.Conv2D(num_filters,
                             kernel_size=(1, 1),
                             padding="same",
                             activation="relu")(x)

        # Main branch: squeeze to half width, 3x3 at half width, then expand
        # back to the full width so it can be added to the shortcut.
        main = conv_bn(x, num_filters // 2, (1, 1))
        main = conv_bn(main, num_filters // 2, (3, 3))
        main = conv_bn(main, num_filters, (1, 1))

        return kl.Add()([shortcut, main])

    return apply


In [9]:
def Front(num_filters: int):
    """Return a callable that applies the network's front module to a tensor.

    The returned function applies, in order:

    1. a 7x7 ReLU convolution with stride 2 and ``num_filters // 4``
       channels, followed by batch normalization;
    2. a residual bottleneck with ``num_filters // 2`` channels;
    3. a 2x2 max pooling with stride 2;
    4. a residual bottleneck with ``num_filters // 2`` channels;
    5. a residual bottleneck with ``num_filters`` channels.

    The strided convolution and the pooling together reduce the spatial
    resolution to about 1/4 of the input.
    """

    def apply(x):
        # Stem: strided 7x7 convolution on channels-last input.
        stem = kl.Conv2D(num_filters // 4,
                         kernel_size=(7, 7),
                         strides=(2, 2),
                         padding="same",
                         activation="relu",
                         data_format="channels_last")(x)
        stem = kl.BatchNormalization()(stem)

        # First bottleneck runs before the pooling step.
        features = ResidualBottleneck(num_filters // 2)(stem)
        features = kl.MaxPool2D(pool_size=(2, 2), strides=(2, 2))(features)

        # Remaining bottlenecks: half width first, then full width.
        for width in (num_filters // 2, num_filters):
            features = ResidualBottleneck(width)(features)
        return features

    return apply


In [10]:
# Channel count of the last residual block built by Front
num_filters = 256

# Symbolic input tensor with the shape of the sample image
# NOTE(review): `input` shadows the Python builtin of the same name; the name
# is kept because other cells may refer to it.
input = kl.Input(shape=DIM)
# Apply the front module and display the shape of its output tensor
Front(num_filters)(input).shape

TensorShape([None, 120, 160, 256])