## HourGlass Network.

Creating a smaller version of the hourglass network (an encoding/decoding network). The hourglass network is usually used in a stacked fashion. The CenterNet model (an object detection algorithm/architecture) uses a stacked hourglass network for feature extraction.

In [1]:
import torch
import torch.nn as nn
from torchvision import transforms
from scipy.io import loadmat
import cv2 as cv
from PIL import Image, ImageDraw
import numpy as np

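The BottleNeck block is a Residual Module, which is used to extract features through a 1x1 -> 3x3 -> 1x1 convolution stack while a skip connection adds the input back to the result (the input is first passed through a 1x1 convolution when the number of input and output channels differ).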

In [2]:
class ResNetBlock(nn.Module):
    """Pre-activation bottleneck residual block.

    The main path is three (BatchNorm -> ReLU -> Conv) stages: a 1x1
    convolution mapping ``inp_dim`` to ``out_dim // 2`` channels, a 3x3
    convolution at that width, and a 1x1 convolution expanding to ``out_dim``.
    The result is added to the block input, which is projected by a 1x1
    convolution first when ``inp_dim`` differs from ``out_dim``. Every
    convolution has stride 1 and the 3x3 one pads by 1, so the spatial size
    of the input is preserved.
    """

    def __init__(self, inp_dim, out_dim):
        """Build the layers.

        Args:
            inp_dim: number of channels of the input feature map.
            out_dim: number of channels of the output feature map.
        """
        super().__init__()

        # width of the bottleneck (half of the output channels).
        bottleneck = out_dim // 2

        # main path; the single ReLU instance is reused after every BatchNorm.
        self.relu = nn.ReLU()
        self.bn1 = nn.BatchNorm2d(inp_dim)
        self.conv1 = nn.Conv2d(inp_dim, bottleneck, kernel_size=1)
        self.bn2 = nn.BatchNorm2d(bottleneck)
        self.conv2 = nn.Conv2d(bottleneck, bottleneck, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(bottleneck)
        self.conv3 = nn.Conv2d(bottleneck, out_dim, kernel_size=1)

        # 1x1 projection for the shortcut. It is always created, but only
        # applied when the channel counts differ (see ``need_skip``).
        self.skip_layer = nn.Conv2d(inp_dim, out_dim, kernel_size=1)
        self.need_skip = inp_dim != out_dim

    def forward(self, x):
        """Return ``main_path(x) + shortcut(x)``."""
        shortcut = self.skip_layer(x) if self.need_skip else x

        stages = (
            (self.bn1, self.conv1),
            (self.bn2, self.conv2),
            (self.bn3, self.conv3),
        )
        features = x
        for norm, conv in stages:
            features = conv(self.relu(norm(features)))

        features += shortcut
        return features

In [3]:
class Hourglass(nn.Module):
    """Recursive hourglass (encoder/decoder) module.

    Each level has two branches that are summed at the end:

    * a skip branch (``up1``) that processes the input at its own resolution;
    * a low-resolution branch that halves the input with a 2x2 max-pool,
      applies ``low1``, then either a nested ``Hourglass`` (while ``n > 1``)
      or a single ``ResNetBlock``, then ``low3``, and finally up-samples the
      result back to the skip branch's resolution.

    The number of channels (``filters``) is the same everywhere.
    """

    def __init__(self, n, filters, bn=None):
        """Build one hourglass level and, recursively, the levels below it.

        Args:
            n: remaining recursion depth; a nested ``Hourglass`` is created
                while ``n > 1``.
            filters: number of channels of every feature map in the module.
            bn: not used by this class; it is only forwarded to nested
                levels. Kept so existing callers keep working.
        """
        super().__init__()
        self.n = n
        # skip branch: keeps the features at the input resolution.
        self.up1 = ResNetBlock(filters, filters)
        # encoding: halve the resolution, then extract features.
        self.pool1 = nn.MaxPool2d(2, 2)
        self.low1 = ResNetBlock(filters, filters)
        # recurse to go one level deeper, or bottom out with a plain block.
        self.low2 = Hourglass(n - 1, filters, bn=bn) if self.n > 1 else ResNetBlock(filters, filters)
        self.low3 = ResNetBlock(filters, filters)
        # decoding: bring the low-resolution features back up.
        self.up2 = nn.Upsample(scale_factor=2, mode='nearest')

    def forward(self, x):
        """Return the sum of the skip branch and the up-sampled low branch."""
        up1 = self.up1(x)

        # encoding (lower levels).
        low = self.low3(self.low2(self.low1(self.pool1(x))))

        # decoding (up-sampling).
        up2 = self.up2(low)
        if up2.shape[-2:] != up1.shape[-2:]:
            # The max-pool floors odd heights/widths, so doubling does not
            # restore the original size. Resample straight to the skip
            # branch's size instead of failing on the addition below.
            up2 = nn.functional.interpolate(low, size=up1.shape[-2:], mode='nearest')

        return up1 + up2

In [4]:
class HourGlassNetwork(nn.Module):
    """Single-hourglass network that maps an RGB image to heat maps.

    A stem (7x7 stride-2 convolution, BatchNorm, ReLU, residual blocks and a
    2x2 max-pool) produces 256-channel features, which go through one
    depth-4 ``Hourglass`` and a 1x1 convolution head that emits
    ``num_heatmap`` channels.
    """

    def __init__(self, input_shape=(256, 256, 3), num_stack=1, num_residual=1, num_heatmap=1):
        """Build the layers (by default a single centre key-point heat map).

        Args:
            input_shape: accepted but not used; the first convolution always
                expects 3 input channels.
            num_stack: accepted but not used; exactly one hourglass is built.
            num_residual: accepted but not used.
            num_heatmap: number of output heat-map channels.
        """
        super().__init__()

        # stem: initial feature extraction.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.res1 = ResNetBlock(64, 128)
        self.pool = nn.MaxPool2d(2, 2)
        self.res2 = ResNetBlock(128, 128)
        self.res3 = ResNetBlock(128, 256)

        # hourglass body.
        self.hg1 = Hourglass(4, 256)

        # head: 1x1 convolutions down to the requested number of heat maps.
        self.linear = nn.Conv2d(256, 256, kernel_size=1)
        self.bn2 = nn.BatchNorm2d(256)
        self.y = nn.Conv2d(256, num_heatmap, kernel_size=1)

    def forward(self, x):
        """Run the stem, the hourglass and the head, in that order."""
        pipeline = (
            self.conv1, self.bn1, self.relu,
            self.res1, self.pool, self.res2, self.res3,
            self.hg1,
            self.linear, self.bn2, self.relu,
            self.y,
        )
        out = x
        for layer in pipeline:
            out = layer(out)

        return out

In [5]:
# Device on which the model runs: Apple MPS if usable, else CUDA, else CPU.
# `torch.backends.mps.is_available()` is used instead of the deprecated
# `torch.has_mps`, which only reports that PyTorch was built with MPS support,
# not that an MPS device can actually be used on this machine.
_mps_backend = getattr(torch.backends, 'mps', None)  # absent in old PyTorch builds
if _mps_backend is not None and _mps_backend.is_available():
    DEVICE = torch.device('mps')
elif torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')

# Create the model with its default constructor arguments and move its
# parameters and buffers to the selected device. The call is the last
# expression of the cell, so the notebook also prints the module structure.
model = HourGlassNetwork()
model.to(DEVICE)

HourGlassNetwork(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
  (res1): ResNetBlock(
    (relu): ReLU()
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1))
    (skip_layer): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1))
  )
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (res2): ResNetBlock(
    (relu): ReLU()
    (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running

In [6]:
DEVICE

device(type='mps')

In [7]:
# Load one frame, convert it to RGB and resize it to the 256x256 input size.
img_path = r'/Users/chiahaohsutai/Documents/GitHub/PRW/frames/c1s1_000801.jpg'
image = cv.imread(img_path)
if image is None:
    # cv.imread does not raise on a missing or unreadable file; it returns
    # None, which would otherwise surface as an opaque error in cvtColor.
    raise FileNotFoundError(f'Could not read image: {img_path}')
image = cv.cvtColor(image, cv.COLOR_BGR2RGB)  # OpenCV loads images as BGR
image = cv.resize(image, (256, 256))

# Per-channel normalisation statistics applied after scaling to [0, 1].
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]

process = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=MEAN, std=STD)
])

# Add the batch dimension and move the tensor to the model's device.
img = process(image).unsqueeze(0).to(DEVICE)

In [8]:
img.shape

torch.Size([1, 3, 256, 256])

In [9]:
model.eval()

with torch.no_grad():
    pred = model(img)

In [10]:
pred.shape

torch.Size([1, 1, 64, 64])

In [11]:
pred

tensor([[[[-0.7719, -0.7055, -0.6553,  ..., -0.3752, -0.3756, -0.3780],
          [-0.7516, -0.6886, -0.6303,  ..., -0.3754, -0.3848, -0.3780],
          [-0.7233, -0.6466, -0.5703,  ..., -0.3787, -0.4078, -0.4102],
          ...,
          [-0.2260, -0.2165, -0.2095,  ..., -0.2683, -0.2704, -0.2730],
          [-0.2055, -0.2107, -0.2116,  ..., -0.2524, -0.2474, -0.2523],
          [-0.2095, -0.2072, -0.2014,  ..., -0.2529, -0.2542, -0.2572]]]],
       device='mps:0')

In [12]:
pred.shape

torch.Size([1, 1, 64, 64])

In [13]:
img

tensor([[[[ 2.2318,  2.2489,  2.2489,  ...,  0.1426,  0.3652,  0.1597],
          [ 2.2318,  2.2489,  2.2489,  ...,  0.1768,  0.2282,  0.0912],
          [ 2.2489,  2.2489,  2.2489,  ...,  0.1426,  0.2453,  0.1597],
          ...,
          [ 0.1597,  0.1254,  0.1768,  ..., -0.5424, -0.4911, -0.6109],
          [ 0.1597,  0.0741,  0.0741,  ..., -0.6109, -0.6109, -0.5253],
          [ 0.0912, -0.0287, -0.0629,  ..., -0.6109, -0.6109, -0.5424]],

         [[ 2.4111,  2.4286,  2.4286,  ...,  0.2752,  0.4153,  0.2927],
          [ 2.4111,  2.4286,  2.4286,  ...,  0.3102,  0.2927,  0.2227],
          [ 2.4286,  2.4286,  2.4286,  ...,  0.2752,  0.3102,  0.2927],
          ...,
          [ 0.2227,  0.1877,  0.2577,  ..., -0.4951, -0.4076, -0.5476],
          [ 0.2577,  0.1702,  0.2052,  ..., -0.5126, -0.4951, -0.4601],
          [ 0.1877,  0.0651,  0.0651,  ..., -0.5126, -0.4951, -0.4951]],

         [[ 2.6226,  2.6400,  2.6400,  ...,  0.4962,  0.6531,  0.5136],
          [ 2.6226,  2.6400,  

In [14]:
# Load the bounding-box annotations stored under the 'box_new' key of the
# .mat file that matches the frame loaded above.
ann_path = r'/Users/chiahaohsutai/Documents/GitHub/PRW/annotations/c1s1_000801.jpg.mat'
annotation = loadmat(ann_path)['box_new']
# Drop the first value of every row and keep the rest, which the following
# cells unpack as (x, y, w, h).
# NOTE(review): the dropped value is presumably an identity/label id - confirm
# against the dataset's annotation format.
annotation = [ann[1:] for ann in annotation]

In [15]:
annotation

[array([1460.84557235,  405.20842333,  151.61987041,  387.21382289]),
 array([1018.75053996,  419.20410367,   60.64794816,  177.27861771]),
 array([191.90388769, 468.18898488,  55.98272138, 139.95680346]),
 array([853.1349892 , 442.53023758,  48.98488121, 121.29589633])]

In [16]:
# Centre of every box: its (x, y) corner shifted by half the box size.
centers = []
for x, y, w, h in annotation:
    center_point = (x + (w / 2), y + (h / 2))
    centers.append(center_point)

In [17]:
centers

[(1536.6555075593953, 598.8153347732183),
 (1049.0745140388772, 507.8434125269977),
 (219.89524838012954, 538.1673866090715),
 (877.6274298056157, 503.1781857451402)]

In [18]:
# Sanity check: re-open the original frame with PIL, mark every box centre
# with a single red pixel and display the result.
im = Image.open(img_path)
draw = ImageDraw.Draw(im)
for center in centers:
    draw.point(center, fill=(255, 0, 0))
im.show()

In [19]:
im.size # width, height

(1920, 1080)

Creating a gaussian patch.

In [20]:
def generate_patch(scale=12, sigma=1):
    """Create a square 2-D Gaussian patch centred on its middle pixel.

    The patch covers three standard deviations on each side of the centre,
    so it has ``6 * sigma + 1`` rows and columns (7x7 for ``sigma=1``). The
    value at offset ``(dx, dy)`` from the centre is
    ``scale * exp(-(dx**2 + dy**2) / (2 * sigma**2))``, so the centre pixel
    equals ``scale``.

    Args:
        scale: peak value of the patch (value of the centre pixel).
        sigma: standard deviation of the Gaussian in pixels; must be a
            positive whole number so that the patch has a centre pixel.

    Returns:
        A ``(6 * sigma + 1, 6 * sigma + 1)`` floating-point tensor.

    Raises:
        ValueError: if ``sigma`` is not a positive whole number.
    """
    if sigma != int(sigma) or sigma < 1:
        raise ValueError(f'sigma must be a positive whole number, got {sigma!r}')
    sigma = int(sigma)

    # signed offsets from the centre pixel: -3*sigma, ..., 0, ..., 3*sigma.
    radius = 3 * sigma
    offsets = torch.arange(-radius, radius + 1, dtype=torch.get_default_dtype())

    # squared distance of every pixel to the centre (outer sum via broadcasting).
    squared = torch.square(offsets)
    dist_sq = squared.unsqueeze(0) + squared.unsqueeze(1)

    denom = 2 * sigma ** 2
    return torch.exp(-dist_sq / denom) * scale

Displaying the results.

In [21]:
# Full-resolution frame size, the first box centre, and a Gaussian patch
# whose peak value is 1 (scale=1).
w, h = im.size
x, y = centers[0]
patch = generate_patch(1)

In [22]:
torch.Tensor.numpy(patch)

array([[1.2340980e-04, 1.5034392e-03, 6.7379470e-03, 1.1108996e-02,
        6.7379470e-03, 1.5034392e-03, 1.2340980e-04],
       [1.5034392e-03, 1.8315639e-02, 8.2084998e-02, 1.3533528e-01,
        8.2084998e-02, 1.8315639e-02, 1.5034392e-03],
       [6.7379470e-03, 8.2084998e-02, 3.6787945e-01, 6.0653067e-01,
        3.6787945e-01, 8.2084998e-02, 6.7379470e-03],
       [1.1108996e-02, 1.3533528e-01, 6.0653067e-01, 1.0000000e+00,
        6.0653067e-01, 1.3533528e-01, 1.1108996e-02],
       [6.7379470e-03, 8.2084998e-02, 3.6787945e-01, 6.0653067e-01,
        3.6787945e-01, 8.2084998e-02, 6.7379470e-03],
       [1.5034392e-03, 1.8315639e-02, 8.2084998e-02, 1.3533528e-01,
        8.2084998e-02, 1.8315639e-02, 1.5034392e-03],
       [1.2340980e-04, 1.5034392e-03, 6.7379470e-03, 1.1108996e-02,
        6.7379470e-03, 1.5034392e-03, 1.2340980e-04]], dtype=float32)

Add an extra dimension in order to convert to PIL image.

In [23]:
patch = torch.unsqueeze(patch, dim=0)

In [24]:
patch.shape

torch.Size([1, 7, 7])

In [25]:
t = transforms.ToPILImage()
patch_img = t(patch)
patch_img.show()

In [26]:
to_t = transforms.ToTensor()
to_t(patch_img)

tensor([[[0.0000, 0.0000, 0.0039, 0.0078, 0.0039, 0.0000, 0.0000],
         [0.0000, 0.0157, 0.0784, 0.1333, 0.0784, 0.0157, 0.0000],
         [0.0039, 0.0784, 0.3647, 0.6039, 0.3647, 0.0784, 0.0039],
         [0.0078, 0.1333, 0.6039, 1.0000, 0.6039, 0.1333, 0.0078],
         [0.0039, 0.0784, 0.3647, 0.6039, 0.3647, 0.0784, 0.0039],
         [0.0000, 0.0157, 0.0784, 0.1333, 0.0784, 0.0157, 0.0000],
         [0.0000, 0.0000, 0.0039, 0.0078, 0.0039, 0.0000, 0.0000]]])

The Gaussian Patch is being created properly as seen by the output tensor after converting from img to tensor. Now we should try to figure out how to place the patch in the heatmap.

In [27]:
def heatmap(width, height, center_x, center_y, gau_patch):
    """Place one Gaussian patch on an otherwise empty heat map.

    The patch is centred on ``(center_x, center_y)``. Any part of it that
    falls outside the heat map is cut off; if it lies entirely outside, the
    heat map stays all zeros.

    Args:
        width: heat-map width in pixels.
        height: heat-map height in pixels.
        center_x: column of the key point (truncated to an integer).
        center_y: row of the key point (truncated to an integer).
        gau_patch: 2-D patch (tensor or array) with an odd number of rows and
            columns, e.g. the 7x7 output of ``generate_patch``. A leading
            singleton dimension, i.e. shape ``(1, H, W)``, is also accepted.

    Returns:
        A ``(height, width)`` float32 tensor, including when the patch does
        not touch the heat map.
    """
    canvas = torch.zeros((height, width), dtype=torch.float32)

    patch = torch.as_tensor(gau_patch, dtype=torch.float32, device='cpu').detach()
    if patch.dim() == 3:
        patch = patch.squeeze(0)
    patch_h, patch_w = patch.shape

    # top-left corner of the patch in heat-map coordinates (may be negative).
    left = int(center_x) - patch_w // 2
    top = int(center_y) - patch_h // 2

    # part of the heat map covered by the patch, as half-open ranges.
    x0, x1 = max(left, 0), min(left + patch_w, width)
    y0, y1 = max(top, 0), min(top + patch_h, height)

    # patch lies completely outside the heat map.
    if x0 >= x1 or y0 >= y1:
        return canvas

    # Copy the visible part. Subtracting the corner converts heat-map indices
    # to patch indices, which keeps the patch centred on the key point even
    # when it is clipped at the top or left edge.
    canvas[y0:y1, x0:x1] = patch[y0 - top:y1 - top, x0 - left:x1 - left]

    return canvas

In [28]:
x, y

(1536.6555075593953, 598.8153347732183)

In [None]:
# Build a full-resolution heat map for the first box centre and display it
# as an image.
# NOTE(review): if the cells ran in order, `patch` was given a leading
# dimension in an earlier cell and is (1, 7, 7) here - confirm that
# heatmap() receives the patch shape it expects.
heatmap_example = heatmap(int(w), int(h), int(x), int(y), patch)
heatmap_img = t(heatmap_example)
heatmap_img.show()

Patch was applied to the image, but resizing the image eliminates the patch. So we should resize the image and keypoint coordinates, then apply patch.

In [29]:
# Re-open the frame, print its original (width, height) and shrink it to
# 64x64, the spatial size of the model output shown earlier.
im = Image.open(img_path)
print(im.size)
im = im.resize((64, 64))
im.show()

(1920, 1080)
ERROR! Session/line number was not unique in database. History logging moved to new session 1108


In [30]:
# Scale factors from the 1920x1080 frame to the 64x64 heat-map grid.
rx = 64 / 1920
ry = 64 / 1080

# Map every box centre into heat-map coordinates.
resized_centers = []
for cx, cy in centers:
    scaled_point = (cx * rx, cy * ry)
    resized_centers.append(scaled_point)

Draw new center point in resized image.

In [31]:
# Mark the rescaled centres on the 64x64 image with red pixels and display
# it, to check that the key points survived the resize.
draw = ImageDraw.Draw(im)
for center in resized_centers:
    draw.point(center, fill=(255, 0, 0))
im.show()

The centers are not deformed and are in the correct locations. Now try to create a heatmap containing all of the center key points. We need to define a new heatmap function in order to add multiple key points to the heatmap.

In [32]:
def heatmap(width, height, keypoints, gau_patch):
    """Place one Gaussian patch per key point on a single heat map.

    Every patch is centred on its key point. Parts of a patch that fall
    outside the heat map are cut off, and key points whose patch lies
    entirely outside are skipped. Where patches overlap, the larger value is
    kept.

    Args:
        width: heat-map width in pixels.
        height: heat-map height in pixels.
        keypoints: iterable of ``(x, y)`` key-point coordinates in heat-map
            pixels; each coordinate is truncated to an integer before the
            patch is placed.
        gau_patch: 2-D patch (tensor or array) with an odd number of rows and
            columns, e.g. the 7x7 output of ``generate_patch``. A leading
            singleton dimension, i.e. shape ``(1, H, W)``, is also accepted.
            Values are expected to be non-negative, since patches are merged
            into the zero background with a maximum.

    Returns:
        A ``(height, width)`` float32 tensor.
    """
    canvas = torch.zeros((height, width), dtype=torch.float32)

    patch = torch.as_tensor(gau_patch, dtype=torch.float32, device='cpu').detach()
    if patch.dim() == 3:
        patch = patch.squeeze(0)
    patch_h, patch_w = patch.shape

    for center_x, center_y in keypoints:
        # Truncate the centre once, so all bounds derive from the same
        # integer pixel and float rounding cannot shrink the copied region.
        left = int(center_x) - patch_w // 2
        top = int(center_y) - patch_h // 2

        # part of the heat map covered by this patch, as half-open ranges.
        x0, x1 = max(left, 0), min(left + patch_w, width)
        y0, y1 = max(top, 0), min(top + patch_h, height)

        # patch lies completely outside the heat map: nothing to draw.
        if x0 >= x1 or y0 >= y1:
            continue

        # Subtracting the corner converts heat-map indices to patch indices,
        # which keeps a clipped patch centred on its key point.
        visible = patch[y0 - top:y1 - top, x0 - left:x1 - left]

        # keep the stronger response where patches overlap.
        canvas[y0:y1, x0:x1] = torch.maximum(canvas[y0:y1, x0:x1], visible)

    return canvas

In [33]:
resized_centers

[(51.221850251979845, 35.48535317174627),
 (34.96915046796257, 30.09442444604431),
 (7.329841612670985, 31.89140068794498),
 (29.25424766018719, 29.817966562674975)]

ERROR! Session/line number was not unique in database. History logging moved to new session 1109


In [35]:
patch = generate_patch(1)

In [37]:
heat_ex2 = heatmap(64, 64, resized_centers, patch)

In [38]:
heat_ex2

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

Convert the images to heatmaps and run the model in training and evaluation mode. In other words, create the Dataset element in PyTorch and then generate training rounds.