In [1]:
import numpy as np
from utils.inference import *

import time

class Timer:
    """Context manager that prints how long its ``with`` block took.

    Parameters
    ----------
    message : str, optional
        Label printed in front of the measured duration.

    Attributes
    ----------
    message : str
        The label passed to the constructor.
    start_time : float or None
        ``time.perf_counter()`` reading taken on entry; ``None`` before the
        first use.
    elapsed : float or None
        Duration in seconds of the most recently finished ``with`` block;
        ``None`` until a block has finished.
    """

    def __init__(self, message="Elapsed time"):
        self.message = message
        self.start_time = None
        self.elapsed = None

    def __enter__(self):
        # perf_counter() is monotonic and high-resolution, so the measurement
        # cannot be skewed by system clock adjustments the way time.time() can.
        self.start_time = time.perf_counter()
        return self  # lets callers write `with Timer(...) as t` and read t.elapsed

    def __exit__(self, exc_type, exc_value, traceback):
        self.elapsed = time.perf_counter() - self.start_time
        print(f"{self.message}: {self.elapsed:.4f} seconds")
        # Implicitly returns None, so an exception raised inside the block
        # still propagates after the timing line is printed.

In [2]:
# Synthetic benchmark input: standard-normal float64 samples with shape
# (32, 25, 384, 384), which the functions below unpack as
# (batch_size, num_joints, height, width). Roughly 0.9 GiB of memory.
# NOTE(review): drawn from the unseeded global RNG, so the values differ
# between runs; gaussian_blur1 also overwrites this array in place.
hm = np.random.randn(32, 25, 384, 384)

In [3]:
def gaussian_blur1(hm, kernel):
    """Blur every heatmap in place with ``cv2.GaussianBlur`` (reference version).

    Each ``hm[i, j]`` plane is copied into a zero-padded buffer, blurred with
    a ``kernel`` x ``kernel`` Gaussian (sigma argument 0), cropped back and
    rescaled so that its maximum equals the maximum it had before blurring.

    Parameters
    ----------
    hm : numpy.ndarray
        4-D array unpacked as ``(batch_size, num_joints, height, width)``.
        It is overwritten with the result.
    kernel : int
        Side length of the Gaussian kernel handed to OpenCV.

    Returns
    -------
    numpy.ndarray
        The same ``hm`` object that was passed in, now holding the blurred
        and rescaled heatmaps.
    """
    border = (kernel - 1) // 2
    batch_size, num_joints, height, width = hm.shape

    # Explicit end indices: the `border:-border` shorthand collapses to an
    # empty slice when border == 0 (kernel == 1) and made that case crash.
    rows = slice(border, border + height)
    cols = slice(border, border + width)

    for i in range(batch_size):
        for j in range(num_joints):
            origin_max = np.max(hm[i, j])
            dr = np.zeros((height + 2 * border, width + 2 * border))
            dr[rows, cols] = hm[i, j]
            dr = cv2.GaussianBlur(dr, (kernel, kernel), 0)
            hm[i, j] = dr[rows, cols]
            blurred_max = np.max(hm[i, j])
            # A zero maximum would turn the rescale into 0/0 and fill the
            # plane with NaN; leave such planes as they are.
            if blurred_max != 0:
                hm[i, j] *= origin_max / blurred_max
    return hm

In [4]:
def gaussian_blur2(hm, kernel):
    """Blur every heatmap with ``cv2.GaussianBlur`` using one shared padded buffer.

    The padding and the peak rescaling are done on the whole batch at once;
    the blur itself is still applied plane by plane because
    ``cv2.GaussianBlur`` is called on one 2-D array at a time.

    Parameters
    ----------
    hm : numpy.ndarray
        4-D array unpacked as ``(batch_size, num_joints, height, width)``.
        It is not modified.
    kernel : int
        Side length of the Gaussian kernel handed to OpenCV.

    Returns
    -------
    numpy.ndarray
        Blurred heatmaps with the shape of ``hm``, each rescaled so that its
        maximum equals the pre-blur maximum. The result is a float64 view
        into the internal padded buffer.
    """
    border = (kernel - 1) // 2
    batch_size, num_joints, height, width = hm.shape

    # Explicit end indices: `border:-border` is an empty slice when
    # border == 0 (kernel == 1), which made that case crash.
    rows = slice(border, border + height)
    cols = slice(border, border + width)

    # Zero-initialised padded buffer with the original heatmaps in its centre
    dr = np.zeros((batch_size, num_joints, height + 2 * border, width + 2 * border))
    dr[:, :, rows, cols] = hm

    # Blur each padded plane individually (not vectorised)
    for i in range(batch_size):
        for j in range(num_joints):
            dr[i, j] = cv2.GaussianBlur(dr[i, j], (kernel, kernel), 0)

    # Crop the blurred result back to the original spatial size
    hm_blurred = dr[:, :, rows, cols]

    # Restore each plane's original maximum
    max_orig = np.max(hm, axis=(2, 3), keepdims=True)
    max_blurred = np.max(hm_blurred, axis=(2, 3), keepdims=True)

    # Avoid division by zero
    max_blurred[max_blurred == 0] = 1

    hm_blurred *= max_orig / max_blurred

    return hm_blurred

In [12]:
import numpy as np
from scipy.ndimage import gaussian_filter

def gaussian_blur3(hm, kernel):
    """Approximate the heatmap blur with a single ``scipy.ndimage.gaussian_filter`` call.

    Only the last two axes are smoothed (sigma is 0 on the first two), the
    border is treated as constant, and every plane is rescaled afterwards so
    that its maximum equals the pre-blur maximum.

    Parameters
    ----------
    hm : array_like
        4-D heatmap batch; axes 2 and 3 are the ones being blurred.
    kernel : int
        Nominal kernel size; the filter's sigma is set to ``(kernel - 1) / 6``.

    Returns
    -------
    numpy.ndarray
        Newly allocated array of blurred, peak-restored heatmaps.
    """
    spatial_sigma = (kernel - 1) / 6.0
    per_axis_sigma = (0, 0, spatial_sigma, spatial_sigma)
    smoothed = gaussian_filter(hm, sigma=per_axis_sigma, mode='constant')

    spatial_axes = (2, 3)
    peak_before = np.max(hm, axis=spatial_axes, keepdims=True)
    peak_after = np.max(smoothed, axis=spatial_axes, keepdims=True)

    # A zero peak would make the ratio below divide by zero.
    np.putmask(peak_after, peak_after == 0, 1)

    # Rescale in place so each plane regains its original maximum.
    np.multiply(smoothed, peak_before / peak_after, out=smoothed)
    return smoothed


In [17]:
import numpy as np
from scipy.ndimage import gaussian_filter

def gaussian_blur4(hm, kernel):
    """Blur heatmaps with scipy using a ``kernel``-tap, zero-extended Gaussian.

    Sigma is derived from ``kernel`` with the same formula used elsewhere in
    this notebook (``0.3 * ((kernel - 1) * 0.5 - 1) + 0.8``, floored at 0.8).
    The filter is truncated so that it spans exactly ``kernel`` taps per axis,
    rather than scipy's default of four standard deviations, and values
    outside the heatmap are treated as zero. Every plane is then rescaled so
    that its maximum equals the pre-blur maximum.

    Parameters
    ----------
    hm : numpy.ndarray
        4-D array laid out as ``(batch_size, num_joints, height, width)``;
        only axes 2 and 3 are blurred. It is not modified.
    kernel : int
        Kernel side length (the filter radius is ``(kernel - 1) // 2``).

    Returns
    -------
    numpy.ndarray
        Blurred, peak-restored heatmaps with the shape of ``hm``.
    """
    border = (kernel - 1) // 2

    # Original per-plane maxima, used to undo the attenuation of the blur
    origin_max = np.max(hm, axis=(2, 3), keepdims=True)

    # Estimate sigma for the Gaussian kernel
    sigma = 0.3 * ((kernel - 1) * 0.5 - 1) + 0.8
    sigma = max(sigma, 0.8)

    # scipy uses a radius of int(truncate * sigma + 0.5); choosing
    # truncate = border / sigma pins that radius to `border`, i.e. a filter
    # of exactly `kernel` taps instead of the much wider default.
    truncate = border / sigma

    # mode='constant' with cval=0 zero-extends the heatmap, which replaces
    # the manual pad-then-crop (and its empty-slice crash for kernel == 1).
    hm_blurred = gaussian_filter(
        hm,
        sigma=(0, 0, sigma, sigma),
        mode='constant',
        cval=0.0,
        truncate=truncate,
    )

    # Scale back to the original maximum; guard exact zeros rather than
    # adding an epsilon, which would leave the peak slightly too small.
    max_blurred = np.max(hm_blurred, axis=(2, 3), keepdims=True)
    max_blurred[max_blurred == 0] = 1
    hm_scaled = hm_blurred * (origin_max / max_blurred)

    return hm_scaled


In [20]:
import numpy as np
import cv2
from numba import njit, prange

def cv2_gaussian_blur(image, kernel_size):
    """Blur ``image`` with a square OpenCV Gaussian kernel.

    Thin wrapper around ``cv2.GaussianBlur`` that expands ``kernel_size``
    into the ``(kernel_size, kernel_size)`` tuple OpenCV expects and passes
    0 as the sigma argument.

    Parameters
    ----------
    image : numpy.ndarray
        Image (here: one padded heatmap plane) to blur.
    kernel_size : int
        Side length of the square kernel.

    Returns
    -------
    numpy.ndarray
        Whatever ``cv2.GaussianBlur`` returns for these arguments.
    """
    square_ksize = (kernel_size, kernel_size)
    blurred = cv2.GaussianBlur(image, square_ksize, 0)
    return blurred

# cv2 functions cannot be JIT-compiled, so the blur is delegated to the plain
# Python wrapper cv2_gaussian_blur. Despite the name, no numba decorator is
# applied to this function: it runs as ordinary Python.
def gaussian_blur_numba(hm, kernel):
    """Blur every heatmap with ``cv2_gaussian_blur`` and restore its peak.

    Each ``hm[i, j]`` plane is zero-padded by ``(kernel - 1) // 2`` pixels,
    blurred, cropped back to its original size and, when the blurred maximum
    is positive, rescaled so that its maximum equals the pre-blur maximum.

    Parameters
    ----------
    hm : numpy.ndarray
        4-D array unpacked as ``(batch_size, num_joints, height, width)``.
        It is not modified.
    kernel : int
        Side length of the Gaussian kernel.

    Returns
    -------
    numpy.ndarray
        New array with the shape and dtype of ``hm``.
    """
    border = (kernel - 1) // 2
    batch_size, num_joints, height, width = hm.shape

    # Explicit end indices: `border:-border` is an empty slice when
    # border == 0 (kernel == 1), which made that case crash.
    rows = slice(border, border + height)
    cols = slice(border, border + width)

    hm_blurred = np.zeros_like(hm)
    for i in range(batch_size):
        for j in range(num_joints):
            origin_max = np.max(hm[i, j])

            # Pad the heatmap with zeros
            dr = np.pad(hm[i, j], pad_width=border, mode='constant')

            # Apply Gaussian blur (cannot be JIT-compiled)
            dr = cv2_gaussian_blur(dr, kernel)

            # Crop back to original size
            dr_cropped = dr[rows, cols]

            # Normalize; planes whose blurred maximum is not positive are
            # stored unscaled.
            max_dr = np.max(dr_cropped)
            if max_dr > 0:
                hm_blurred[i, j] = dr_cropped * (origin_max / max_dr)
            else:
                hm_blurred[i, j] = dr_cropped

    return hm_blurred


In [24]:
import numpy as np
from scipy.signal import fftconvolve

def gaussian_kernel(size, sigma=0):
    """Build a normalised 2-D Gaussian kernel of shape ``(size, size)``.

    Parameters
    ----------
    size : int
        Number of taps along each axis.
    sigma : float, optional
        Standard deviation. When it is not positive, it is derived from
        ``size`` as ``0.3 * ((size - 1) * 0.5 - 1) + 0.8``.

    Returns
    -------
    numpy.ndarray
        Kernel whose entries sum to 1.
    """
    if sigma <= 0:
        sigma = 0.3 * ((size - 1) * 0.5 - 1) + 0.8

    # Tap offsets relative to the kernel centre, e.g. [-2, -1, 0, 1, 2] for size 5.
    offsets = np.arange(-size // 2 + 1., size // 2 + 1.)

    # Squared distance of every (row, column) tap from the centre via broadcasting.
    squared_radius = offsets[np.newaxis, :] ** 2 + offsets[:, np.newaxis] ** 2

    weights = np.exp(-squared_radius / (2. * sigma ** 2))
    return weights / weights.sum()

def gaussian_blur6(hm, kernel_size):
    """Blur every heatmap by FFT convolution with ``gaussian_kernel(kernel_size)``.

    Each plane is convolved with the 2-D kernel using
    ``scipy.signal.fftconvolve(..., mode='same')`` and, when the blurred
    maximum is positive, rescaled so that its maximum equals the pre-blur
    maximum.

    Parameters
    ----------
    hm : numpy.ndarray
        4-D array unpacked as ``(batch_size, num_joints, height, width)``.
        It is not modified.
    kernel_size : int
        Side length of the Gaussian kernel.

    Returns
    -------
    numpy.ndarray
        New array with the shape and dtype of ``hm``.
    """
    batch_size, num_joints, height, width = hm.shape
    kernel = gaussian_kernel(kernel_size)

    hm_blurred = np.zeros_like(hm)
    for i in range(batch_size):
        for j in range(num_joints):
            origin_max = np.max(hm[i, j])

            # mode='same' already treats everything outside the plane as zero
            # and returns the centred, input-sized part of the convolution,
            # so no manual pad/crop is needed. The old `pad:-pad` crop was
            # also an empty slice for kernel_size == 1.
            dr = fftconvolve(hm[i, j], kernel, mode='same')

            # Normalize; planes whose blurred maximum is not positive are
            # stored unscaled.
            max_dr = np.max(dr)
            if max_dr > 0:
                hm_blurred[i, j] = dr * (origin_max / max_dr)
            else:
                hm_blurred[i, j] = dr

    return hm_blurred


In [28]:
import numpy as np
import cv2
from multiprocessing import Pool, cpu_count

def process_heatmap(args):
    """Blur one padded heatmap plane; worker function for ``Pool.map``.

    Parameters
    ----------
    args : tuple
        ``(hm_padded_i_j, origin_max, border, kernel)``: the zero-padded 2-D
        plane, the maximum of the unpadded plane, the padding width on each
        side, and the Gaussian kernel side length. They are packed into a
        single tuple because ``Pool.map`` passes exactly one argument.

    Returns
    -------
    numpy.ndarray
        The blurred plane with the padding cropped off. It is rescaled so
        that its maximum equals ``origin_max`` when the blurred maximum is
        positive, and returned unscaled otherwise.
    """
    hm_padded_i_j, origin_max, border, kernel = args
    dr = cv2.GaussianBlur(hm_padded_i_j, (kernel, kernel), 0)

    # Explicit end indices: `border:-border` is an empty slice when
    # border == 0 (kernel == 1).
    padded_rows, padded_cols = dr.shape[:2]
    dr_cropped = dr[border:padded_rows - border, border:padded_cols - border]

    max_dr = np.max(dr_cropped)
    if max_dr > 0:
        return dr_cropped * (origin_max / max_dr)
    else:
        return dr_cropped

def gaussian_blur7(hm, kernel):
    """Blur every heatmap in parallel worker processes.

    The batch is zero-padded once along its two spatial axes, one task per
    ``(i, j)`` plane is handed to ``process_heatmap`` through a
    ``multiprocessing.Pool`` sized to ``cpu_count()``, and the per-plane
    results are stacked back into the original 4-D layout.

    Parameters
    ----------
    hm : numpy.ndarray
        4-D array unpacked as ``(batch_size, num_joints, height, width)``.
    kernel : int
        Side length of the Gaussian kernel.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(batch_size, num_joints, height, width)`` holding
        the blurred planes returned by the workers.
    """
    border = (kernel - 1) // 2
    batch_size, num_joints, height, width = hm.shape

    # Pad only the spatial axes; batch and joint axes are left untouched.
    spatial_pad = (border, border)
    hm_padded = np.pad(
        hm,
        pad_width=((0, 0), (0, 0), spatial_pad, spatial_pad),
        mode='constant',
    )

    # One task tuple per plane, in row-major (batch, joint) order so the
    # flat result list can be reshaped straight back.
    tasks = [
        (hm_padded[b, k], np.max(hm[b, k]), border, kernel)
        for b in range(batch_size)
        for k in range(num_joints)
    ]

    with Pool(processes=cpu_count()) as workers:
        blurred_planes = workers.map(process_heatmap, tasks)

    return np.array(blurred_planes).reshape(batch_size, num_joints, height, width)


In [30]:
import numpy as np
import torch
import torch.nn.functional as F

def gaussian_blur_torch(hm, kernel_size, sigma=None):
    """Blur heatmaps with a depthwise Gaussian convolution in PyTorch.

    The batch is zero-padded by ``(kernel_size - 1) // 2`` pixels, convolved
    with one ``kernel_size`` x ``kernel_size`` Gaussian per joint channel
    (``groups=num_joints``), and every plane is rescaled so that its maximum
    equals the pre-blur maximum. The work runs on CUDA when available and on
    the CPU otherwise.

    Parameters
    ----------
    hm : numpy.ndarray or torch.Tensor
        4-D batch laid out as ``(batch_size, num_joints, height, width)``.
        NumPy input is converted to float32; non-floating tensors are
        converted to float32 as well.
    kernel_size : int
        Positive odd side length of the Gaussian kernel.
    sigma : float, optional
        Standard deviation of the Gaussian. When omitted or not positive it
        is derived from ``kernel_size`` as
        ``0.3 * ((kernel_size - 1) * 0.5 - 1) + 0.8``, the same formula used
        by ``gaussian_kernel`` and ``gaussian_blur4`` in this notebook, so the
        variants stay comparable.

    Returns
    -------
    numpy.ndarray
        Blurred, peak-restored heatmaps with the shape of ``hm``.

    Raises
    ------
    ValueError
        If ``kernel_size`` is not a positive odd integer.
    """
    if kernel_size < 1 or kernel_size % 2 == 0:
        # An even size cannot be centred on a pixel: the symmetric padding
        # below would no longer return an output of the input size.
        raise ValueError(
            f"kernel_size must be a positive odd integer, got {kernel_size}"
        )

    # Check if CUDA is available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Convert hm to a floating-point PyTorch tensor
    if isinstance(hm, np.ndarray):
        hm = torch.from_numpy(hm).float()
    elif not hm.is_floating_point():
        hm = hm.float()
    # detach() so a tensor that tracks gradients can be turned back into NumPy.
    hm = hm.detach().to(device)

    _, num_joints, _, _ = hm.shape
    padding = (kernel_size - 1) // 2

    if sigma is None or sigma <= 0:
        sigma = 0.3 * ((kernel_size - 1) * 0.5 - 1) + 0.8

    # Build the 2-D kernel as the outer product of a 1-D Gaussian profile,
    # in the dtype of the input so conv2d does not hit a dtype mismatch.
    offsets = torch.arange(-padding, padding + 1, device=device, dtype=hm.dtype)
    profile = torch.exp(-offsets ** 2 / (2 * sigma ** 2))
    kernel = profile.view(-1, 1) * profile.view(1, -1)
    kernel = kernel / kernel.sum()

    # Reshape kernel to [out_channels, in_channels/groups, kH, kW]
    kernel = kernel.view(1, 1, kernel_size, kernel_size)
    kernel = kernel.repeat(num_joints, 1, 1, 1)

    # Store original maximum values
    origin_max = hm.amax(dim=(2, 3), keepdim=True)

    # Zero-pad, then blur each joint channel independently (group convolution)
    hm_padded = F.pad(hm, (padding, padding, padding, padding), mode='constant', value=0)
    hm_blurred = F.conv2d(hm_padded, kernel, groups=num_joints)

    # Normalize to maintain the original maximum value. Exact zeros are
    # replaced by 1 instead of adding an epsilon everywhere, so non-degenerate
    # planes get their peak back without bias.
    max_blurred = hm_blurred.amax(dim=(2, 3), keepdim=True)
    max_blurred = max_blurred + (max_blurred == 0).to(max_blurred.dtype)
    hm_blurred = hm_blurred * (origin_max / max_blurred)

    return hm_blurred.cpu().numpy()


In [34]:
import time


def get_final_preds(hm, blur_kernel=11):
    """Run the timed post-processing pipeline on a batch of heatmaps.

    Stages, each wrapped in a ``Timer`` that prints its duration:

    1. ``get_max_preds(hm)`` yields initial ``coords`` and ``maxvals``.
    2. ``gaussian_blur_torch(hm, blur_kernel)`` smooths the heatmaps, which
       are then clamped to at least ``1e-10`` and log-transformed.
    3. ``taylor(hm[n][p], coords[n][p])`` is applied to every
       (sample, joint) pair and its result replaces ``coords[n, p]``.

    ``get_max_preds`` and ``taylor`` are defined outside this cell
    (presumably in ``utils.inference`` -- TODO confirm), so their exact
    semantics are not visible here.

    Parameters
    ----------
    hm : numpy.ndarray
        Heatmap batch; indexed as ``hm[n][p]`` per sample ``n`` and joint ``p``.
    blur_kernel : int, optional
        Kernel size forwarded to ``gaussian_blur_torch``. Defaults to 11,
        the value that used to be hard-coded.

    Returns
    -------
    tuple
        ``(preds, maxvals)`` where ``preds`` is a copy of the refined
        ``coords`` and ``maxvals`` is passed through from ``get_max_preds``.
    """
    with Timer("get_max_preds"):
        coords, maxvals = get_max_preds(hm)

    # post-processing
    with Timer("gaussian_blur"):
        hm = gaussian_blur_torch(hm, blur_kernel)
    # Clamp before the log so non-positive values cannot yield -inf or NaN.
    hm = np.log(np.maximum(hm, 1e-10))

    with Timer("taylor"):
        for n in range(coords.shape[0]):
            for p in range(coords.shape[1]):
                coords[n, p] = taylor(hm[n][p], coords[n][p])

    # NOTE: the per-sample transform_preds(coords[i], center[i], scale[i], ...)
    # step is not applied here; it needs center/scale, which this function
    # does not receive, so the returned coordinates are the untransformed
    # `coords` values.
    preds = coords.copy()

    return preds, maxvals


# Run the pipeline once on the synthetic batch. Each Timer prints its stage
# duration; the trailing semicolon keeps the notebook from echoing the
# returned (preds, maxvals) tuple.
get_final_preds(hm);


get_max_preds: 0.1229 seconds
gaussian_blur: 0.5193 seconds
taylor: 0.0413 seconds


In [5]:
# %%time

# get_final_preds(hm);