In [1]:
import torch
from torchvision.transforms import Normalize
import numpy as np
import cv2
import argparse
import json

from models import hmr, SMPL
from utils.imutils import crop
from utils.renderer import Renderer
import config
import constants

In [2]:
def bbox_from_openpose(openpose_file, rescale=1.2, detection_thresh=0.2):
    """Get center and scale for bounding box from openpose detections."""
    with open(openpose_file, 'r') as f:
        keypoints = json.load(f)['people'][0]['pose_keypoints_2d']
    keypoints = np.reshape(np.array(keypoints), (-1,3))
    valid = keypoints[:,-1] > detection_thresh
    valid_keypoints = keypoints[valid][:,:-1]
    center = valid_keypoints.mean(axis=0)
    bbox_size = (valid_keypoints.max(axis=0) - valid_keypoints.min(axis=0)).max()
    # adjust bounding box tightness
    scale = bbox_size / 200.0
    scale *= rescale
    return center, scale

In [3]:
def bbox_from_json(bbox_file):
    """Get center and scale of bounding box from bounding box annotations.
    The expected format is [top_left(x), top_left(y), width, height].
    """
    with open(bbox_file, 'r') as f:
        bbox = np.array(json.load(f)['bbox']).astype(np.float32)
    ul_corner = bbox[:2]
    center = ul_corner + 0.5 * bbox[2:]
    width = max(bbox[2], bbox[3])
    scale = width / 200.0
    # make sure the bounding box is rectangular
    return center, scale

In [4]:
def process_image(img_file, bbox_file, openpose_file, input_res=224):
    """Read image, do preprocessing and possibly crop it according to the bounding box.
    If there are bounding box annotations, use them to crop the image.
    If no bounding box is specified but openpose detections are available, use them to get the bounding box.
    """
    normalize_img = Normalize(mean=constants.IMG_NORM_MEAN, std=constants.IMG_NORM_STD)
    img = cv2.imread(img_file)[:,:,::-1].copy() # PyTorch does not support negative stride at the moment
    if bbox_file is None and openpose_file is None:
        # Assume that the person is centerered in the image
        height = img.shape[0]
        width = img.shape[1]
        center = np.array([width // 2, height // 2])
        scale = max(height, width) / 200
    else:
        if bbox_file is not None:
            center, scale = bbox_from_json(bbox_file)
        elif openpose_file is not None:
            center, scale = bbox_from_openpose(openpose_file)
    img = crop(img, center, scale, (input_res, input_res))
    img = img.astype(np.float32) / 255.
    img = torch.from_numpy(img).permute(2,0,1)
    norm_img = normalize_img(img.clone())[None]
    return img, norm_img

In [5]:
parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint', required=True, help='Path to pretrained checkpoint')
parser.add_argument('--img', type=str, required=True, help='Path to input image')
parser.add_argument('--bbox', type=str, default=None, help='Path to .json file containing bounding box coordinates')
parser.add_argument('--openpose', type=str, default=None, help='Path to .json containing openpose detections')
parser.add_argument('--outfile', type=str, default=None, help='Filename of output images. If not set use input filename.')

_StoreAction(option_strings=['--outfile'], dest='outfile', nargs=None, const=None, default=None, type=<class 'str'>, choices=None, help='Filename of output images. If not set use input filename.', metavar=None)

In [13]:
if __name__ == '__main__':
    args = parser.parse_args(['--checkpoint=data/model_checkpoint.pt','--img=examples/im1010.jpg'])
    
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    
    # Load pretrained model
    model = hmr(config.SMPL_MEAN_PARAMS).to(device)
    checkpoint = torch.load(args.checkpoint)
    model.load_state_dict(checkpoint['model'], strict=False)

    # Load SMPL model
    smpl = SMPL(config.SMPL_MODEL_DIR,
                batch_size=1,
                create_transl=False).to(device)
    model.eval()

    # Setup renderer for visualization
    renderer = Renderer(focal_length=constants.FOCAL_LENGTH, img_res=constants.IMG_RES, faces=smpl.faces)


    # Preprocess input image and generate predictions
    img, norm_img = process_image(args.img, args.bbox, args.openpose, input_res=constants.IMG_RES)
    with torch.no_grad():
        pred_rotmat, pred_betas, pred_camera = model(norm_img.to(device))
        pred_output = smpl(betas=pred_betas, body_pose=pred_rotmat[:,1:], global_orient=pred_rotmat[:,0].unsqueeze(1), pose2rot=False)
        pred_vertices = pred_output.vertices
        
    # Calculate camera parameters for rendering
    camera_translation = torch.stack([pred_camera[:,1], pred_camera[:,2], 2*constants.FOCAL_LENGTH/(constants.IMG_RES * pred_camera[:,0] +1e-9)],dim=-1)
    camera_translation = camera_translation[0].cpu().numpy()
    pred_vertices = pred_vertices[0].cpu().numpy()
    img = img.permute(1,2,0).cpu().numpy()

    
    # Render parametric shape
    img_shape = renderer(pred_vertices, [0,0,1], np.ones_like(img))
    
    # Render side views
    aroundy = cv2.Rodrigues(np.array([0, np.radians(90.), 0]))[0]
    center = pred_vertices.mean(axis=0)
    rot_vertices = np.dot((pred_vertices - center), aroundy) + center
    
    # Render non-parametric shape
    img_shape_side = renderer(rot_vertices, camera_translation, np.ones_like(img))

    outfile = args.img.split('.')[0] if args.outfile is None else args.outfile

    # Save reconstructions
    cv2.imwrite(outfile + '_shape.png', 255 * img_shape[:,:,::-1])
    cv2.imwrite(outfile + '_shape_side.png', 255 * img_shape_side[:,:,::-1])

In [8]:
# Creating an iterator based on the concept of basic rotations https://en.wikipedia.org/wiki/Rotation_matrix#Basic_rotations

t_pose = torch.zeros(1,24,3,3,device='cuda')
t_pose[:] = torch.eye(3)
t_betas = torch.zeros(1,10,device='cuda')

t_pose_model = smpl(betas=t_betas, body_pose=t_pose[:,1:], global_orient=t_pose[:,0].unsqueeze(1), pose2rot=False)

In [14]:
# Setup renderer for visualization
renderer = Renderer(focal_length=constants.FOCAL_LENGTH, img_res=constants.IMG_RES, faces=smpl.faces)

# Calculate camera parameters for rendering
camera_translation = torch.stack([pred_camera[:,1], pred_camera[:,2], 2*constants.FOCAL_LENGTH/(constants.IMG_RES * pred_camera[:,0] +1e-9)],dim=-1)
camera_translation = camera_translation[0].cpu().numpy()
pred_vertices = t_pose_model.vertices
pred_vertices = pred_vertices[0].cpu().numpy()
img = img.permute(1,2,0).cpu().numpy()


# Render parametric shape
img_shape = renderer(pred_vertices, camera_translation, np.ones_like(img))

AttributeError: 'numpy.ndarray' object has no attribute 'permute'

In [6]:
if __name__ == '__main__':
    args = parser.parse_args(['--checkpoint=data/model_checkpoint.pt','--img=examples/im1010.jpg'])
    
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    
    # Load pretrained model
    model = hmr(config.SMPL_MEAN_PARAMS).to(device)
    checkpoint = torch.load(args.checkpoint)
    model.load_state_dict(checkpoint['model'], strict=False)

    # Load SMPL model
    smpl = SMPL(config.SMPL_MODEL_DIR,
                batch_size=1,
                create_transl=False).to(device)
    model.eval()

    # Setup renderer for visualization
    renderer = Renderer(focal_length=constants.FOCAL_LENGTH, img_res=constants.IMG_RES, faces=smpl.faces)


    # Preprocess input image and generate predictions
    img, norm_img = process_image(args.img, args.bbox, args.openpose, input_res=constants.IMG_RES)
    with torch.no_grad():
        pred_rotmat, pred_betas, pred_camera = model(norm_img.to(device))
        #pred_output = smpl(betas=pred_betas, body_pose=pred_rotmat[:,1:], global_orient=pred_rotmat[:,0].unsqueeze(1), pose2rot=False)
        #pred_output = smpl(betas=pred_betas, body_pose=zero_pose, global_orient=pred_rotmat[:,0].unsqueeze(1), pose2rot=False)
        #pred_vertices = pred_output.vertices
        
    # Calculate camera parameters for rendering
    camera_translation = torch.stack([pred_camera[:,1], pred_camera[:,2], 2*constants.FOCAL_LENGTH/(constants.IMG_RES * pred_camera[:,0] +1e-9)],dim=-1)
    camera_translation = camera_translation[0].cpu().numpy()
    pred_vertices = pred_vertices[0].cpu().numpy()
    img = img.permute(1,2,0).cpu().numpy()

    
    # Render parametric shape
    img_shape = renderer(pred_vertices, camera_translation, img)
    
    # Render side views
    aroundy = cv2.Rodrigues(np.array([0, np.radians(90.), 0]))[0]
    center = pred_vertices.mean(axis=0)
    rot_vertices = np.dot((pred_vertices - center), aroundy) + center
    
    # Render non-parametric shape
    img_shape_side = renderer(rot_vertices, camera_translation, np.ones_like(img))

    outfile = args.img.split('.')[0] if args.outfile is None else args.outfile

    # Save reconstructions
    cv2.imwrite(outfile + '_shape.png', 255 * img_shape[:,:,::-1])
    cv2.imwrite(outfile + '_shape_side.png', 255 * img_shape_side[:,:,::-1])

In [7]:
pred_betas

tensor([[-0.0316,  0.6420,  1.0889,  1.4400, -0.2099,  0.1531, -0.1142,  0.3719,
          0.0332, -0.0497]], device='cuda:0')

In [9]:
import opendr
from opendr.renderer import ColoredRenderer
from opendr.lighting import LambertianPointLight
from opendr.camera import ProjectPoints
import matplotlib.pyplot as plt

%matplotlib inline

In [10]:
t_pose_model.vertices

tensor([[[ 0.0449,  0.4941,  0.0896],
         [ 0.0395,  0.4814,  0.0996],
         [ 0.0500,  0.4761,  0.0910],
         ...,
         [-0.0748,  0.4281,  0.0046],
         [-0.0754,  0.4292,  0.0067],
         [-0.0778,  0.4276,  0.0092]]], device='cuda:0')

In [12]:
m = t_pose_model

rn = ColoredRenderer()

## Assign attributes to renderer
w, h = (640, 480)

rn.camera = ProjectPoints(v=m, rt=np.zeros(3), t=np.array([0, 0, 2.]), f=np.array([w,w])/2., c=np.array([w,h])/2., k=np.zeros(5))
rn.frustum = {'near': 1., 'far': 10., 'width': w, 'height': h}
rn.set(v=m, f=m.f, bgcolor=np.zeros(3))

## Construct point light source
rn.vc = LambertianPointLight(
    f=t_pose_model.f,
    v=rn.v,
    num_verts=len(m),
    light_pos=np.array([-1000,-1000,-2000]),
    vc=np.ones_like(m)*.9,
    light_color=np.array([1., 1., 1.]))



## Since we are in Docker without access to X, it's better to save the images. This is easier with matplotlib than with openCV, because cv2.imwrite requires the image to be converted to a compatible form first.

import matplotlib.pyplot as plt
plt.imshow(rn.r)

ValueError: only one element tensors can be converted to Python scalars

In [24]:
t_betas

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], device='cuda:0')

In [25]:
pred_betas

tensor([[-0.0316,  0.6420,  1.0889,  1.4400, -0.2099,  0.1531, -0.1142,  0.3719,
          0.0332, -0.0497]], device='cuda:0')

In [11]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [10]:
pred_rotmat.size()

torch.Size([1, 24, 3, 3])

In [36]:
zero_pose = torch.zeros(1,23,3,3,device='cuda')
zero_pose[:] = torch.eye(3)
zero_pose[0,13] = torch.tensor([[0,0,1],
                   [0,1,0],
                   [-1,0,0]]
zero_pose

tensor([[[[ 1.,  0.,  0.],
          [ 0.,  1.,  0.],
          [ 0.,  0.,  1.]],

         [[ 1.,  0.,  0.],
          [ 0.,  1.,  0.],
          [ 0.,  0.,  1.]],

         [[ 1.,  0.,  0.],
          [ 0.,  1.,  0.],
          [ 0.,  0.,  1.]],

         [[ 1.,  0.,  0.],
          [ 0.,  1.,  0.],
          [ 0.,  0.,  1.]],

         [[ 1.,  0.,  0.],
          [ 0.,  1.,  0.],
          [ 0.,  0.,  1.]],

         [[ 1.,  0.,  0.],
          [ 0.,  1.,  0.],
          [ 0.,  0.,  1.]],

         [[ 1.,  0.,  0.],
          [ 0.,  1.,  0.],
          [ 0.,  0.,  1.]],

         [[ 1.,  0.,  0.],
          [ 0.,  1.,  0.],
          [ 0.,  0.,  1.]],

         [[ 1.,  0.,  0.],
          [ 0.,  1.,  0.],
          [ 0.,  0.,  1.]],

         [[ 1.,  0.,  0.],
          [ 0.,  1.,  0.],
          [ 0.,  0.,  1.]],

         [[ 1.,  0.,  0.],
          [ 0.,  1.,  0.],
          [ 0.,  0.,  1.]],

         [[ 1.,  0.,  0.],
          [ 0.,  1.,  0.],
          [ 0.,  0.,  1.]],

    

In [22]:
pred_output.body_pose

tensor([[[[ 0.9642, -0.1700,  0.2037],
          [ 0.2270,  0.9261, -0.3014],
          [-0.1374,  0.3369,  0.9315]],

         [[ 0.9875,  0.1243,  0.0967],
          [-0.1569,  0.7206,  0.6753],
          [ 0.0142, -0.6821,  0.7311]],

         [[ 0.9873,  0.0194, -0.1575],
          [-0.0382,  0.9924, -0.1169],
          [ 0.1541,  0.1214,  0.9806]],

         [[ 0.9480, -0.0332, -0.3166],
          [-0.1000,  0.9131, -0.3952],
          [ 0.3022,  0.4063,  0.8623]],

         [[ 0.9985, -0.0471, -0.0268],
          [ 0.0416,  0.9832, -0.1778],
          [ 0.0348,  0.1764,  0.9837]],

         [[ 1.0000,  0.0058,  0.0049],
          [-0.0055,  0.9977, -0.0675],
          [-0.0053,  0.0674,  0.9977]],

         [[ 0.9756,  0.1856,  0.1170],
          [-0.1846,  0.9826, -0.0200],
          [-0.1187, -0.0021,  0.9929]],

         [[ 0.9586, -0.2435, -0.1473],
          [ 0.2284,  0.9671, -0.1122],
          [ 0.1698,  0.0740,  0.9827]],

         [[ 0.9954,  0.0024, -0.0959],
         

In [12]:
pred_rotmat[:,0]

tensor([[[ 0.4695,  0.0377, -0.8821],
         [ 0.0895, -0.9960,  0.0050],
         [-0.8784, -0.0813, -0.4709]]], device='cuda:0')

In [15]:
pred_rotmat[:,0].size()

torch.Size([1, 3, 3])

In [16]:
pred_rotmat[:,1:].size()

torch.Size([1, 23, 3, 3])

In [17]:
pred_rotmat[:,0].unsqueeze(1)

tensor([[[[ 0.4695,  0.0377, -0.8821],
          [ 0.0895, -0.9960,  0.0050],
          [-0.8784, -0.0813, -0.4709]]]], device='cuda:0')

In [18]:
pred_rotmat[:,0].unsqueeze(1).size()

torch.Size([1, 1, 3, 3])