In [17]:
import os
import sys
import os.path as osp
import torch
from torchvision.transforms import Normalize
import numpy as np
import cv2
import argparse
import json
import pickle
from datetime import datetime

from demo.demo_options import DemoOptions
from bodymocap.body_mocap_api import BodyMocap
from bodymocap.body_bbox_detector import BodyPoseEstimator
import mocap_utils.demo_utils as demo_utils
import mocap_utils.general_utils as gnu
from mocap_utils.timer import Timer

import renderer.image_utils as imu
# from renderer.viewer2D import ImShow

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
# Set bbox detector.
# The main loop calls body_bbox_detector.detect_body_pose(frame), which returns
# a (body_pose_list, body_bbox_list) pair for the given BGR frame.
body_bbox_detector = BodyPoseEstimator()

Loading Body Pose Estimator


In [3]:
# Set mocap regressor.
# Both paths are relative, so they resolve against the notebook's working
# directory (presumably the repository root -- confirm before running).
checkpoint_path = './extra_data/body_module/pretrained_weights/2020_05_31-00_50_43-best-51.749683916568756.pt'
smpl_dir = './extra_data/smpl/'
# use_smplx=False matches Args.use_smplx in the main cell; the regressor is
# later driven through body_mocap.regress(frame, body_bbox_list).
body_mocap = BodyMocap(checkpoint_path, smpl_dir, device, use_smplx=False)



In [4]:
# Pick the Visualizer implementation for the chosen rendering backend:
# 'pytorch3d' and 'opendr' use renderer.screen_free_visualizer, any other
# value uses renderer.visualizer.
renderer_type = 'pytorch3d'
if renderer_type not in ('pytorch3d', 'opendr'):
    from renderer.visualizer import Visualizer
else:
    from renderer.screen_free_visualizer import Visualizer
visualizer = Visualizer(renderer_type)

In [5]:
# Index of the first video frame to process. The main loop initialises
# cur_frame from this value and reads-and-discards frames until it is reached.
start_frame = 0

In [15]:
# State shared with the processing loop below:
#   cur_frame   - index used to name the outputs of the frame being processed;
#                 starts at start_frame and advances once per processed frame.
#   video_frame - number of frames consumed from the video so far (advanced
#                 both while skipping ahead and when a frame is actually read).
#   timer       - tic()/toc() pair wrapped around each loop iteration.
cur_frame = start_frame
video_frame = 0
timer = Timer()

class Args:
    """Attribute bag holding the demo options used by this notebook.

    Every option is set as an instance attribute in ``__init__`` with a fixed
    default; callers tweak individual attributes after construction. The
    object is handed as-is to the ``demo_utils`` helpers, so options that the
    notebook cells never read directly (e.g. ``input_type``, ``use_smplx``,
    ``save_mesh``) are presumably consumed there -- confirm against
    ``demo_utils``.
    """

    def __init__(self):
        defaults = {
            # Input selection.
            'input_path': './sample_data/han_short.mp4',
            'input_type': 'video',
            # Write each raw frame under <out_dir>/frames.
            'save_frame': True,
            'out_dir': './mocap_output',
            # The loop stops once the frame counter exceeds this value.
            'end_frame': float('inf'),
            'save_bbox_output': True,
            # When True, only the largest detected body is kept.
            'single_person': False,
            'no_display': True,
            'save_pred_pkl': True,
            'use_smplx': False,
            'save_mesh': True,
            # When False, the rendered frames are assembled into a video.
            'no_video_out': False,
        }
        for option, value in defaults.items():
            setattr(self, option, value)

args = Args()

# setup_input receives the options object and returns the resolved input type
# together with the data source; for 'video' the loop below calls
# input_data.read() on it to fetch frames.
# NOTE(review): args.seq_name is read when the output video is generated but is
# not defined by Args -- presumably setup_input attaches it to args; confirm.
input_type, input_data = demo_utils.setup_input(args)

# Main per-frame loop: read a frame, detect people, regress body pose, render,
# and save the results.
# Only input_type == 'video' is handled here; image-directory input and
# on-screen display are not used in this notebook.
# The reader is released in `finally` so the video source is not left open
# when the loop exits, whether normally or through an exception.
try:
    while True:
        timer.tic()

        # load data
        if input_type == 'video':
            _, img_original_bgr = input_data.read()
            # Fast-forward: drop frames until the reader has consumed cur_frame
            # frames (cur_frame starts at start_frame).
            if video_frame < cur_frame:
                video_frame += 1
                continue
            # save the obtained video frames
            image_path = osp.join(args.out_dir, "frames", f"{cur_frame:05d}.jpg")
            if img_original_bgr is not None:
                video_frame += 1
                if args.save_frame:
                    gnu.make_subdir(image_path)
                    cv2.imwrite(image_path, img_original_bgr)
        else:
            assert False, "Unknown input_type"

        cur_frame += 1
        # Stop when no frame was returned or once end_frame has been passed.
        if img_original_bgr is None or cur_frame > args.end_frame:
            break
        print("--------------------------------------")

        body_pose_list, body_bbox_list = body_bbox_detector.detect_body_pose(img_original_bgr)
        # Body-only demo: one empty hand-bbox slot per detected body.
        hand_bbox_list = [None, ] * len(body_bbox_list)

        # save the obtained body & hand bbox to json file
        if args.save_bbox_output:
            demo_utils.save_info_to_json(args, image_path, body_bbox_list, hand_bbox_list)

        if len(body_bbox_list) < 1:
            print(f"No body deteced: {image_path}")
            continue

        # Sort the bboxes from largest to smallest (x[2] * x[3], presumably
        # width * height) to keep the person order as consistent as possible
        # without tracking.
        bbox_size = [(x[2] * x[3]) for x in body_bbox_list]
        idx_big2small = np.argsort(bbox_size)[::-1]
        body_bbox_list = [body_bbox_list[i] for i in idx_big2small]
        if args.single_person:
            # Keep only the largest body. hand_bbox_list is trimmed as well so
            # the per-person lists handed to save_pred_to_pkl stay the same
            # length (at least one body is guaranteed at this point).
            body_bbox_list = body_bbox_list[:1]
            hand_bbox_list = hand_bbox_list[:1]

        # Body Pose Regression
        pred_output_list = body_mocap.regress(img_original_bgr, body_bbox_list)
        assert len(body_bbox_list) == len(pred_output_list)

        # extract mesh for rendering (vertices in image space and faces) from pred_output_list
        pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list)

        # visualization
        res_img = visualizer.visualize(
            img_original_bgr,
            pred_mesh_list=pred_mesh_list,
            body_bbox_list=body_bbox_list)

        # save result image
        if args.out_dir is not None:
            demo_utils.save_res_img(args.out_dir, image_path, res_img)

        # save predictions to pkl
        if args.save_pred_pkl:
            demo_type = 'body'
            demo_utils.save_pred_to_pkl(
                args, demo_type, image_path, body_bbox_list, hand_bbox_list, pred_output_list)

        timer.toc(bPrint=True, title="Time")
        print(f"Processed : {image_path}")
        # NOTE(review): unconditional break -- the loop stops after the first
        # frame that reaches this point (frames without a detection `continue`
        # above). Remove it to process the whole video up to args.end_frame.
        break
finally:
    # input_data is presumably a cv2.VideoCapture; release it if it exposes
    # release() so the underlying file handle is freed.
    release_input = getattr(input_data, 'release', None)
    if callable(release_input):
        release_input()

# Assemble the saved result images into a video, unless video output is
# disabled. Only done for video-like inputs.
# NOTE(review): args.seq_name is not defined by Args; presumably it is attached
# to args by demo_utils.setup_input -- confirm.
if input_type in ('video', 'webcam') and not args.no_video_out:
    demo_utils.gen_video_out(args.out_dir, args.seq_name)

# cv2.destroyAllWindows()

--------------------------------------
Bbox saved: ./mocap_output/bbox/00000_bbox.json
Visualization saved: ./mocap_output/rendered/00000.jpg
Prediction saved: ./mocap_output/mocap/00000_prediction_result.pkl
Time: 0.62 sec/frame, FPS 1.61
Processed : ./mocap_output/frames/00000.jpg
>> Generating video in ./mocap_output/han_short.mp4


In [25]:
# Inspect the meshes extracted for the last processed frame: a list of dicts
# (presumably one per detected person), each with 'vertices' and 'faces' arrays.
pred_mesh_list

[{'vertices': array([[404.13864 , 128.8618  , -26.815046],
         [402.08905 , 131.59969 , -29.17342 ],
         [404.93375 , 133.3429  , -27.826658],
         ...,
         [375.07648 , 144.8882  ,  -4.135   ],
         [375.08624 , 144.30122 ,  -4.34855 ],
         [374.25165 , 144.62494 ,  -5.059637]], dtype=float32),
  'faces': array([[   1,    2,    0],
         [   0,    2,    3],
         [   2,    1,    4],
         ...,
         [4805, 3511, 6309],
         [3511, 1330, 6309],
         [6309, 1330, 4687]], dtype=int32)}]

In [23]:
# Shape of the first mesh's vertex array: (num_vertices, 3).
pred_mesh_list[0]['vertices'].shape

(6890, 3)

In [24]:
# Shape of the first mesh's face array: (num_faces, 3), three vertex indices per face.
pred_mesh_list[0]['faces'].shape

(13776, 3)