### 3D Skeleton Visualization

In [1]:
import os
import json
import numpy as np
import plotly.graph_objects as go
from ipywidgets import interact
import copy
from scipy.spatial.transform import Rotation as R
from test_utils import *

### Load outputs

In [2]:
# Root of the local MotionBERT checkout and the folder holding its inference results.
motionbert_root = "/home/hrai/codes/MotionBERT"
mb_output_root = os.path.join(motionbert_root, "output")
mb_aihub_output_root = os.path.join(mb_output_root, "aihub_30")
# Fail early and name the offending path. isdir (rather than exists) because the
# folder is enumerated with os.listdir when the dataset index is built.
assert os.path.isdir(mb_aihub_output_root), \
    "MotionBERT output directory not found: {}".format(mb_aihub_output_root)

In [3]:
def get_action_id(action, mapping=None):
    """Return the action ID whose name equals ``action``.

    Args:
        action: Action name to look up, e.g. "Squat".
        mapping: Optional dict of action ID -> action name. When omitted, the
            notebook-level ``action_dict`` is used; it is resolved at call
            time, so the cell defining it only has to run before the first call.

    Returns:
        The first matching ID in the mapping's iteration order, or ``None``
        when no entry carries that name.
    """
    if mapping is None:
        mapping = action_dict
    for action_id, name in mapping.items():
        if name == action:
            return action_id
    return None

In [4]:
# Nested index of MotionBERT results: action name -> actor -> camera id -> {'output_path': ...}.
dataset = {}
# AIHUB action ID (as it appears in the folder name) -> readable action name.
action_dict = {
    '23' : "BenchPress",
    '30' : "Squat",
    '33' : "Push_Up",
    '49' : "Butterfly"
}

# sorted() gives a reproducible order; os.listdir returns entries in arbitrary order.
for video in sorted(os.listdir(mb_aihub_output_root)):
    print(video)
    # Folder names are split into four '_'-separated fields: prefix, action id, actor, camera id.
    parts = video.split('_')
    if len(parts) != 4:
        # Stray entries would otherwise abort the whole scan with an unpacking ValueError.
        print("Skipping {}: name does not split into 4 '_'-separated fields".format(video))
        continue
    _, action_id, actor, cam_id = parts

    if action_id not in action_dict:
        print("Action ID {} is not in action_dict".format(action_id))
        continue

    # setdefault creates the intermediate levels on first use.
    cameras = dataset.setdefault(action_dict[action_id], {}).setdefault(actor, {})
    if cam_id in cameras:
        print("Video {} is already in dataset".format(video))
        continue
    cameras[cam_id] = {'output_path': os.path.join(mb_aihub_output_root, video)}

MotionBERT_30_M160A_3


In [5]:
# Attach the predicted pose array to every (action, actor, camera) entry of `dataset`.
for action, actors in dataset.items():
    for actor, cameras in actors.items():
        for cam_num, entry in cameras.items():
            print(entry)
            output_path = entry['output_path']
            # Select files by extension: os.listdir returns names in arbitrary order,
            # so positional unpacking into (mp4, npy) is not reliable.
            files = os.listdir(output_path)
            mp4_files = [name for name in files if name.endswith('.mp4')]
            npy_files = [name for name in files if name.endswith('.npy')]
            assert mp4_files, "no mp4 file in {}".format(output_path)
            assert len(npy_files) == 1, \
                "expected exactly one npy file in {}, found {}".format(output_path, npy_files)
            npy = npy_files[0]
            output = np.load(os.path.join(output_path, npy))
            print(output.shape)
            entry['output'] = output

{'output_path': '/home/hrai/codes/MotionBERT/output/aihub_30/MotionBERT_30_M160A_3'}
(144, 17, 3)


In [6]:
# Echo the nested index: action name -> actor -> camera id -> {'output_path', 'output'}.
dataset

{'Squat': {'M160A': {'3': {'output_path': '/home/hrai/codes/MotionBERT/output/aihub_30/MotionBERT_30_M160A_3',
    'output': array([[[ 1.70216372e-04,  3.55981640e-04,  0.00000000e+00],
            [ 1.06207982e-01,  1.21679269e-02, -5.88868968e-02],
            [ 7.93617517e-02,  4.58368808e-01,  7.30124116e-02],
            ...,
            [ 1.61644265e-01, -6.06383204e-01, -1.93266660e-01],
            [ 2.45332122e-01, -2.42874220e-01, -1.65020734e-01],
            [ 2.79138982e-01,  1.93081684e-02, -7.88548514e-02]],
    
           [[ 1.77083100e-04,  3.36375437e-04,  1.71567686e-03],
            [ 1.06296457e-01,  1.21887997e-02, -5.89347221e-02],
            [ 7.95113966e-02,  4.56995338e-01,  7.26836920e-02],
            ...,
            [ 1.61761433e-01, -6.06790602e-01, -1.93121448e-01],
            [ 2.45349795e-01, -2.43140787e-01, -1.64609149e-01],
            [ 2.78788477e-01,  1.94656923e-02, -7.89625049e-02]],
    
           [[ 1.35206268e-04,  3.33484611e-04,  1.751

### Visualize 3D pose

In [9]:
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.gridspec as gridspec

# Switch matplotlib to the 'TkAgg' backend for the figures drawn below.
plt.switch_backend('TkAgg')
# Font type 42 for PDF and PostScript output — presumably TrueType embedding so text
# stays editable in saved figures; confirm against the matplotlib rcParams docs.
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

In [10]:
# Pick one MotionBERT prediction out of the nested dataset index.
action, actor, cam_num = 'Squat', 'M160A', '3'
frame_num = 0
pred = dataset[action][actor][cam_num]['output'][frame_num]  # pose at index frame_num
pred_hat = get_rootrel_pose(pred)

# 3D axes with a fixed +/-512 range on every axis.
fig = plt.figure(0, figsize=(10, 10))
ax = plt.axes(projection="3d")
ax.set(xlim=(-512, 512), ylim=(-512, 512), zlim=(-512, 512),
       xlabel='X', ylabel='Y', zlabel='Z')
ax.view_init(elev=12., azim=80)
show3Dpose(pred_hat, ax)
#plt.savefig('mb_result_{}_{}_{}_{}.png'.format(get_action_id(action), actor, cam_num, frame_num))
plt.show()

### Load GT

In [11]:
from zipfile import ZipFile

In [12]:
# Root folder of the AIHUB dataset on this machine.
aihub_root = "/home/hrai/Datasets/HAAI/AIHUB"
# Zip archive with the 3D label JSON files under label/train ("라벨" is Korean for "label").
aihub_3d_gt_path = os.path.join(aihub_root, "label/train/[라벨]3D_json.zip")
# Echo the resolved path.
aihub_3d_gt_path

'/home/hrai/Datasets/HAAI/AIHUB/label/train/[라벨]3D_json.zip'

In [13]:
# Open the 3D-label archive read-only; the handle is kept open because later
# cells read individual members from it.
zip_label = ZipFile(aihub_3d_gt_path, mode='r')
list_input = zip_label.namelist()  # member paths contained in the archive

In [15]:
# Select the ground-truth sample by action ID, actor ID and frame number (all strings).
# Note: `action` holds the numeric ID here, whereas the prediction-plot cell bound it
# to the action name 'Squat'.
action = '30'
actor = 'M160A'
frame = '30'
# Alternative sample, kept for reference:
# actor = 'M180D' #'M160A'
# frame = '311' # '30'
# Member layout inside the archive: 3D_json/<action>_<actor>/3D_<action>_<actor>_<frame>.json
load_path = "3D_json/{}_{}/3D_{}_{}_{}.json".format(action, actor, action, actor, frame)
# Read the member as bytes, decode as UTF-8 and parse the JSON into a dict.
gt = json.loads(zip_label.read(load_path).decode('utf-8'))

In [16]:
# List the top-level sections of the label file, then the fields of each section.
for section in (gt, gt['info'], gt['annotations']):
    print(section.keys())

dict_keys(['info', 'annotations'])
dict_keys(['supercategory', 'action_category_id', 'actor_id', '3d_pos', '3d_rot'])
dict_keys(['frame_no', 'obj_path', '3d_pos', '3d_rot', 'trans_params'])


### Load camera parameter

In [17]:
# Zip archive with the camera parameter JSON files under label/train.
aihub_camera_param_path = os.path.join(aihub_root, "label/train/Camera_json_train.zip")

In [18]:
# Open the camera-parameter archive read-only; it is read from in the following cell.
zip_param = ZipFile(aihub_camera_param_path, mode='r')
# NOTE: this rebinds `list_input`, which previously held the 3D-label archive's members.
list_input = zip_param.namelist()

In [19]:
# Select the camera whose parameters are loaded: action ID, actor ID and camera number.
action = '30'
# Alternative sample, kept for reference:
# #actor = 'M180D'
# cam_num = '4'
actor = 'M160A'
cam_num = '3'
# Member layout inside the archive: Camera_json/train/<action>_<actor>_<cam_num>.json
load_path = "Camera_json/train/{}_{}_{}.json".format(action, actor, cam_num)
# Read the member as bytes, decode as UTF-8 and parse the JSON into a dict.
camera_param = json.loads(zip_param.read(load_path).decode('utf-8'))

In [20]:
# Echo the parsed camera parameters (the 'extrinsics' and 'intrinsics' entries are used below).
camera_param

{'camera_date': '20201008',
 'camera_no': 3,
 'extrinsics': [[-0.733395875, 0.026952436, 0.679267645, -8.36987305],
  [-0.155711249, -0.979308605, -0.129261598, 740.389099],
  [0.66172874, -0.200569466, 0.722417653, 4432.7334]],
 'intrinsics': [[0.68166077, 0.0, 0.50988585],
  [0.0, 0.68166077, 0.26416245],
  [0.0, 0.0, 1.0]]}

In [21]:
# Frame width and height; only W is used below, to denormalize the intrinsics.
W, H = 1920, 1080

# 3x4 extrinsic matrix: first three columns are the rotation, last column the translation.
extrinsic_properties = np.array(camera_param['extrinsics'])
# NOTE: `R` rebinds the name imported in the first cell (scipy `Rotation as R`).
R = extrinsic_properties[:, :3].copy()
T = extrinsic_properties[:, 3].copy()
R_c = R.T
C = -(R_c @ T)  # -R^T T

# The stored intrinsic matrix is normalized; its first two rows are scaled by W.
intrinsic_properties = np.array(camera_param['intrinsics'])
intrinsic_properties[:2, :] *= W
fx, fy = intrinsic_properties[0, 0], intrinsic_properties[1, 1]
cx, cy = intrinsic_properties[0, 2], intrinsic_properties[1, 2]

### Visualize GT

In [22]:
# AIHUB -> H36M pose conversion.
# Keep the first three columns of the '3d_pos' annotation, reshape to (1, 24, 3) so the
# helper receives a leading axis of size 1, convert, then take element 0 of the result.
world_3d = aihub2h36m(np.array(gt['annotations']['3d_pos'])[:, :3].reshape(1, 24, 3))[0]
# Echo the resulting shape.
world_3d.shape

(17, 3)

In [23]:
# Plot the converted ground-truth pose in world coordinates, +/-2000 range on every axis.
fig = plt.figure(0, figsize=(10, 10))
ax = plt.axes(projection="3d")
min_, max_ = -2000, 2000
ax.set(xlim=(min_, max_), ylim=(min_, max_), zlim=(min_, max_),
       xlabel='X', ylabel='Y', zlabel='Z')
ax.view_init(elev=12., azim=80)
show3Dpose(world_3d, ax)
plt.show()

### Calculate scaling factor

In [24]:
# World coordinates -> camera coordinates.
# Work on a deep copy so `world_3d` itself is never handed to the helper.
pos = copy.deepcopy(world_3d)
cam_3d = World2CameraCoordinate(pos, extrinsic_properties) # World coordinate -> Camera coordinate
# Root-relative version of the camera-frame pose.
cam_3d_hat = get_rootrel_pose(cam_3d)

In [23]:
# Plot the root-relative camera-frame ground truth, +/-2000 range on every axis.
fig = plt.figure(0, figsize=(10, 10))
ax = plt.axes(projection="3d")
min_, max_ = -2000, 2000
ax.set(xlim=(min_, max_), ylim=(min_, max_), zlim=(min_, max_),
       xlabel='X', ylabel='Y', zlabel='Z')
ax.view_init(elev=12., azim=80)
show3Dpose(cam_3d_hat, ax)
plt.show()

In [26]:
# Camera coordinates -> image frame.
# Both helpers receive the camera-frame pose, the denormalized intrinsics as a dict and a
# trailing 0 (meaning of that last argument is not visible here — TODO confirm in test_utils).
box = infer_box(cam_3d, {'fx': fx, 'fy': fy, 'cx': cx, 'cy': cy}, 0)
img_2d, img_3d = camera_to_image_frame(cam_3d, box, {'fx': fx, 'fy': fy, 'cx': cx, 'cy': cy}, 0)
img_2d_hat = get_rootrel_pose(img_2d) # root-relative 2D pose; original note gives shape (17, 2)
img_3d_hat = get_rootrel_pose(img_3d) # root-relative 3D pose; original note gives shape (17, 3)

In [28]:
import cv2

In [29]:
# Overlay the projected 2D pose on the matching video frame.
img_path = "/home/hrai/Datasets/HAAI/AIHUB/30_M160A_3/30_M160A_3_30.jpg"
#img_path = "/home/hrai/Datasets/HAAI/AIHUB/30_M180D_4/30_M180D_4_311.jpg"
#img = np.ones([1080, 1920, 3])
img = cv2.imread(img_path)
# cv2.imread reports a missing or unreadable file by returning None instead of raising;
# stop here with a clear message rather than failing inside the drawing helper.
if img is None:
    raise FileNotFoundError("Could not read image: {}".format(img_path))
img = get_2d_pose_image(img_2d, img, box)
plot_cv2_image(img)

In [30]:
# Draw the x/y columns of the root-relative camera-frame pose on a blank 2000x2000 canvas.
# The +1000 offset shifts the coordinates by half the canvas size.
# NOTE(review): the canvas is a float array of ones; this may be what triggers the imshow
# clipping warning in the output — confirm whether a uint8 canvas was intended.
img = np.ones([2000, 2000, 3])
img = get_2d_pose_image(cam_3d_hat[:, :2] + 1000, img)
plot_cv2_image(img)

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


In [31]:
# Optimize the scaling factor between the root-relative image-frame pose and the
# root-relative camera-frame pose; also returns the losses.
pred_lambda, losses = optimize_scaling_factor(img_3d_hat, cam_3d_hat) # uses x, y, z
# Echo the fitted factor.
pred_lambda

3.3579845428466797

In [32]:
# Scale the image-frame 3D pose by the fitted factor, then make it root-relative.
img_25d = img_3d * pred_lambda
img_25d_hat = get_rootrel_pose(img_25d)

In [35]:
# Overlay the scaled image-frame pose and the camera-frame ground truth on one set of axes.
fig = plt.figure(0, figsize=(10, 10))
ax = plt.axes(projection="3d")
ax.set(xlim=(-512, 512), ylim=(-512, 512), zlim=(-512, 512),
       xlabel='X', ylabel='Y', zlabel='Z')
ax.view_init(elev=12., azim=80)

show3Dpose(img_25d_hat, ax)
show3Dpose(cam_3d_hat, ax)

plt.show()

### Visualize inference and GT

In [37]:
# Apply the scaling factor fitted on the ground truth to the root-relative prediction.
# NOTE(review): the overlay plot that follows draws pred_hat*1000 rather than this value —
# confirm which scale is the intended one.
pred_hat_scaled = pred_hat * pred_lambda

In [42]:
# Overlay the scaled image-frame ground truth and the prediction on one set of axes.
fig = plt.figure(0, figsize=(10, 10))
ax = plt.axes(projection="3d")
ax.set(xlim=(-512, 512), ylim=(-512, 512), zlim=(-512, 512),
       xlabel='X', ylabel='Y', zlabel='Z')
ax.view_init(elev=12., azim=80)

show3Dpose(img_25d_hat, ax)
# The prediction is multiplied by a fixed 1000 here, not by pred_lambda.
show3Dpose(pred_hat*1000,  ax)

plt.show()

### Calculate MPJPE for single pose

In [42]:
import torch
from test_utils import MPJPE_for_single_pose, MPJPE_for_multiple_pose

In [43]:
# MPJPE between the scaled image-frame pose and the camera-frame ground truth
# (both root-relative); printed with a "mm" suffix.
mpjpe = MPJPE_for_single_pose(img_25d_hat, cam_3d_hat)
print(mpjpe, "mm")

13.47767607231927 mm


In [34]:
# MPJPE between the scaled image-frame ground truth and the scaled prediction.
# NOTE(review): pred_hat was taken at frame_num = 0 while the ground truth was loaded with
# frame = '30', and pred_hat_scaled uses pred_lambda whereas the overlay plot uses *1000 —
# confirm the frames and scales match before trusting this number.
mpjpe = MPJPE_for_single_pose(img_25d_hat, pred_hat_scaled)
print(mpjpe, "mm")

421.74566232806006 mm


## For multiple frames

In [36]:
# Whitelists of the action IDs, actor IDs and frame numbers to load.
action_list = ['30']
actor_list = ['M160A']
frame_list = ['30']

#list_input = zip_label.namelist()
# Load the 3D label JSON for every (action, actor, frame) combination.
# NOTE(review): `data_label` is overwritten on each iteration, so only the last one survives.
for action in action_list:
    for actor in actor_list:
        for frame in frame_list:
            # NOTE(review): prints cam_num (set in an earlier cell), not the loop's `frame` — confirm intent.
            print(action, actor, cam_num)
            load_path = "3D_json/{}_{}/3D_{}_{}_{}.json".format(action, actor, action, actor, frame)
            print(load_path)
            data_label = json.loads(zip_label.read(load_path).decode('utf-8'))

30 M160A 3
3D_json/30_M160A/3D_30_M160A_30.json
