In [13]:
"""
This script can be used to evaluate a trained model on 3D pose/shape and masks/part segmentation. You first need to download the datasets and preprocess them.
Example usage:
```
python3 eval.py --checkpoint=data/model_checkpoint.pt --dataset=h36m-p1 --log_freq=20
```
Running the above command will compute the MPJPE and Reconstruction Error on the Human3.6M dataset (Protocol I). The ```--dataset``` option can take different values based on the type of evaluation you want to perform:
1. Human3.6M Protocol 1 ```--dataset=h36m-p1```
2. Human3.6M Protocol 2 ```--dataset=h36m-p2```
3. 3DPW ```--dataset=3dpw```
4. LSP ```--dataset=lsp```
5. MPI-INF-3DHP ```--dataset=mpi-inf-3dhp```
"""

import torch
from torch.utils.data import DataLoader
import numpy as np
import cv2
import os
import argparse
import json
from collections import namedtuple
from tqdm import tqdm
import torchgeometry as tgm

import config
import constants
from models import hmr, SMPL
from datasets import BaseDataset
from utils.imutils import uncrop
from utils.pose_utils import reconstruction_error
from utils.part_utils import PartRenderer

from orientation_evaluation import orientation_evaluation
from scipy.spatial.transform import Rotation as R

# Define command-line arguments
parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint', default=None, help='Path to network checkpoint')
parser.add_argument('--dataset', default='h36m-p1', choices=['h36m-p1', 'h36m-p2', 'lsp', '3dpw', 'mpi-inf-3dhp'], help='Choose evaluation dataset')
parser.add_argument('--log_freq', default=50, type=int, help='Frequency of printing intermediate results')
parser.add_argument('--batch_size', default=32, help='Batch size for testing')
parser.add_argument('--shuffle', default=False, action='store_true', help='Shuffle data')
parser.add_argument('--num_workers', default=8, type=int, help='Number of processes for data loading')
parser.add_argument('--result_file', default=None, help='If set, save detections to a .npz file')

def run_evaluation(model, dataset_name, dataset, result_file,
                   batch_size=32, img_res=224, 
                   num_workers=32, shuffle=False, log_freq=50):
    """Run evaluation on the datasets and metrics we report in the paper. """

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Transfer model to the GPU
    model.to(device)

    # Load SMPL model
    smpl_neutral = SMPL(config.SMPL_MODEL_DIR,
                        create_transl=False).to(device)
    smpl_male = SMPL(config.SMPL_MODEL_DIR,
                     gender='male',
                     create_transl=False).to(device)
    smpl_female = SMPL(config.SMPL_MODEL_DIR,
                       gender='female',
                       create_transl=False).to(device)
    
    renderer = PartRenderer()
    
    # Regressor for H36m joints
    J_regressor = torch.from_numpy(np.load(config.JOINT_REGRESSOR_H36M)).float()
    
    save_results = result_file is not None
    # Disable shuffling if you want to save the results
    if save_results:
        shuffle=False
    # Create dataloader for the dataset
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
    
    # Pose metrics
    # MPJPE and Reconstruction error for the non-parametric and parametric shapes
    mpjpe = np.zeros(len(dataset))
    recon_err = np.zeros(len(dataset))
    mpjpe_smpl = np.zeros(len(dataset))
    recon_err_smpl = np.zeros(len(dataset))
    
    #Including per joint position error:
    pjpe = torch.zeros(len(dataset), 14)
    
    # Including mean per joint angular error (reduced and per part)
    mpjae = np.zeros(len(dataset))
    mpjae_per_part = torch.zeros(len(dataset), 24, 3)

    # Shape metrics
    # Mean per-vertex error
    shape_err = np.zeros(len(dataset))
    shape_err_smpl = np.zeros(len(dataset))

    # Mask and part metrics
    # Accuracy
    accuracy = 0.
    parts_accuracy = 0.
    # True positive, false positive and false negative
    tp = np.zeros((2,1))
    fp = np.zeros((2,1))
    fn = np.zeros((2,1))
    parts_tp = np.zeros((7,1))
    parts_fp = np.zeros((7,1))
    parts_fn = np.zeros((7,1))
    # Pixel count accumulators
    pixel_count = 0
    parts_pixel_count = 0

    # Store SMPL parameters
    smpl_pose = np.zeros((len(dataset), 72))
    smpl_betas = np.zeros((len(dataset), 10))
    smpl_camera = np.zeros((len(dataset), 3))
    pred_joints = np.zeros((len(dataset), 17, 3))

    eval_pose = False
    eval_masks = False
    eval_parts = False
    eval_orientation = False # Adding the orientation parameter
    # Choose appropriate evaluation for each dataset
    if dataset_name == 'h36m-p1' or dataset_name == 'h36m-p2' or dataset_name == 'mpi-inf-3dhp':
        eval_pose = True
    elif dataset_name == 'lsp':
        eval_masks = True
        eval_parts = True
        annot_path = config.DATASET_FOLDERS['upi-s1h']
    elif dataset_name == '3dpw':
        eval_orientation = True
        eval_pose = True
        

    joint_mapper_h36m = constants.H36M_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.H36M_TO_J14
    joint_mapper_gt = constants.J24_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.J24_TO_J14
    # Iterate over the entire dataset
    for step, batch in enumerate(tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gt_vertices = smpl_neutral(betas=gt_betas, body_pose=gt_pose[:, 3:], global_orient=gt_pose[:, :3]).vertices
        images = batch['img'].to(device)
        gender = batch['gender'].to(device)
        curr_batch_size = images.shape[0]
        
        with torch.no_grad():
            pred_rotmat, pred_betas, pred_camera = model(images)
            pred_output = smpl_neutral(betas=pred_betas, body_pose=pred_rotmat[:,1:], global_orient=pred_rotmat[:,0].unsqueeze(1), pose2rot=False)
            pred_vertices = pred_output.vertices

        if save_results:
            rot_pad = torch.tensor([0,0,1], dtype=torch.float32, device=device).view(1,3,1)
            rotmat = torch.cat((pred_rotmat.view(-1, 3, 3), rot_pad.expand(curr_batch_size * 24, -1, -1)), dim=-1)
            pred_pose = tgm.rotation_matrix_to_angle_axis(rotmat).contiguous().view(-1, 72)
            smpl_pose[step * batch_size:step * batch_size + curr_batch_size, :] = pred_pose.cpu().numpy()
            smpl_betas[step * batch_size:step * batch_size + curr_batch_size, :]  = pred_betas.cpu().numpy()
            smpl_camera[step * batch_size:step * batch_size + curr_batch_size, :]  = pred_camera.cpu().numpy()
        
        # Orientation evaluation
        orientation_error_per_part, orientation_error, orientation_error_new = \
        orientation_evaluation(gt_pose, pred_rotmat, batch_size, curr_batch_size, step)
        
        mpjae[step * batch_size:step * batch_size + curr_batch_size] = orientation_error_new
        mpjae_per_part[step*batch_size : step*batch_size + curr_batch_size] = orientation_error_per_part
            
        # 3D pose evaluation
        if eval_pose:
            # Regressor broadcasting
            J_regressor_batch = J_regressor[None, :].expand(pred_vertices.shape[0], -1, -1).to(device)
            # Get 14 ground truth joints
            if 'h36m' in dataset_name or 'mpi-inf' in dataset_name:
                gt_keypoints_3d = batch['pose_3d'].cuda()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_gt, :-1]
            # For 3DPW get the 14 common joints from the rendered shape
            else:
                gt_vertices = smpl_male(global_orient=gt_pose[:,:3], body_pose=gt_pose[:,3:], betas=gt_betas).vertices 
                gt_vertices_female = smpl_female(global_orient=gt_pose[:,:3], body_pose=gt_pose[:,3:], betas=gt_betas).vertices 
                gt_vertices[gender==1, :, :] = gt_vertices_female[gender==1, :, :]
                gt_keypoints_3d = torch.matmul(J_regressor_batch, gt_vertices) # torch.Size([32, 17, 3]) # This returns 17 joints
                print("gt_keypoints_3d = torch.matmul(J_regressor_batch, gt_vertices)", gt_keypoints_3d.shape, gt_keypoints_3d)
                gt_pelvis = gt_keypoints_3d[:, [0],:].clone()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_h36m, :] # torch.Size([32, 14, 3]) # But only 14 are used, the joint_mapper is [6, 5, 4, 1, 2, 3, 16, 15, 14, 11, 12, 13, 8, 10]
                print("joint_mapper_h36m", joint_mapper_h36m) 
                print("gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_h36m, :]", gt_keypoints_3d.shape, gt_keypoints_3d)
                gt_keypoints_3d = gt_keypoints_3d - gt_pelvis
                print("gt_keypoints_3d = gt_keypoints_3d - gt_pelvis", gt_keypoints_3d.shape, gt_keypoints_3d)


            # Get 14 predicted joints from the mesh
            pred_keypoints_3d = torch.matmul(J_regressor_batch, pred_vertices)
            print("pred_keypoints_3d = torch.matmul(J_regressor_batch, pred_vertices)", pred_keypoints_3d.shape, pred_keypoints_3d)
            if save_results:
                pred_joints[step * batch_size:step * batch_size + curr_batch_size, :, :]  = pred_keypoints_3d.cpu().numpy()
            pred_pelvis = pred_keypoints_3d[:, [0],:].clone()
            pred_keypoints_3d = pred_keypoints_3d[:, joint_mapper_h36m, :]
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis # [32, 14, 3]
            
            # Absolute error (MPJPE)
            error = torch.sqrt(((pred_keypoints_3d - gt_keypoints_3d) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            
            mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error
            
            # Per part error
            per_part_error = torch.sqrt(((pred_keypoints_3d - gt_keypoints_3d) ** 2).sum(dim=-1)) # Not really necessary to send it to cpu as a np array for now
            
            pjpe[step * batch_size:step * batch_size + curr_batch_size] = per_part_error*1000 # Converting from meters to milimeters
            
            # Reconstuction_error
            r_error = reconstruction_error(pred_keypoints_3d.cpu().numpy(), gt_keypoints_3d.cpu().numpy(), reduction=None)
            recon_err[step * batch_size:step * batch_size + curr_batch_size] = r_error


        # If mask or part evaluation, render the mask and part images
        if eval_masks or eval_parts:
            mask, parts = renderer(pred_vertices, pred_camera)

        # Mask evaluation (for LSP)
        if eval_masks:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            # Dimensions of original image
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                # After rendering, convert imate back to original resolution
                pred_mask = uncrop(mask[i].cpu().numpy(), center[i], scale[i], orig_shape[i]) > 0
                # Load gt mask
                gt_mask = cv2.imread(os.path.join(annot_path, batch['maskname'][i]), 0) > 0
                # Evaluation consistent with the original UP-3D code
                accuracy += (gt_mask == pred_mask).sum()
                pixel_count += np.prod(np.array(gt_mask.shape))
                for c in range(2):
                    cgt = gt_mask == c
                    cpred = pred_mask == c
                    tp[c] += (cgt & cpred).sum()
                    fp[c] +=  (~cgt & cpred).sum()
                    fn[c] +=  (cgt & ~cpred).sum()
                f1 = 2 * tp / (2 * tp + fp + fn)

        # Part evaluation (for LSP)
        if eval_parts:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                pred_parts = uncrop(parts[i].cpu().numpy().astype(np.uint8), center[i], scale[i], orig_shape[i])
                # Load gt part segmentation
                gt_parts = cv2.imread(os.path.join(annot_path, batch['partname'][i]), 0)
                # Evaluation consistent with the original UP-3D code
                # 6 parts + background
                for c in range(7):
                   cgt = gt_parts == c
                   cpred = pred_parts == c
                   cpred[gt_parts == 255] = 0
                   parts_tp[c] += (cgt & cpred).sum()
                   parts_fp[c] +=  (~cgt & cpred).sum()
                   parts_fn[c] +=  (cgt & ~cpred).sum()
                gt_parts[gt_parts == 255] = 0
                pred_parts[pred_parts == 255] = 0
                parts_f1 = 2 * parts_tp / (2 * parts_tp + parts_fp + parts_fn)
                parts_accuracy += (gt_parts == pred_parts).sum()
                parts_pixel_count += np.prod(np.array(gt_parts.shape))

        # Print intermediate results during evaluation
        if step % log_freq == log_freq - 1:
            if eval_pose:
                print('MPJPE: ' + str(1000 * mpjpe[:step * batch_size].mean()))
                print('Reconstruction Error: ' + str(1000 * recon_err[:step * batch_size].mean()))
                print()
            if eval_masks:
                print('Accuracy: ', accuracy / pixel_count)
                print('F1: ', f1.mean())
                print()
            if eval_parts:
                print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
                print('Parts F1 (BG): ', parts_f1[[0,1,2,3,4,5,6]].mean())
                print()
            if eval_orientation:
                print('Orientation error: ' + str(mpjae[:step * batch_size].mean()))

    # Save reconstructions to a file for further processing
    if save_results:
        np.savez(result_file, pred_joints=pred_joints, pose=smpl_pose, betas=smpl_betas, camera=smpl_camera)
    # Print final results during evaluation
    print('*** Final Results ***')
    print()
    if eval_pose:
        print('MPJPE: ' + str(1000 * mpjpe.mean()))
        print('Reconstruction Error: ' + str(1000 * recon_err.mean()))
        print()
        #torch.save(pjpe, 'pjpe.pt')
    if eval_masks:
        print('Accuracy: ', accuracy / pixel_count)
        print('F1: ', f1.mean())
        print()
    if eval_parts:
        print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
        print('Parts F1 (BG): ', parts_f1[[0,1,2,3,4,5,6]].mean())
        print()
    if eval_orientation:
        print('Orientation Error: ' + str(mpjae.mean()))
        print('Orientation Error per part: ', mpjae_per_part)
        #torch.save(mpjae_per_part, 'mpjae_per_part.pt')

In [14]:
if __name__ == '__main__':
    
    args = parser.parse_args(['--checkpoint=data/model_checkpoint.pt','--dataset=3dpw', '--log_freq=20'])
    # Here we inserted our own arguments list
    
    model = hmr(config.SMPL_MEAN_PARAMS)
    checkpoint = torch.load(args.checkpoint)
    model.load_state_dict(checkpoint['model'], strict=False)
    model.eval()

    # Setup evaluation dataset
    dataset = BaseDataset(None, args.dataset, is_train=False)
    # Run evaluation
    run_evaluation(model, args.dataset, dataset, args.result_file,
                   batch_size=args.batch_size,
                   shuffle=args.shuffle,
                   log_freq=args.log_freq)


Eval:   0%|          | 0/1110 [00:00<?, ?it/s][A

gt_keypoints_3d = torch.matmul(J_regressor_batch, gt_vertices) torch.Size([32, 17, 3]) tensor([[[-4.4040e-03, -2.6157e-01,  3.3186e-02],
         [ 1.3561e-01, -2.5373e-01,  3.8715e-02],
         [ 9.4056e-02,  1.7967e-01,  1.3922e-01],
         ...,
         [-1.4890e-01, -6.8714e-01, -2.8414e-02],
         [-2.3759e-01, -4.2739e-01,  7.7644e-03],
         [-2.0387e-01, -1.8171e-01,  1.2894e-02]],

        [[-4.3842e-03, -2.6167e-01,  3.2996e-02],
         [ 1.3568e-01, -2.5387e-01,  3.8124e-02],
         [ 9.5192e-02,  1.7975e-01,  1.3809e-01],
         ...,
         [-1.4957e-01, -6.8678e-01, -2.7853e-02],
         [-2.3817e-01, -4.2651e-01,  6.0912e-03],
         [-2.0659e-01, -1.8038e-01,  1.0852e-02]],

        [[-4.3657e-03, -2.6179e-01,  3.2999e-02],
         [ 1.3577e-01, -2.5415e-01,  3.7902e-02],
         [ 9.6454e-02,  1.7967e-01,  1.3741e-01],
         ...,
         [-1.5024e-01, -6.8669e-01, -2.6847e-02],
         [-2.4047e-01, -4.2656e-01,  6.4115e-03],
         [-2.1082


Eval:   0%|          | 1/1110 [00:27<8:25:00, 27.32s/it][A
Eval:   0%|          | 2/1110 [00:31<6:18:08, 20.48s/it][A

gt_keypoints_3d = torch.matmul(J_regressor_batch, gt_vertices) torch.Size([32, 17, 3]) tensor([[[-3.6191e-05, -2.6449e-01,  3.4947e-02],
         [ 1.3403e-01, -2.4831e-01, -4.1961e-03],
         [ 1.0712e-01,  1.8828e-01, -3.8759e-02],
         ...,
         [-1.2682e-01, -6.8291e-01, -6.3629e-03],
         [-2.3079e-01, -4.3292e-01,  6.6794e-02],
         [-2.1254e-01, -1.8623e-01,  3.8161e-02]],

        [[ 2.9096e-04, -2.6443e-01,  3.3789e-02],
         [ 1.3193e-01, -2.4636e-01, -1.0960e-02],
         [ 1.1220e-01,  1.8946e-01, -2.9663e-02],
         ...,
         [-1.2152e-01, -6.8451e-01, -5.3881e-04],
         [-2.2683e-01, -4.3657e-01,  7.7498e-02],
         [-2.1302e-01, -1.8968e-01,  4.8513e-02]],

        [[ 5.3468e-04, -2.6437e-01,  3.2913e-02],
         [ 1.2958e-01, -2.4548e-01, -1.7779e-02],
         [ 1.1664e-01,  1.9057e-01, -2.0151e-02],
         ...,
         [-1.1887e-01, -6.8643e-01,  6.9304e-03],
         [-2.2339e-01, -4.4012e-01,  8.9928e-02],
         [-2.1514


Eval:   0%|          | 3/1110 [00:32<4:27:35, 14.50s/it][A

gt_keypoints_3d = torch.matmul(J_regressor_batch, gt_vertices) torch.Size([32, 17, 3]) tensor([[[ 1.7895e-02, -2.5558e-01,  1.4108e-02],
         [-2.6830e-02, -2.1497e-01, -1.1207e-01],
         [ 3.1522e-02,  2.0895e-01,  3.0737e-03],
         ...,
         [ 5.2375e-02, -7.0882e-01,  4.1224e-03],
         [ 8.4470e-02, -4.8809e-01,  1.7150e-01],
         [-1.0864e-02, -2.7316e-01,  2.4384e-01]],

        [[ 1.8016e-02, -2.5551e-01,  1.4647e-02],
         [-2.6830e-02, -2.1708e-01, -1.1213e-01],
         [ 3.1673e-02,  2.0853e-01, -4.4098e-03],
         ...,
         [ 5.8635e-02, -7.0722e-01,  1.0116e-02],
         [ 9.0338e-02, -4.8334e-01,  1.7372e-01],
         [-6.8090e-03, -2.6855e-01,  2.4387e-01]],

        [[ 1.8008e-02, -2.5548e-01,  1.4948e-02],
         [-2.7822e-02, -2.1933e-01, -1.1218e-01],
         [ 2.8088e-02,  2.0851e-01, -1.3227e-02],
         ...,
         [ 6.5306e-02, -7.0518e-01,  1.5492e-02],
         [ 9.5819e-02, -4.7793e-01,  1.7486e-01],
         [-3.8533


Eval:   0%|          | 4/1110 [00:33<3:10:54, 10.36s/it][A
Eval:   0%|          | 5/1110 [00:33<2:15:57,  7.38s/it][A

gt_keypoints_3d = torch.matmul(J_regressor_batch, gt_vertices) torch.Size([32, 17, 3]) tensor([[[ 0.0119, -0.2529,  0.0060],
         [-0.0880, -0.2194, -0.0867],
         [-0.1650,  0.1830,  0.0714],
         ...,
         [ 0.0951, -0.6954,  0.0020],
         [ 0.0378, -0.5342,  0.1996],
         [-0.1903, -0.5574,  0.1836]],

        [[ 0.0125, -0.2524,  0.0060],
         [-0.0847, -0.2184, -0.0893],
         [-0.1525,  0.1869,  0.0690],
         ...,
         [ 0.0945, -0.6951,  0.0056],
         [ 0.0380, -0.5377,  0.2083],
         [-0.1904, -0.5626,  0.1991]],

        [[ 0.0126, -0.2519,  0.0064],
         [-0.0826, -0.2174, -0.0905],
         [-0.1376,  0.1918,  0.0661],
         ...,
         [ 0.0933, -0.6945,  0.0080],
         [ 0.0372, -0.5422,  0.2158],
         [-0.1915, -0.5672,  0.2104]],

        ...,

        [[ 0.0121, -0.2523,  0.0092],
         [-0.0751, -0.2178, -0.0944],
         [-0.0745,  0.2058,  0.0331],
         ...,
         [ 0.0881, -0.6972,  0.0153],
 


Eval:   1%|          | 6/1110 [00:34<1:39:30,  5.41s/it][A
[A

KeyboardInterrupt: 

In [8]:
pjpe = torch.load('pjpe.pt')
pjpe.shape

torch.Size([35515, 14])

In [25]:
            
# SMPL joint names according to https://github.com/gulvarol/surreal/blob/5e3193741ddb429f2decb9c0873e252447058dc5/datageneration/misc/smpl_relations/smpl_relations.py#L19

SMPL_joint_names = ['hips',
                    'leftUpLeg',
                    'rightUpLeg',
                    'spine',
                    'leftLeg',
                    'rightLeg',
                    'spine1',
                    'leftFoot',
                    'rightFoot',
                    'spine2',
                    'leftToeBase',
                    'rightToeBase',
                    'neck',
                    'leftShoulder',
                    'rightShoulder',
                    'head',
                    'leftArm',
                    'rightArm',
                    'leftForeArm',
                    'rightForeArm',
                    'leftHand',
                    'rightHand',
                    'leftHandIndex1',
                    'rightHandIndex1']

# The subset of joints from SMPL used on the pose evaluation of e.g. 3dpw (only the 14 first items)

J24_TO_J17 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 18, 14, 16, 17]
J24_TO_J14 = J24_TO_J17[:14]

SMPL_14 = [SMPL_joint_names[i] for i in J24_TO_J14]
SMPL_14 # This seems quite odd because there's hardly any upper body joints

['hips',
 'leftUpLeg',
 'rightUpLeg',
 'spine',
 'leftLeg',
 'rightLeg',
 'spine1',
 'leftFoot',
 'rightFoot',
 'spine2',
 'leftToeBase',
 'rightToeBase',
 'neck',
 'leftForeArm']

In [8]:
J24_TO_J17 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 18, 14, 16, 17]

# Map joints to SMPL joints
JOINT_MAP = {
'OP Nose': 24, 'OP Neck': 12, 'OP RShoulder': 17,
'OP RElbow': 19, 'OP RWrist': 21, 'OP LShoulder': 16,
'OP LElbow': 18, 'OP LWrist': 20, 'OP MidHip': 0,
'OP RHip': 2, 'OP RKnee': 5, 'OP RAnkle': 8,
'OP LHip': 1, 'OP LKnee': 4, 'OP LAnkle': 7,
'OP REye': 25, 'OP LEye': 26, 'OP REar': 27,
'OP LEar': 28, 'OP LBigToe': 29, 'OP LSmallToe': 30,
'OP LHeel': 31, 'OP RBigToe': 32, 'OP RSmallToe': 33, 'OP RHeel': 34,
'Right Ankle': 8, 'Right Knee': 5, 'Right Hip': 45,
'Left Hip': 46, 'Left Knee': 4, 'Left Ankle': 7,
'Right Wrist': 21, 'Right Elbow': 19, 'Right Shoulder': 17,
'Left Shoulder': 16, 'Left Elbow': 18, 'Left Wrist': 20,
'Neck (LSP)': 47, 'Top of Head (LSP)': 48,
'Pelvis (MPII)': 49, 'Thorax (MPII)': 50,
'Spine (H36M)': 51, 'Jaw (H36M)': 52,
'Head (H36M)': 53, 'Nose': 24, 'Left Eye': 26,
'Right Eye': 25, 'Left Ear': 28, 'Right Ear': 27
}

import numpy as np

print(len(JOINT_MAP.values()), sorted(JOINT_MAP.values()))


49 [0, 1, 2, 4, 4, 5, 5, 7, 7, 8, 8, 12, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 30, 31, 32, 33, 34, 45, 46, 47, 48, 49, 50, 51, 52, 53]


In [9]:
parts_subset = {k:v for k,v in JOINT_MAP.items() if v in J24_TO_J17[:14]}

joint_dict = {}

for k in parts_subset:
    joint_dict[str(k)] = JOINT_MAP[k]

joint_dict

{'OP Neck': 12,
 'OP LElbow': 18,
 'OP MidHip': 0,
 'OP RHip': 2,
 'OP RKnee': 5,
 'OP RAnkle': 8,
 'OP LHip': 1,
 'OP LKnee': 4,
 'OP LAnkle': 7,
 'Right Ankle': 8,
 'Right Knee': 5,
 'Left Knee': 4,
 'Left Ankle': 7,
 'Left Elbow': 18}

In [7]:
parts_subset = {v:k for k,v in JOINT_MAP.items() if v in J24_TO_J17[:14]}
parts_subset

{12: 'OP Neck',
 18: 'Left Elbow',
 0: 'OP MidHip',
 2: 'OP RHip',
 5: 'Right Knee',
 8: 'Right Ankle',
 1: 'OP LHip',
 4: 'Left Knee',
 7: 'Left Ankle'}

In [21]:
JOINT_MAP.items()

dict_items([('OP Nose', 24), ('OP Neck', 12), ('OP RShoulder', 17), ('OP RElbow', 19), ('OP RWrist', 21), ('OP LShoulder', 16), ('OP LElbow', 18), ('OP LWrist', 20), ('OP MidHip', 0), ('OP RHip', 2), ('OP RKnee', 5), ('OP RAnkle', 8), ('OP LHip', 1), ('OP LKnee', 4), ('OP LAnkle', 7), ('OP REye', 25), ('OP LEye', 26), ('OP REar', 27), ('OP LEar', 28), ('OP LBigToe', 29), ('OP LSmallToe', 30), ('OP LHeel', 31), ('OP RBigToe', 32), ('OP RSmallToe', 33), ('OP RHeel', 34), ('Right Ankle', 8), ('Right Knee', 5), ('Right Hip', 45), ('Left Hip', 46), ('Left Knee', 4), ('Left Ankle', 7), ('Right Wrist', 21), ('Right Elbow', 19), ('Right Shoulder', 17), ('Left Shoulder', 16), ('Left Elbow', 18), ('Left Wrist', 20), ('Neck (LSP)', 47), ('Top of Head (LSP)', 48), ('Pelvis (MPII)', 49), ('Thorax (MPII)', 50), ('Spine (H36M)', 51), ('Jaw (H36M)', 52), ('Head (H36M)', 53), ('Nose', 24), ('Left Eye', 26), ('Right Eye', 25), ('Left Ear', 28), ('Right Ear', 27)])

In [8]:
J24_TO_J17[:14]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 18]

In [17]:
i = 0
for key in JOINT_MAP:    
    print('{:16}'.format(key))
    i+=1

OP Nose         
OP Neck         
OP RShoulder    
OP RElbow       
OP RWrist       
OP LShoulder    
OP LElbow       
OP LWrist       
OP MidHip       
OP RHip         
OP RKnee        
OP RAnkle       
OP LHip         
OP LKnee        
OP LAnkle       
OP REye         
OP LEye         
OP REar         
OP LEar         
OP LBigToe      
OP LSmallToe    
OP LHeel        
OP RBigToe      
OP RSmallToe    
OP RHeel        
Right Ankle     
Right Knee      
Right Hip       
Left Hip        
Left Knee       
Left Ankle      
Right Wrist     
Right Elbow     
Right Shoulder  
Left Shoulder   
Left Elbow      
Left Wrist      
Neck (LSP)      
Top of Head (LSP)
Pelvis (MPII)   
Thorax (MPII)   
Spine (H36M)    
Jaw (H36M)      
Head (H36M)     
Nose            
Left Eye        
Right Eye       
Left Ear        
Right Ear       


In [12]:
pjpe.mean(0)

tensor([157.5282,  98.7288,  29.9631,  29.8926, 100.6508, 159.0087, 141.9562,
         91.9257,  69.3990,  72.2796,  97.8108, 141.5400,  67.9245,  98.5646])

In [10]:
mpjae = torch.load('mpjae_per_part.pt')
mpjae.shape

torch.Size([35515, 24, 3])

In [15]:
mpjae_mean = mpjae.mean(dim=0)
mpjae_mean

tensor([[98.1797,  9.4942, 51.9239],
        [10.0485,  5.6163,  4.0774],
        [ 9.8444,  4.6058,  4.8752],
        [ 9.1616,  2.0758,  2.2850],
        [16.9512,  5.1322,  6.1049],
        [16.4851,  4.1317,  4.9168],
        [ 4.4222,  2.1591,  2.0369],
        [ 4.7690,  8.3333,  3.8979],
        [ 5.5424, 11.1734,  6.7159],
        [ 3.0120,  1.6893,  1.0108],
        [11.9835,  8.3257,  8.1128],
        [ 3.9105,  6.4853, 11.5482],
        [ 5.9942,  8.0072,  5.4115],
        [ 3.6484,  9.7804, 10.5708],
        [ 4.6065,  7.8352, 10.8048],
        [10.2832,  6.6511,  5.5037],
        [ 8.7521,  7.7237, 12.1250],
        [13.8173,  8.5829, 11.1074],
        [16.6280, 18.6774, 13.1059],
        [15.8921, 16.8754, 15.4179],
        [ 8.4844,  6.3963, 17.3813],
        [ 5.1584,  4.7116, 12.7035],
        [ 3.2690,  2.1730,  1.5460],
        [ 1.7931,  1.0212,  1.5960]])

In [8]:
import numpy as np

joint_regressor = np.load('data/J_regressor_extra.npy')

joint_regressor.shape

(9, 6890)

In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
# Our first sanity check should be whether ground truth compared to ground truth yields zero error

import torch 
import numpy as np
from scipy.spatial.transform import Rotation as R

import pickle as pkl
import os

img_path = 'examples/image_00502_crop.jpg'

pickle_path = 'data/3dpw/sequenceFiles/validation/courtyard_basketball_01.pkl'

frame = 502

# Load the .pkl sequence file containing the ground-truth information from 3dpw

seq = pkl.load(open(pickle_path,'rb'),encoding='latin-1') # opening the sequence file, latin-1 encoding for making it compatible with python3

gt_pose = torch.tensor(seq['poses'][0][frame]).unsqueeze(0)

# print(gt_pose)

pred_rotmat = torch.zeros(1, 24, 3, 3)

q = R.from_rotvec(gt_pose.reshape(1,24,-1)[0])

pred_rotmat = torch.tensor(R.as_dcm(q)).unsqueeze(0)

# print(pred_rotmat)

In [None]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

curr_batch_size = 1

print("gt_pose", gt_pose.shape, gt_pose)

print("pred_rotmat", pred_rotmat.shape, pred_rotmat)

# Get ground truth orientation (already stored in gt_pose)
gt_rotvec = torch.zeros(curr_batch_size,24,3) # Have to have an array of this shape to input into the rotation object (from 32,72 to 32,24,3)
i = 0
for row in gt_pose:
    gt_rotvec[i] = torch.reshape(row,(24, -1))
    i+=1
print("gt_rotvec", gt_rotvec.shape, gt_rotvec)
    
gt_euler = np.zeros((curr_batch_size, 24, 3)) # Using numpy here because it works with the rotation library
i = 0
for row in gt_rotvec:
    r = R.from_rotvec(row)
    gt_euler[i] = R.as_euler(r, 'xyz', degrees=True)
    i+=1

print("gt_euler", gt_euler.shape, gt_euler)

# Get Euler representation of the predictions too:

pred_euler = np.zeros((curr_batch_size,24,3)) # Has to be a numpy array because it works with Rotation

# For each row in pred_rotmat convert it to a Rotation object and write it into a corresponding
# row in pred_euler as Euler angles

i=0
for row in pred_rotmat:
    r = R.from_dcm(row.cpu())
    pred_euler[i] = R.as_euler(r, 'xyz', degrees=True)
    i+=1

print("pred_euler", pred_euler.shape, pred_euler)
    
    
orientation_error_non_reduced = torch.sqrt((torch.from_numpy(gt_euler).to(device) -
                                torch.from_numpy(pred_euler).to(device))**2)
print(orientation_error_non_reduced)
    
orientation_error = torch.sqrt((torch.from_numpy(gt_euler).to(device) -
                                torch.from_numpy(pred_euler).to(device))**2).sum(dim=-1).mean(dim=-1)
# The reduction above is wrong. For a 90 degree error in one angle, it averages out 3.75 degrees, which
# is 90/24. The correct reduction would be a mean of 1.25 (90/72), because there are 72 angles (3 for each part)
# To remove the root, add [:,1:,:] to gt_euler and pred_euler above

orientation_error_new = torch.sqrt((torch.from_numpy(gt_euler).to(device) -
                                torch.from_numpy(pred_euler).to(device))**2).mean()
# This reduction is more accurate because it averages the error per part and then the error across parts
# It is equivalent to .mean(dim=-1).mean(dim=-1)

print("orientation_error")
print(orientation_error)
print()
print("orientation_error_new")
print(orientation_error_new)
print()

In [None]:
def compare_orientation(gt, pred):
    # Taking as input two axis_angle representations
    
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    curr_batch_size = 1

    gt_pose = gt

    print("gt_pose", gt_pose.shape, gt_pose)

    q = R.from_rotvec(pred.reshape(1,24,-1)[0])

    pred_rotmat = torch.tensor(R.as_dcm(q)).unsqueeze(0)

    print("pred_rotmat", pred_rotmat.shape, pred_rotmat)

    # Get ground truth orientation (already stored in gt_pose)
    gt_rotvec = torch.zeros(curr_batch_size,24,3) # Have to have an array of this shape to input into the rotation object (from 32,72 to 32,24,3)
    i = 0
    for row in gt_pose:
        gt_rotvec[i] = torch.reshape(row,(24, -1))
        i+=1
    print("gt_rotvec", gt_rotvec.shape, gt_rotvec)

    gt_euler = np.zeros((curr_batch_size, 24, 3)) # Using numpy here because it works with the rotation library
    i = 0
    for row in gt_rotvec:
        r = R.from_rotvec(row)
        gt_euler[i] = R.as_euler(r, 'xyz', degrees=True)
        i+=1

    print("gt_euler", gt_euler.shape, gt_euler)

    # Get Euler representation of the predictions too:

    pred_euler = np.zeros((curr_batch_size,24,3)) # Has to be a numpy array because it works with Rotation

    # For each row in pred_rotmat convert it to a Rotation object and write it into a corresponding
    # row in pred_euler as Euler angles

    i=0
    for row in pred_rotmat:
        r = R.from_dcm(row.cpu())
        pred_euler[i] = R.as_euler(r, 'xyz', degrees=True)
        i+=1

    print("pred_euler", pred_euler.shape, pred_euler)


    orientation_error_non_reduced = torch.sqrt((torch.from_numpy(gt_euler).to(device) -
                                    torch.from_numpy(pred_euler).to(device))**2)
    print("error per part", orientation_error_non_reduced)

    orientation_error = torch.sqrt((torch.from_numpy(gt_euler).to(device) -
                                    torch.from_numpy(pred_euler).to(device))**2).sum(dim=-1).mean(dim=-1)
    # The reduction above is wrong. For a 90 degree error in one angle, it averages out 3.75 degrees, which
    # is 90/24. The correct reduction would be a mean of 1.25 (90/72), because there are 72 angles (3 for each part)
    # To remove the root, add [:,1:,:] to gt_euler and pred_euler above

    orientation_error_new = torch.sqrt((torch.from_numpy(gt_euler).to(device) -
                                    torch.from_numpy(pred_euler).to(device))**2).mean()
    # This reduction is more accurate because it averages the error per part and then the error across parts
    # It is equivalent to .mean(dim=-1).mean(dim=-1)

    print("orientation_error")
    print(orientation_error.item())
    print()
    print("orientation_error_new")
    print(orientation_error_new.item())
    print()

    #moe[step * batch_size:step * batch_size + curr_batch_size] = orientation_error.cpu()

In [24]:
import torch 
import numpy as np
from scipy.spatial.transform import Rotation as R

def compare_orientation_rotvec(gt, pred):
    # Taking as input two axis_angle representations
    
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    curr_batch_size = 1

    gt_pose = gt

    print("gt_pose", gt_pose.shape, gt_pose)
    print("pred_pose", pred.shape, pred)

    # Get ground truth orientation (already stored in gt_pose)
    gt_rotvec = torch.zeros((curr_batch_size,24,3), dtype=torch.double) # Have to have an array of this shape to input into the rotation object (from 32,72 to 32,24,3)
    i = 0
    for row in gt_pose:
        gt_rotvec[i] = torch.reshape(row,(24, -1))
        i+=1
    print("gt_rotvec", gt_rotvec.shape, gt_rotvec)
    
    # Get prediction as rotation vectors
    
    r = R.from_rotvec(pred.reshape(1,24,-1)[0])
    pred_rotvec = torch.tensor(R.as_rotvec(r)).unsqueeze(0)

    print("pred_rotvec", pred_rotvec.shape, pred_rotvec)

    orientation_error_non_reduced = np.degrees(torch.sqrt((gt_rotvec - pred_rotvec)**2))
    
    print("error per part", orientation_error_non_reduced)

    orientation_error = np.degrees(torch.sqrt((gt_rotvec - pred_rotvec)**2).sum(dim=-1).mean(dim=-1))
    # The reduction above is wrong. For a 90 degree error in one angle, it averages out 3.75 degrees, which
    # is 90/24. The correct reduction would be a mean of 1.25 (90/72), because there are 72 angles (3 for each part)
    # To remove the root, add [:,1:,:] to gt_euler and pred_euler above

    orientation_error_new = np.degrees(torch.sqrt((gt_rotvec - pred_rotvec)**2).mean())
    # This reduction is more accurate because it averages the error per part and then the error across parts
    # It is equivalent to .mean(dim=-1).mean(dim=-1)

    print("orientation_error")
    print(orientation_error.item())
    print()
    print("orientation_error_new")
    print(orientation_error_new.item())
    print()

In [None]:
import torch 
import numpy as np
from scipy.spatial.transform import Rotation as R

def compare_orientation_rotvec(gt_pose, pred_rotmat):
    # Taking as input one axis angle representation and one rotation matrix representation
    
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    curr_batch_size = 1

    gt_pose = gt

    #print("gt_pose", gt_pose.shape, gt_pose)
    #print("pred_pose", pred.shape, pred)

    # Get ground truth orientation (already stored in gt_pose)
    gt_rotvec = torch.zeros((curr_batch_size,24,3), dtype=torch.double) # Have to have an array of this shape to input into the rotation object (from 32,72 to 32,24,3)
    i = 0
    for row in gt_pose:
        gt_rotvec[i] = torch.reshape(row,(24, -1))
        i+=1
    print("gt_rotvec", gt_rotvec.shape, gt_rotvec)
    
    # Get prediction as rotation vectors
    
    pred_rotvec = torch.zeros((curr_batch_size,24,3)) # Has to be a numpy array because it works with Rotation

    # For each row in pred_rotmat convert it to a Rotation object and write it into a corresponding
    # row in pred_rotvec as rotation vectors

    i=0
    for row in pred_rotmat:
        r = R.from_dcm(row.cpu())
        pred_rotvec[i] = R.as_rotvec(r)
        i+=1

    print("pred_rotvec", pred_rotvec.shape, pred_rotvec)

    orientation_error_non_reduced = np.degrees(torch.sqrt((gt_rotvec - pred_rotvec)**2))
    
    print("error per part", orientation_error_non_reduced)

    orientation_error = np.degrees(torch.sqrt((gt_rotvec - pred_rotvec)**2).sum(dim=-1).mean(dim=-1))
    # The reduction above is wrong. For a 90 degree error in one angle, it averages out 3.75 degrees, which
    # is 90/24. The correct reduction would be a mean of 1.25 (90/72), because there are 72 angles (3 for each part)
    # To remove the root, add [:,1:,:] to gt_euler and pred_euler above

    orientation_error_new = np.degrees(torch.sqrt((gt_rotvec - pred_rotvec)**2).mean())
    # This reduction is more accurate because it averages the error per part and then the error across parts
    # It is equivalent to .mean(dim=-1).mean(dim=-1)

    print("orientation_error")
    print(orientation_error.item())
    print()
    print("orientation_error_new")
    print(orientation_error_new.item())
    print()

In [25]:
a = torch.zeros(1,72)
b = torch.zeros(1,72)

b[0][0] = np.pi/4
b[0][1] = np.pi/4

compare_orientation_rotvec(a,b)

gt_pose torch.Size([1, 72]) tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
pred_pose torch.Size([1, 72]) tensor([[0.7854, 0.7854, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0

In [None]:
m = [[(1/np.sqrt(2)),0,(1/np.sqrt(2))],
    [1/2, (1/np.sqrt(2)), -1/2],
    [-1/2, (1/np.sqrt(2)), 1/2]]

m

In [None]:
# This is a rotation of 45 degrees around x multiplied on the
# left by a rotation of 45 degrees around y

m = [[(1/np.sqrt(2)),1/2,1/2],
    [0, (1/np.sqrt(2)), -(1/np.sqrt(2))],
    [-(1/np.sqrt(2)), 1/2, 1/2]]
m

In [None]:
n = R.from_dcm(m)

In [None]:
print(R.as_euler(n, 'xyz', degrees=True))

In [None]:
o = R.from_euler('xyz', [45, 45, 0], degrees=True)

print(o.as_dcm())