In [8]:
"""
This script can be used to evaluate a trained model on 3D pose/shape and masks/part segmentation. You first need to download the datasets and preprocess them.
Example usage:
```
python3 eval.py --checkpoint=data/model_checkpoint.pt --dataset=h36m-p1 --log_freq=20
```
Running the above command will compute the MPJPE and Reconstruction Error on the Human3.6M dataset (Protocol I). The ```--dataset``` option can take different values based on the type of evaluation you want to perform:
1. Human3.6M Protocol 1 ```--dataset=h36m-p1```
2. Human3.6M Protocol 2 ```--dataset=h36m-p2```
3. 3DPW ```--dataset=3dpw```
4. LSP ```--dataset=lsp```
5. MPI-INF-3DHP ```--dataset=mpi-inf-3dhp```
"""

import torch
from torch.utils.data import DataLoader
import numpy as np
import cv2
import os
import argparse
import json
from collections import namedtuple
from tqdm import tqdm
import torchgeometry as tgm

import config
import constants
from models import hmr, SMPL
from datasets import BaseDataset
from utils.imutils import uncrop
from utils.pose_utils import reconstruction_error
from utils.part_utils import PartRenderer

from orientation_evaluation import orientation_evaluation
from scipy.spatial.transform import Rotation as R

# Define command-line arguments
parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint', default=None, help='Path to network checkpoint')
parser.add_argument('--dataset', default='h36m-p1', choices=['h36m-p1', 'h36m-p2', 'lsp', '3dpw', 'mpi-inf-3dhp'], help='Choose evaluation dataset')
parser.add_argument('--log_freq', default=50, type=int, help='Frequency of printing intermediate results')
parser.add_argument('--batch_size', default=32, help='Batch size for testing')
parser.add_argument('--shuffle', default=False, action='store_true', help='Shuffle data')
parser.add_argument('--num_workers', default=8, type=int, help='Number of processes for data loading')
parser.add_argument('--result_file', default=None, help='If set, save detections to a .npz file')

def run_evaluation(model, dataset_name, dataset, result_file,
                   batch_size=32, img_res=224, 
                   num_workers=32, shuffle=False, log_freq=50):
    """Run evaluation on the datasets and metrics we report in the paper. """

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Transfer model to the GPU
    model.to(device)

    # Load SMPL model
    smpl_neutral = SMPL(config.SMPL_MODEL_DIR,
                        create_transl=False).to(device)
    smpl_male = SMPL(config.SMPL_MODEL_DIR,
                     gender='male',
                     create_transl=False).to(device)
    smpl_female = SMPL(config.SMPL_MODEL_DIR,
                       gender='female',
                       create_transl=False).to(device)
    
    renderer = PartRenderer()
    
    # Regressor for H36m joints
    J_regressor = torch.from_numpy(np.load(config.JOINT_REGRESSOR_H36M)).float()
    
    save_results = result_file is not None
    # Disable shuffling if you want to save the results
    if save_results:
        shuffle=False
    # Create dataloader for the dataset
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
    
    # Pose metrics
    # MPJPE and Reconstruction error for the non-parametric and parametric shapes
    mpjpe = np.zeros(len(dataset))
    recon_err = np.zeros(len(dataset))
    mpjpe_smpl = np.zeros(len(dataset))
    recon_err_smpl = np.zeros(len(dataset))
    
    # Including mean per joint angular error (reduced and per part)
    mpjae = np.zeros(len(dataset))
    mpjae_per_part = torch.zeros(len(dataset), 24, 3)

    # Shape metrics
    # Mean per-vertex error
    shape_err = np.zeros(len(dataset))
    shape_err_smpl = np.zeros(len(dataset))

    # Mask and part metrics
    # Accuracy
    accuracy = 0.
    parts_accuracy = 0.
    # True positive, false positive and false negative
    tp = np.zeros((2,1))
    fp = np.zeros((2,1))
    fn = np.zeros((2,1))
    parts_tp = np.zeros((7,1))
    parts_fp = np.zeros((7,1))
    parts_fn = np.zeros((7,1))
    # Pixel count accumulators
    pixel_count = 0
    parts_pixel_count = 0

    # Store SMPL parameters
    smpl_pose = np.zeros((len(dataset), 72))
    smpl_betas = np.zeros((len(dataset), 10))
    smpl_camera = np.zeros((len(dataset), 3))
    pred_joints = np.zeros((len(dataset), 17, 3))

    eval_pose = False
    eval_masks = False
    eval_parts = False
    eval_orientation = False # Adding the orientation parameter
    # Choose appropriate evaluation for each dataset
    if dataset_name == 'h36m-p1' or dataset_name == 'h36m-p2' or dataset_name == 'mpi-inf-3dhp':
        eval_pose = True
    elif dataset_name == 'lsp':
        eval_masks = True
        eval_parts = True
        annot_path = config.DATASET_FOLDERS['upi-s1h']
    elif dataset_name == '3dpw':
        eval_orientation = True
        eval_pose = True
        

    joint_mapper_h36m = constants.H36M_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.H36M_TO_J14
    joint_mapper_gt = constants.J24_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.J24_TO_J14
    # Iterate over the entire dataset
    for step, batch in enumerate(tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gt_vertices = smpl_neutral(betas=gt_betas, body_pose=gt_pose[:, 3:], global_orient=gt_pose[:, :3]).vertices
        images = batch['img'].to(device)
        gender = batch['gender'].to(device)
        curr_batch_size = images.shape[0]
        
        with torch.no_grad():
            pred_rotmat, pred_betas, pred_camera = model(images)
            pred_output = smpl_neutral(betas=pred_betas, body_pose=pred_rotmat[:,1:], global_orient=pred_rotmat[:,0].unsqueeze(1), pose2rot=False)
            pred_vertices = pred_output.vertices

        if save_results:
            rot_pad = torch.tensor([0,0,1], dtype=torch.float32, device=device).view(1,3,1)
            rotmat = torch.cat((pred_rotmat.view(-1, 3, 3), rot_pad.expand(curr_batch_size * 24, -1, -1)), dim=-1)
            pred_pose = tgm.rotation_matrix_to_angle_axis(rotmat).contiguous().view(-1, 72)
            smpl_pose[step * batch_size:step * batch_size + curr_batch_size, :] = pred_pose.cpu().numpy()
            smpl_betas[step * batch_size:step * batch_size + curr_batch_size, :]  = pred_betas.cpu().numpy()
            smpl_camera[step * batch_size:step * batch_size + curr_batch_size, :]  = pred_camera.cpu().numpy()
        
        # Orientation evaluation
        orientation_error_per_part, orientation_error, orientation_error_new = \
        orientation_evaluation(gt_pose, pred_rotmat, batch_size, curr_batch_size, step)
        
        mpjae[step * batch_size:step * batch_size + curr_batch_size] = orientation_error_new
        mpjae_per_part[step*batch_size : step*batch_size + curr_batch_size] = orientation_error_per_part
            
        # 3D pose evaluation
        if eval_pose:
            # Regressor broadcasting
            J_regressor_batch = J_regressor[None, :].expand(pred_vertices.shape[0], -1, -1).to(device)
            # Get 14 ground truth joints
            if 'h36m' in dataset_name or 'mpi-inf' in dataset_name:
                gt_keypoints_3d = batch['pose_3d'].cuda()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_gt, :-1]
            # For 3DPW get the 14 common joints from the rendered shape
            else:
                gt_vertices = smpl_male(global_orient=gt_pose[:,:3], body_pose=gt_pose[:,3:], betas=gt_betas).vertices 
                gt_vertices_female = smpl_female(global_orient=gt_pose[:,:3], body_pose=gt_pose[:,3:], betas=gt_betas).vertices 
                gt_vertices[gender==1, :, :] = gt_vertices_female[gender==1, :, :]
                gt_keypoints_3d = torch.matmul(J_regressor_batch, gt_vertices)
                gt_pelvis = gt_keypoints_3d[:, [0],:].clone()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_h36m, :]
                gt_keypoints_3d = gt_keypoints_3d - gt_pelvis 


            # Get 14 predicted joints from the mesh
            pred_keypoints_3d = torch.matmul(J_regressor_batch, pred_vertices)
            if save_results:
                pred_joints[step * batch_size:step * batch_size + curr_batch_size, :, :]  = pred_keypoints_3d.cpu().numpy()
            pred_pelvis = pred_keypoints_3d[:, [0],:].clone()
            pred_keypoints_3d = pred_keypoints_3d[:, joint_mapper_h36m, :]
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis 

            # Absolute error (MPJPE)
            error = torch.sqrt(((pred_keypoints_3d - gt_keypoints_3d) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error

            # Reconstuction_error
            r_error = reconstruction_error(pred_keypoints_3d.cpu().numpy(), gt_keypoints_3d.cpu().numpy(), reduction=None)
            recon_err[step * batch_size:step * batch_size + curr_batch_size] = r_error


        # If mask or part evaluation, render the mask and part images
        if eval_masks or eval_parts:
            mask, parts = renderer(pred_vertices, pred_camera)

        # Mask evaluation (for LSP)
        if eval_masks:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            # Dimensions of original image
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                # After rendering, convert imate back to original resolution
                pred_mask = uncrop(mask[i].cpu().numpy(), center[i], scale[i], orig_shape[i]) > 0
                # Load gt mask
                gt_mask = cv2.imread(os.path.join(annot_path, batch['maskname'][i]), 0) > 0
                # Evaluation consistent with the original UP-3D code
                accuracy += (gt_mask == pred_mask).sum()
                pixel_count += np.prod(np.array(gt_mask.shape))
                for c in range(2):
                    cgt = gt_mask == c
                    cpred = pred_mask == c
                    tp[c] += (cgt & cpred).sum()
                    fp[c] +=  (~cgt & cpred).sum()
                    fn[c] +=  (cgt & ~cpred).sum()
                f1 = 2 * tp / (2 * tp + fp + fn)

        # Part evaluation (for LSP)
        if eval_parts:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                pred_parts = uncrop(parts[i].cpu().numpy().astype(np.uint8), center[i], scale[i], orig_shape[i])
                # Load gt part segmentation
                gt_parts = cv2.imread(os.path.join(annot_path, batch['partname'][i]), 0)
                # Evaluation consistent with the original UP-3D code
                # 6 parts + background
                for c in range(7):
                   cgt = gt_parts == c
                   cpred = pred_parts == c
                   cpred[gt_parts == 255] = 0
                   parts_tp[c] += (cgt & cpred).sum()
                   parts_fp[c] +=  (~cgt & cpred).sum()
                   parts_fn[c] +=  (cgt & ~cpred).sum()
                gt_parts[gt_parts == 255] = 0
                pred_parts[pred_parts == 255] = 0
                parts_f1 = 2 * parts_tp / (2 * parts_tp + parts_fp + parts_fn)
                parts_accuracy += (gt_parts == pred_parts).sum()
                parts_pixel_count += np.prod(np.array(gt_parts.shape))

        # Print intermediate results during evaluation
        if step % log_freq == log_freq - 1:
            if eval_pose:
                print('MPJPE: ' + str(1000 * mpjpe[:step * batch_size].mean()))
                print('Reconstruction Error: ' + str(1000 * recon_err[:step * batch_size].mean()))
                print()
            if eval_masks:
                print('Accuracy: ', accuracy / pixel_count)
                print('F1: ', f1.mean())
                print()
            if eval_parts:
                print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
                print('Parts F1 (BG): ', parts_f1[[0,1,2,3,4,5,6]].mean())
                print()
            if eval_orientation:
                print('Orientation error: ' + str(mpjae[:step * batch_size].mean()))

    # Save reconstructions to a file for further processing
    if save_results:
        np.savez(result_file, pred_joints=pred_joints, pose=smpl_pose, betas=smpl_betas, camera=smpl_camera)
    # Print final results during evaluation
    print('*** Final Results ***')
    print()
    if eval_pose:
        print('MPJPE: ' + str(1000 * mpjpe.mean()))
        print('Reconstruction Error: ' + str(1000 * recon_err.mean()))
        print()
    if eval_masks:
        print('Accuracy: ', accuracy / pixel_count)
        print('F1: ', f1.mean())
        print()
    if eval_parts:
        print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
        print('Parts F1 (BG): ', parts_f1[[0,1,2,3,4,5,6]].mean())
        print()
    if eval_orientation:
        print('Orientation Error: ' + str(mpjae.mean()))
        print('Orientation Error per part: ', mpjae_per_part)
        torch.save(mpjae_per_part, 'mpjae_per_part.pt')

In [9]:
if __name__ == '__main__':
    
    args = parser.parse_args(['--checkpoint=data/model_checkpoint.pt','--dataset=3dpw', '--log_freq=20'])
    # Here we inserted our own arguments list
    
    model = hmr(config.SMPL_MEAN_PARAMS)
    checkpoint = torch.load(args.checkpoint)
    model.load_state_dict(checkpoint['model'], strict=False)
    model.eval()

    # Setup evaluation dataset
    dataset = BaseDataset(None, args.dataset, is_train=False)
    # Run evaluation
    run_evaluation(model, args.dataset, dataset, args.result_file,
                   batch_size=args.batch_size,
                   shuffle=args.shuffle,
                   log_freq=args.log_freq)

Eval:   2%|▏         | 20/1110 [00:52<24:53,  1.37s/it]  

MPJPE: 81.37874403282215
Reconstruction Error: 41.73969471900675

Orientation error: 8.301217391926139


Eval:   4%|▎         | 40/1110 [01:01<12:28,  1.43it/s]

MPJPE: 93.01356022031261
Reconstruction Error: 45.72069009867473

Orientation error: 8.917448099172002


Eval:   5%|▌         | 60/1110 [01:25<07:32,  2.32it/s]  

MPJPE: 101.09796959480619
Reconstruction Error: 51.432238564179364

Orientation error: 9.283912416846842


Eval:   7%|▋         | 80/1110 [01:35<16:00,  1.07it/s]

MPJPE: 98.7467867649408
Reconstruction Error: 52.1927023537846

Orientation error: 9.357562113788697


Eval:   9%|▉         | 100/1110 [01:54<08:04,  2.09it/s] 

MPJPE: 98.17523845574921
Reconstruction Error: 52.06985538002254

Orientation error: 9.492716857255799


Eval:  11%|█         | 120/1110 [02:14<12:28,  1.32it/s]

MPJPE: 96.07387304662124
Reconstruction Error: 52.26824514395675

Orientation error: 9.43975521834012


Eval:  13%|█▎        | 140/1110 [02:30<07:17,  2.22it/s]

MPJPE: 95.53847144434911
Reconstruction Error: 54.08132505995094

Orientation error: 9.605082343641998


Eval:  14%|█▍        | 160/1110 [02:58<19:15,  1.22s/it]  

MPJPE: 98.54347701696972
Reconstruction Error: 56.634753527348195

Orientation error: 9.98932664327239


Eval:  16%|█▌        | 180/1110 [03:12<06:48,  2.28it/s]

MPJPE: 99.73491341728905
Reconstruction Error: 58.15416857451929

Orientation error: 10.074377547260031


Eval:  18%|█▊        | 200/1110 [03:28<21:40,  1.43s/it]

MPJPE: 99.64782742832567
Reconstruction Error: 60.30048787694462

Orientation error: 10.417564531309836


Eval:  20%|█▉        | 220/1110 [03:36<06:30,  2.28it/s]

MPJPE: 101.72668339038209
Reconstruction Error: 60.335253251846204

Orientation error: 10.298260807738503


Eval:  22%|██▏       | 240/1110 [03:47<15:38,  1.08s/it]

MPJPE: 101.2261059012451
Reconstruction Error: 59.34126633336324

Orientation error: 10.098604911945815


Eval:  23%|██▎       | 260/1110 [03:57<07:06,  1.99it/s]

MPJPE: 101.41501063166338
Reconstruction Error: 59.57728412031991

Orientation error: 9.885730506637461


Eval:  25%|██▌       | 280/1110 [04:07<07:02,  1.97it/s]

MPJPE: 102.28858076288002
Reconstruction Error: 59.867952756899136

Orientation error: 9.770727506474236


Eval:  27%|██▋       | 300/1110 [04:18<07:43,  1.75it/s]

MPJPE: 101.2368943148221
Reconstruction Error: 59.40012613301287

Orientation error: 9.699317643240029


Eval:  29%|██▉       | 320/1110 [04:30<06:30,  2.02it/s]

MPJPE: 100.787596164715
Reconstruction Error: 58.77969346847093

Orientation error: 9.638661989302124


Eval:  31%|███       | 340/1110 [04:40<06:14,  2.06it/s]

MPJPE: 99.71019655986686
Reconstruction Error: 58.42986403965147

Orientation error: 9.644130566835804


Eval:  32%|███▏      | 360/1110 [04:49<06:17,  1.98it/s]

MPJPE: 99.22611367989923
Reconstruction Error: 58.74572166184633

Orientation error: 9.702722487781003


Eval:  34%|███▍      | 380/1110 [04:57<04:34,  2.66it/s]

MPJPE: 97.52136656196203
Reconstruction Error: 58.06407751699806

Orientation error: 9.666777709845965


Eval:  36%|███▌      | 400/1110 [05:07<07:27,  1.59it/s]

MPJPE: 96.97976527603711
Reconstruction Error: 58.0197731418101

Orientation error: 9.65897574341051


Eval:  38%|███▊      | 420/1110 [05:18<06:03,  1.90it/s]

MPJPE: 97.04290869254574
Reconstruction Error: 58.60944849390065

Orientation error: 9.630202759782692


Eval:  40%|███▉      | 440/1110 [05:29<07:11,  1.55it/s]

MPJPE: 97.13350864210015
Reconstruction Error: 58.6497318643996

Orientation error: 9.65047314931424


Eval:  41%|████▏     | 460/1110 [05:40<05:30,  1.96it/s]

MPJPE: 96.64615257318519
Reconstruction Error: 58.50512616200401

Orientation error: 9.62443830232045


Eval:  43%|████▎     | 480/1110 [05:52<05:22,  1.95it/s]

MPJPE: 95.80476736015055
Reconstruction Error: 58.182424580700705

Orientation error: 9.600506167123314


Eval:  45%|████▌     | 500/1110 [06:02<05:26,  1.87it/s]

MPJPE: 96.07492126911723
Reconstruction Error: 58.44852769103943

Orientation error: 9.630988569460293


Eval:  47%|████▋     | 520/1110 [06:15<06:01,  1.63it/s]

MPJPE: 95.764759606444
Reconstruction Error: 58.31344841397592

Orientation error: 9.604472781011264


Eval:  49%|████▊     | 540/1110 [06:27<05:57,  1.59it/s]

MPJPE: 95.77269418519734
Reconstruction Error: 58.25498443650227

Orientation error: 9.580903662937596


Eval:  50%|█████     | 560/1110 [06:37<04:51,  1.89it/s]

MPJPE: 96.86103236683675
Reconstruction Error: 58.5607300873401

Orientation error: 9.60804681764662


Eval:  52%|█████▏    | 580/1110 [06:50<03:48,  2.32it/s]

MPJPE: 97.26141928825834
Reconstruction Error: 58.600691197615355

Orientation error: 9.62137952931423


Eval:  54%|█████▍    | 600/1110 [07:09<07:59,  1.06it/s]

MPJPE: 98.29730211023649
Reconstruction Error: 58.67116694960149

Orientation error: 9.660806977401156


Eval:  56%|█████▌    | 620/1110 [07:15<03:40,  2.22it/s]

MPJPE: 98.23011851563722
Reconstruction Error: 59.05669454760889

Orientation error: 9.712031446028323


Eval:  58%|█████▊    | 640/1110 [07:25<03:03,  2.56it/s]

MPJPE: 97.67185774781554
Reconstruction Error: 58.979304686096874

Orientation error: 9.688309270678893


Eval:  60%|█████▉    | 661/1110 [07:35<05:22,  1.39it/s]

MPJPE: 97.18698598820735
Reconstruction Error: 58.87917421603779

Orientation error: 9.706401420119855


Eval:  61%|██████▏   | 680/1110 [07:42<03:08,  2.28it/s]

MPJPE: 96.99483215800296
Reconstruction Error: 58.78070092597834

Orientation error: 9.719306826425552


Eval:  63%|██████▎   | 700/1110 [07:49<02:06,  3.25it/s]

MPJPE: 96.77892331344053
Reconstruction Error: 58.85737517482877

Orientation error: 9.692356570138148


Eval:  65%|██████▍   | 720/1110 [07:58<03:01,  2.15it/s]

MPJPE: 96.41934391015607
Reconstruction Error: 59.00512574593859

Orientation error: 9.6439248069934


Eval:  67%|██████▋   | 740/1110 [08:11<02:58,  2.08it/s]

MPJPE: 95.87445049866447
Reconstruction Error: 58.89322934042113

Orientation error: 9.620443120744646


Eval:  68%|██████▊   | 760/1110 [08:22<03:15,  1.79it/s]

MPJPE: 95.28066178641745
Reconstruction Error: 58.57721777647864

Orientation error: 9.588335140923272


Eval:  70%|███████   | 780/1110 [08:46<03:32,  1.55it/s]

MPJPE: 95.32241956027468
Reconstruction Error: 58.396075461424445

Orientation error: 9.550964598636195


Eval:  72%|███████▏  | 800/1110 [08:56<03:52,  1.34it/s]

MPJPE: 95.03338752057724
Reconstruction Error: 57.998542555867786

Orientation error: 9.503799512276396


Eval:  74%|███████▍  | 820/1110 [09:12<01:35,  3.03it/s]

MPJPE: 94.77129218207504
Reconstruction Error: 57.795886928912125

Orientation error: 9.477754139324057


Eval:  76%|███████▌  | 840/1110 [09:27<03:55,  1.15it/s]

MPJPE: 94.97227546023827
Reconstruction Error: 57.86474806755277

Orientation error: 9.458699660016924


Eval:  77%|███████▋  | 860/1110 [09:35<01:28,  2.84it/s]

MPJPE: 94.6700266467942
Reconstruction Error: 57.93397202313964

Orientation error: 9.442182616950156


Eval:  79%|███████▉  | 880/1110 [09:45<01:36,  2.40it/s]

MPJPE: 95.16668808374996
Reconstruction Error: 57.82050719398336

Orientation error: 9.47823908191114


Eval:  81%|████████  | 900/1110 [09:58<05:08,  1.47s/it]

MPJPE: 95.60778589541863
Reconstruction Error: 58.482298644847994

Orientation error: 9.488770803599863


Eval:  83%|████████▎ | 920/1110 [10:08<01:31,  2.07it/s]

MPJPE: 95.92476836754054
Reconstruction Error: 58.696438816692215

Orientation error: 9.525968189919906


Eval:  85%|████████▍ | 940/1110 [10:21<01:10,  2.42it/s]

MPJPE: 96.42709904449275
Reconstruction Error: 59.011896208689386

Orientation error: 9.56078473060671


Eval:  86%|████████▋ | 960/1110 [10:31<01:13,  2.03it/s]

MPJPE: 96.57400069709496
Reconstruction Error: 59.12613980964941

Orientation error: 9.60315614591921


Eval:  88%|████████▊ | 980/1110 [10:39<00:52,  2.47it/s]

MPJPE: 96.40454657848372
Reconstruction Error: 59.10551876262062

Orientation error: 9.593283433895452


Eval:  90%|█████████ | 1000/1110 [10:47<00:43,  2.52it/s]

MPJPE: 96.15514202476214
Reconstruction Error: 58.99938160780346

Orientation error: 9.5840790235374


Eval:  92%|█████████▏| 1020/1110 [10:54<00:27,  3.23it/s]

MPJPE: 95.85081390019505
Reconstruction Error: 58.96727392843891

Orientation error: 9.566197215521473


Eval:  94%|█████████▎| 1040/1110 [11:02<00:28,  2.50it/s]

MPJPE: 95.69092004460663
Reconstruction Error: 58.8947771665218

Orientation error: 9.54798952087937


Eval:  96%|█████████▌| 1061/1110 [11:09<00:12,  3.98it/s]

MPJPE: 96.1986018673559
Reconstruction Error: 59.07717778272021

Orientation error: 9.545936897417256


Eval:  97%|█████████▋| 1081/1110 [11:11<00:03,  9.48it/s]

MPJPE: 96.5447576704782
Reconstruction Error: 59.155131101707966

Orientation error: 9.5267607877964


Eval:  99%|█████████▉| 1101/1110 [11:13<00:00,  9.47it/s]

MPJPE: 96.87599752032011
Reconstruction Error: 59.31143589761427

Orientation error: 9.518863738219308


Eval: 100%|██████████| 1110/1110 [11:15<00:00,  1.64it/s]

*** Final Results ***

MPJPE: 96.94085740828478
Reconstruction Error: 59.285746753744455

Orientation Error: 9.5149063225102
Orientation Error per part:  tensor([[[3.4467e+02, 1.3718e+00, 1.4633e+00],
         [1.1698e+01, 5.1830e+00, 1.1266e+00],
         [1.6275e+01, 5.3367e-01, 3.6228e+00],
         ...,
         [1.1765e+01, 2.5596e+00, 5.4265e+00],
         [2.4144e+00, 3.2284e+00, 2.4397e+00],
         [1.5845e-02, 1.9711e+00, 1.9843e+00]],

        [[3.4505e+02, 1.3102e+00, 2.2830e+00],
         [1.2002e+01, 4.3776e+00, 8.9079e-01],
         [1.6749e+01, 1.0212e+00, 3.5849e+00],
         ...,
         [1.2371e+01, 2.3686e+00, 4.4567e+00],
         [2.3773e+00, 3.2820e+00, 2.4814e+00],
         [7.9473e-02, 2.0159e+00, 2.0162e+00]],

        [[3.4473e+02, 1.5390e+00, 6.8220e-01],
         [1.2073e+01, 4.2944e+00, 7.8759e-01],
         [1.6626e+01, 8.6043e-01, 3.1790e+00],
         ...,
         [1.2720e+01, 2.0895e+00, 4.3858e+00],
         [2.3054e+00, 3.2469e+00, 2.3463e+00],
 




In [10]:
mpjae = torch.load('mpjae_per_part.pt')
mpjae.shape

torch.Size([35515, 24, 3])

In [15]:
mpjae_mean = mpjae.mean(dim=0)
mpjae_mean

tensor([[98.1797,  9.4942, 51.9239],
        [10.0485,  5.6163,  4.0774],
        [ 9.8444,  4.6058,  4.8752],
        [ 9.1616,  2.0758,  2.2850],
        [16.9512,  5.1322,  6.1049],
        [16.4851,  4.1317,  4.9168],
        [ 4.4222,  2.1591,  2.0369],
        [ 4.7690,  8.3333,  3.8979],
        [ 5.5424, 11.1734,  6.7159],
        [ 3.0120,  1.6893,  1.0108],
        [11.9835,  8.3257,  8.1128],
        [ 3.9105,  6.4853, 11.5482],
        [ 5.9942,  8.0072,  5.4115],
        [ 3.6484,  9.7804, 10.5708],
        [ 4.6065,  7.8352, 10.8048],
        [10.2832,  6.6511,  5.5037],
        [ 8.7521,  7.7237, 12.1250],
        [13.8173,  8.5829, 11.1074],
        [16.6280, 18.6774, 13.1059],
        [15.8921, 16.8754, 15.4179],
        [ 8.4844,  6.3963, 17.3813],
        [ 5.1584,  4.7116, 12.7035],
        [ 3.2690,  2.1730,  1.5460],
        [ 1.7931,  1.0212,  1.5960]])

In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
# Our first sanity check should be whether ground truth compared to ground truth yields zero error

import torch 
import numpy as np
from scipy.spatial.transform import Rotation as R

import pickle as pkl
import os

img_path = 'examples/image_00502_crop.jpg'

pickle_path = 'data/3dpw/sequenceFiles/validation/courtyard_basketball_01.pkl'

frame = 502

# Load the .pkl sequence file containing the ground-truth information from 3dpw

seq = pkl.load(open(pickle_path,'rb'),encoding='latin-1') # opening the sequence file, latin-1 encoding for making it compatible with python3

gt_pose = torch.tensor(seq['poses'][0][frame]).unsqueeze(0)

# print(gt_pose)

pred_rotmat = torch.zeros(1, 24, 3, 3)

q = R.from_rotvec(gt_pose.reshape(1,24,-1)[0])

pred_rotmat = torch.tensor(R.as_dcm(q)).unsqueeze(0)

# print(pred_rotmat)

In [None]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

curr_batch_size = 1

print("gt_pose", gt_pose.shape, gt_pose)

print("pred_rotmat", pred_rotmat.shape, pred_rotmat)

# Get ground truth orientation (already stored in gt_pose)
gt_rotvec = torch.zeros(curr_batch_size,24,3) # Have to have an array of this shape to input into the rotation object (from 32,72 to 32,24,3)
i = 0
for row in gt_pose:
    gt_rotvec[i] = torch.reshape(row,(24, -1))
    i+=1
print("gt_rotvec", gt_rotvec.shape, gt_rotvec)
    
gt_euler = np.zeros((curr_batch_size, 24, 3)) # Using numpy here because it works with the rotation library
i = 0
for row in gt_rotvec:
    r = R.from_rotvec(row)
    gt_euler[i] = R.as_euler(r, 'xyz', degrees=True)
    i+=1

print("gt_euler", gt_euler.shape, gt_euler)

# Get Euler representation of the predictions too:

pred_euler = np.zeros((curr_batch_size,24,3)) # Has to be a numpy array because it works with Rotation

# For each row in pred_rotmat convert it to a Rotation object and write it into a corresponding
# row in pred_euler as Euler angles

i=0
for row in pred_rotmat:
    r = R.from_dcm(row.cpu())
    pred_euler[i] = R.as_euler(r, 'xyz', degrees=True)
    i+=1

print("pred_euler", pred_euler.shape, pred_euler)
    
    
orientation_error_non_reduced = torch.sqrt((torch.from_numpy(gt_euler).to(device) -
                                torch.from_numpy(pred_euler).to(device))**2)
print(orientation_error_non_reduced)
    
orientation_error = torch.sqrt((torch.from_numpy(gt_euler).to(device) -
                                torch.from_numpy(pred_euler).to(device))**2).sum(dim=-1).mean(dim=-1)
# The reduction above is wrong. For a 90 degree error in one angle, it averages out 3.75 degrees, which
# is 90/24. The correct reduction would be a mean of 1.25 (90/72), because there are 72 angles (3 for each part)
# To remove the root, add [:,1:,:] to gt_euler and pred_euler above

orientation_error_new = torch.sqrt((torch.from_numpy(gt_euler).to(device) -
                                torch.from_numpy(pred_euler).to(device))**2).mean()
# This reduction is more accurate because it averages the error per part and then the error across parts
# It is equivalent to .mean(dim=-1).mean(dim=-1)

print("orientation_error")
print(orientation_error)
print()
print("orientation_error_new")
print(orientation_error_new)
print()

In [None]:
def compare_orientation(gt, pred):
    # Taking as input two axis_angle representations
    
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    curr_batch_size = 1

    gt_pose = gt

    print("gt_pose", gt_pose.shape, gt_pose)

    q = R.from_rotvec(pred.reshape(1,24,-1)[0])

    pred_rotmat = torch.tensor(R.as_dcm(q)).unsqueeze(0)

    print("pred_rotmat", pred_rotmat.shape, pred_rotmat)

    # Get ground truth orientation (already stored in gt_pose)
    gt_rotvec = torch.zeros(curr_batch_size,24,3) # Have to have an array of this shape to input into the rotation object (from 32,72 to 32,24,3)
    i = 0
    for row in gt_pose:
        gt_rotvec[i] = torch.reshape(row,(24, -1))
        i+=1
    print("gt_rotvec", gt_rotvec.shape, gt_rotvec)

    gt_euler = np.zeros((curr_batch_size, 24, 3)) # Using numpy here because it works with the rotation library
    i = 0
    for row in gt_rotvec:
        r = R.from_rotvec(row)
        gt_euler[i] = R.as_euler(r, 'xyz', degrees=True)
        i+=1

    print("gt_euler", gt_euler.shape, gt_euler)

    # Get Euler representation of the predictions too:

    pred_euler = np.zeros((curr_batch_size,24,3)) # Has to be a numpy array because it works with Rotation

    # For each row in pred_rotmat convert it to a Rotation object and write it into a corresponding
    # row in pred_euler as Euler angles

    i=0
    for row in pred_rotmat:
        r = R.from_dcm(row.cpu())
        pred_euler[i] = R.as_euler(r, 'xyz', degrees=True)
        i+=1

    print("pred_euler", pred_euler.shape, pred_euler)


    orientation_error_non_reduced = torch.sqrt((torch.from_numpy(gt_euler).to(device) -
                                    torch.from_numpy(pred_euler).to(device))**2)
    print("error per part", orientation_error_non_reduced)

    orientation_error = torch.sqrt((torch.from_numpy(gt_euler).to(device) -
                                    torch.from_numpy(pred_euler).to(device))**2).sum(dim=-1).mean(dim=-1)
    # The reduction above is wrong. For a 90 degree error in one angle, it averages out 3.75 degrees, which
    # is 90/24. The correct reduction would be a mean of 1.25 (90/72), because there are 72 angles (3 for each part)
    # To remove the root, add [:,1:,:] to gt_euler and pred_euler above

    orientation_error_new = torch.sqrt((torch.from_numpy(gt_euler).to(device) -
                                    torch.from_numpy(pred_euler).to(device))**2).mean()
    # This reduction is more accurate because it averages the error per part and then the error across parts
    # It is equivalent to .mean(dim=-1).mean(dim=-1)

    print("orientation_error")
    print(orientation_error.item())
    print()
    print("orientation_error_new")
    print(orientation_error_new.item())
    print()

    #moe[step * batch_size:step * batch_size + curr_batch_size] = orientation_error.cpu()

In [24]:
import torch 
import numpy as np
from scipy.spatial.transform import Rotation as R

def compare_orientation_rotvec(gt, pred):
    # Taking as input two axis_angle representations
    
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    curr_batch_size = 1

    gt_pose = gt

    print("gt_pose", gt_pose.shape, gt_pose)
    print("pred_pose", pred.shape, pred)

    # Get ground truth orientation (already stored in gt_pose)
    gt_rotvec = torch.zeros((curr_batch_size,24,3), dtype=torch.double) # Have to have an array of this shape to input into the rotation object (from 32,72 to 32,24,3)
    i = 0
    for row in gt_pose:
        gt_rotvec[i] = torch.reshape(row,(24, -1))
        i+=1
    print("gt_rotvec", gt_rotvec.shape, gt_rotvec)
    
    # Get prediction as rotation vectors
    
    r = R.from_rotvec(pred.reshape(1,24,-1)[0])
    pred_rotvec = torch.tensor(R.as_rotvec(r)).unsqueeze(0)

    print("pred_rotvec", pred_rotvec.shape, pred_rotvec)

    orientation_error_non_reduced = np.degrees(torch.sqrt((gt_rotvec - pred_rotvec)**2))
    
    print("error per part", orientation_error_non_reduced)

    orientation_error = np.degrees(torch.sqrt((gt_rotvec - pred_rotvec)**2).sum(dim=-1).mean(dim=-1))
    # The reduction above is wrong. For a 90 degree error in one angle, it averages out 3.75 degrees, which
    # is 90/24. The correct reduction would be a mean of 1.25 (90/72), because there are 72 angles (3 for each part)
    # To remove the root, add [:,1:,:] to gt_euler and pred_euler above

    orientation_error_new = np.degrees(torch.sqrt((gt_rotvec - pred_rotvec)**2).mean())
    # This reduction is more accurate because it averages the error per part and then the error across parts
    # It is equivalent to .mean(dim=-1).mean(dim=-1)

    print("orientation_error")
    print(orientation_error.item())
    print()
    print("orientation_error_new")
    print(orientation_error_new.item())
    print()

In [None]:
import torch 
import numpy as np
from scipy.spatial.transform import Rotation as R

def compare_orientation_rotvec(gt_pose, pred_rotmat):
    # Taking as input one axis angle representation and one rotation matrix representation
    
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    curr_batch_size = 1

    gt_pose = gt

    #print("gt_pose", gt_pose.shape, gt_pose)
    #print("pred_pose", pred.shape, pred)

    # Get ground truth orientation (already stored in gt_pose)
    gt_rotvec = torch.zeros((curr_batch_size,24,3), dtype=torch.double) # Have to have an array of this shape to input into the rotation object (from 32,72 to 32,24,3)
    i = 0
    for row in gt_pose:
        gt_rotvec[i] = torch.reshape(row,(24, -1))
        i+=1
    print("gt_rotvec", gt_rotvec.shape, gt_rotvec)
    
    # Get prediction as rotation vectors
    
    pred_rotvec = torch.zeros((curr_batch_size,24,3)) # Has to be a numpy array because it works with Rotation

    # For each row in pred_rotmat convert it to a Rotation object and write it into a corresponding
    # row in pred_rotvec as rotation vectors

    i=0
    for row in pred_rotmat:
        r = R.from_dcm(row.cpu())
        pred_rotvec[i] = R.as_rotvec(r)
        i+=1

    print("pred_rotvec", pred_rotvec.shape, pred_rotvec)

    orientation_error_non_reduced = np.degrees(torch.sqrt((gt_rotvec - pred_rotvec)**2))
    
    print("error per part", orientation_error_non_reduced)

    orientation_error = np.degrees(torch.sqrt((gt_rotvec - pred_rotvec)**2).sum(dim=-1).mean(dim=-1))
    # The reduction above is wrong. For a 90 degree error in one angle, it averages out 3.75 degrees, which
    # is 90/24. The correct reduction would be a mean of 1.25 (90/72), because there are 72 angles (3 for each part)
    # To remove the root, add [:,1:,:] to gt_euler and pred_euler above

    orientation_error_new = np.degrees(torch.sqrt((gt_rotvec - pred_rotvec)**2).mean())
    # This reduction is more accurate because it averages the error per part and then the error across parts
    # It is equivalent to .mean(dim=-1).mean(dim=-1)

    print("orientation_error")
    print(orientation_error.item())
    print()
    print("orientation_error_new")
    print(orientation_error_new.item())
    print()

In [25]:
a = torch.zeros(1,72)
b = torch.zeros(1,72)

b[0][0] = np.pi/4
b[0][1] = np.pi/4

compare_orientation_rotvec(a,b)

gt_pose torch.Size([1, 72]) tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
pred_pose torch.Size([1, 72]) tensor([[0.7854, 0.7854, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0

In [None]:
m = [[(1/np.sqrt(2)),0,(1/np.sqrt(2))],
    [1/2, (1/np.sqrt(2)), -1/2],
    [-1/2, (1/np.sqrt(2)), 1/2]]

m

In [None]:
# This is a rotation of 45 degrees around x multiplied on the
# left by a rotation of 45 degrees around y

m = [[(1/np.sqrt(2)),1/2,1/2],
    [0, (1/np.sqrt(2)), -(1/np.sqrt(2))],
    [-(1/np.sqrt(2)), 1/2, 1/2]]
m

In [None]:
n = R.from_dcm(m)

In [None]:
print(R.as_euler(n, 'xyz', degrees=True))

In [None]:
o = R.from_euler('xyz', [45, 45, 0], degrees=True)

print(o.as_dcm())