In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys, os
import torch
import numpy as np
import pickle as pk
os.getcwd()
from tqdm.auto import tqdm

from hybrik.utils.metrics import calc_coord_accuracy
from hybrik.utils.config import update_config
from hybrik.models import builder
from hybrik.datasets import MixDataset, PW3D
import torch.utils.data

from pytorch3d.transforms import quaternion_to_axis_angle, quaternion_to_matrix, matrix_to_quaternion, \
    axis_angle_to_quaternion, rotation_6d_to_matrix

from torch.utils.data import DataLoader, Subset

from deeppose.collections.common.metrics.pa_mpjpe import PA_MPJPE
from deeppose.collections.common.metrics.mpjpe import MPJPE




In [3]:
%env CUDA_VISIBLE_DEVICES=1

env: CUDA_VISIBLE_DEVICES=1


In [4]:
def dict_to_device(d, device):
    d_device = {}
    for k, v in d.items():
        d_device[k] = v.to(device)
    return d_device

In [5]:
def preset_model(cfg, detach=False, device=None):
    model = builder.build_sppe(cfg.MODEL)

    if cfg.MODEL.PRETRAINED:
        print(f'Loading model from {cfg.MODEL.PRETRAINED}...')
        model.load_state_dict(torch.load(cfg.MODEL.PRETRAINED))
    elif cfg.MODEL.TRY_LOAD:
        print(f'Loading model from {cfg.MODEL.TRY_LOAD}...')
        pretrained_state = torch.load(cfg.MODEL.TRY_LOAD)
        model_state = model.state_dict()
        pretrained_state = {k: v for k, v in pretrained_state.items()
                            if k in model_state and v.size() == model_state[k].size()}

        model_state.update(pretrained_state)
        model.load_state_dict(model_state)
    else:
        print('Create new model')
        print('=> init weights')
        model._initialize()
        
    if detach:
        for p in model.parameters():
            p.requires_grad = False
            
    if device is not None:
        model = model.to(device)

    return model

In [6]:
cfg = 'configs/deeppose_transformer_smpl24.yaml'
# cfg = 'configs/256x192_adam_lr1e-3-res34_smpl_24_3d_base_2x_mix.yaml'
# configs/256x192_adam_lr1e-3-res34_smpl_24_3d_base_2x_mix.yaml

cfg = update_config(cfg)

hm_shape = cfg.MODEL.get('HEATMAP_SIZE')
depth_dim = cfg.MODEL.EXTRA.get('DEPTH_DIM')
hm_shape = (hm_shape[1], hm_shape[0], depth_dim)

cfg

{'DATASET': {'DATASET': 'mix_smpl',
  'SET_LIST': [{'ROOT': './data/h36m/',
    'TEST_SET': 'Sample_20_test_Human36M_smpl',
    'TRAIN_SET': 'Sample_5_train_Human36M_smpl_leaf_twist'},
   {'ROOT': './data/coco/', 'TRAIN_SET': 'train2017'},
   {'ROOT': './data/3dhp/', 'TRAIN_SET': 'train_v2'}],
  'PROTOCOL': 2,
  'FLIP': True,
  'ROT_FACTOR': 30,
  'SCALE_FACTOR': 0.3,
  'NUM_JOINTS_HALF_BODY': 8,
  'PROB_HALF_BODY': -1,
  'COLOR_FACTOR': 0.2,
  'OCCLUSION': True},
 'MODEL': {'TYPE': 'DeepposeTransformerSMPL24',
  'PRETRAINED': '',
  'TRY_LOAD': '',
  'IMAGE_SIZE': [256, 256],
  'HEATMAP_SIZE': [64, 64],
  'NUM_JOINTS': 24,
  'NUM_DECONV_FILTERS': [256, 256, 256],
  'NUM_LAYERS': 34,
  'EXTRA': {'SIGMA': 2,
   'BACKBONE': 'resnet',
   'CROP': 'padding',
   'AUGMENT': 'none',
   'PRESET': 'simple_smpl_3d',
   'DEPTH_DIM': 64},
  'POST': {'NORM_TYPE': 'softmax'},
  'PROTORES': {'PATH': 'model_files/h36m/epoch=471-step=214759-fmn.ckpt'},
  'TRANSFORMER': {'NUM_BLOCKS': 2,
   'NUM_LAYERS': 

In [7]:
train_dataset = MixDataset(cfg=cfg, train=True)

loading annotations into memory...
Done (t=6.19s)
creating index...
index created!


In [8]:
train_loader = torch.utils.data.DataLoader(train_dataset, 
                                           batch_size=256, 
                                           shuffle=True, 
                                           num_workers=4)

In [None]:
cfg.MODEL.TRY_LOAD = 'model_files/pretrained/pretrained_res34.pth'
m = preset_model(cfg, detach=False, device='cuda')
m.train()

cfg.LOSS.ELEMENTS.UVD24_WEIGHT = 1.0
cfg.LOSS.ELEMENTS.XYZ_SMPL24_WEIGHT = 0.1

optimizer = torch.optim.Adam(m.parameters(), lr=0.0001)

criterion = builder.build_loss(cfg.LOSS)

for i, batch in enumerate(tqdm(train_loader)):

#     if i >= 0:
    inps, labels, img_ids, bboxes = batch

    labels = dict_to_device(labels, 'cuda')

    trans_inv = labels['trans_inv'].cuda()
    intrinsic_param = labels['intrinsic_param'].cuda()
    root = labels['joint_root'].cuda()
    depth_factor = labels['depth_factor'].cuda()
    
    
    output_protores = m(inps.cuda(), trans_inv, intrinsic_param, root, depth_factor, None, 
                        ik_option='protores') # , labels=labels
    
    loss, loss_dict = criterion(output_protores, labels)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print(f"##### Batch {i} #####")
    print(loss_dict)
    
    if i==1000:
        break



  if OmegaConf.is_none(config):
  stream(template_mgs % msg_args)


Loading model from model_files/pretrained/pretrained_res34.pth...


HBox(children=(FloatProgress(value=0.0, max=2439.0), HTML(value='')))

##### Batch 0 #####
{'loss_tot': tensor(22.0586, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2817, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0506, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(20.3799, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(13.9476, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(16.1591, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 1 #####
{'loss_tot': tensor(31.8322, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2568, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0457, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(30.6427, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(9.3071, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(10.3633, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 2 #####
{'loss_tot': tensor(21.7596, device='cuda:0', grad_fn

##### Batch 18 #####
{'loss_tot': tensor(8.4094, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2518, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0336, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(7.6352, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(5.2091, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(5.3809, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 19 #####
{'loss_tot': tensor(7.3258, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2111, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0316, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(6.5966, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(5.1682, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(5.3310, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 20 #####
{'loss_tot': tensor(7.3454, device='cuda:0', grad_fn=<Add

##### Batch 36 #####
{'loss_tot': tensor(7.0882, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2336, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0316, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(6.3586, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(4.9515, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(5.1703, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 37 #####
{'loss_tot': tensor(7.1731, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2324, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0409, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(6.4395, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(5.0021, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(5.2483, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 38 #####
{'loss_tot': tensor(7.0136, device='cuda:0', grad_fn=<Add

##### Batch 54 #####
{'loss_tot': tensor(6.6238, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2373, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0359, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(5.8988, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(4.8695, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(5.1044, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 55 #####
{'loss_tot': tensor(6.9170, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2441, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0344, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(6.1654, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(5.0675, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(5.3897, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 56 #####
{'loss_tot': tensor(6.5367, device='cuda:0', grad_fn=<Add

##### Batch 72 #####
{'loss_tot': tensor(6.4336, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2236, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0330, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(5.7423, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(4.6718, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(4.7562, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 73 #####
{'loss_tot': tensor(6.5443, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.1732, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0273, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(5.8835, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(4.8712, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(5.0940, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 74 #####
{'loss_tot': tensor(6.3350, device='cuda:0', grad_fn=<Add

##### Batch 90 #####
{'loss_tot': tensor(6.5478, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2078, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0307, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(5.8600, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(4.7951, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(5.0096, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 91 #####
{'loss_tot': tensor(6.7884, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2267, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0348, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(6.0732, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(4.8798, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(5.1326, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 92 #####
{'loss_tot': tensor(6.5402, device='cuda:0', grad_fn=<Add

##### Batch 108 #####
{'loss_tot': tensor(6.2425, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2612, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0335, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(5.5005, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(4.8032, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(4.8918, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 109 #####
{'loss_tot': tensor(6.2284, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2362, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0329, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(5.5254, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(4.6621, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(4.7454, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 110 #####
{'loss_tot': tensor(6.1101, device='cuda:0', grad_fn=<

##### Batch 126 #####
{'loss_tot': tensor(6.0539, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2281, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0351, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(5.3794, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(4.4573, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(4.5271, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 127 #####
{'loss_tot': tensor(6.2860, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2134, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0330, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(5.5982, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(4.7388, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(4.8024, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 128 #####
{'loss_tot': tensor(6.7360, device='cuda:0', grad_fn=<

##### Batch 144 #####
{'loss_tot': tensor(6.2708, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2212, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0317, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(5.5949, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(4.5424, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(4.5721, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 145 #####
{'loss_tot': tensor(6.5100, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2344, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0367, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(5.8146, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(4.6049, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(4.6274, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 146 #####
{'loss_tot': tensor(6.0773, device='cuda:0', grad_fn=<

##### Batch 162 #####
{'loss_tot': tensor(5.7428, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2421, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0378, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(5.0782, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(4.2194, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(3.9322, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 163 #####
{'loss_tot': tensor(5.7639, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2286, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0338, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(5.1154, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(4.1934, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(3.8513, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 164 #####
{'loss_tot': tensor(6.0810, device='cuda:0', grad_fn=<

##### Batch 180 #####
{'loss_tot': tensor(5.8216, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2147, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0366, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(5.1793, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(4.2713, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(4.0379, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 181 #####
{'loss_tot': tensor(5.4140, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2192, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0345, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(4.8031, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(3.9127, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(3.4862, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 182 #####
{'loss_tot': tensor(6.0418, device='cuda:0', grad_fn=<

##### Batch 198 #####
{'loss_tot': tensor(5.2021, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.1917, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0277, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(4.6356, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(3.7442, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(3.3012, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 199 #####
{'loss_tot': tensor(5.2546, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.1913, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0303, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(4.6726, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(3.9024, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(3.5526, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 200 #####
{'loss_tot': tensor(5.1506, device='cuda:0', grad_fn=<

##### Batch 216 #####
{'loss_tot': tensor(4.9640, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2209, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0339, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(4.3619, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(3.8065, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(3.4866, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 217 #####
{'loss_tot': tensor(5.0084, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.2044, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0276, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(4.4178, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(3.8576, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(3.3764, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 218 #####
{'loss_tot': tensor(4.9289, device='cuda:0', grad_fn=<

##### Batch 234 #####
{'loss_tot': tensor(4.7034, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.1917, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0290, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(4.1639, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(3.4739, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(3.0416, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 235 #####
{'loss_tot': tensor(4.4245, device='cuda:0', grad_fn=<AddBackward0>), 'loss_beta': tensor(0.1865, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_theta': tensor(0.0320, device='cuda:0', grad_fn=<MseLossBackward0>), 'loss_uvd': tensor(3.8914, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl24': tensor(3.4619, device='cuda:0', grad_fn=<DivBackward0>), 'loss_xyz_smpl17': tensor(2.9483, device='cuda:0', grad_fn=<DivBackward0>)}
##### Batch 236 #####
{'loss_tot': tensor(4.6301, device='cuda:0', grad_fn=<

In [37]:
output_protores.pred_uvd_jts[0].reshape(24,3)

tensor([[-1.4370e-02, -1.0003e-01, -2.0871e-03],
        [-7.6782e-03, -9.4711e-02, -2.5561e-01],
        [-1.4035e-02, -9.9625e-02,  2.2711e-01],
        [-2.2552e-03, -9.0404e-02,  1.5691e-01],
        [-4.6625e-03, -9.2332e-02, -7.8095e-01],
        [-1.3398e-02, -9.9144e-02,  1.1787e+00],
        [-2.0502e-03, -9.0298e-02,  2.8995e-01],
        [-1.3045e-02, -9.8737e-02,  1.0214e+00],
        [-1.1444e-02, -9.7571e-02,  2.5670e+00],
        [ 1.0098e-03, -8.7953e-02,  2.3891e-01],
        [-1.4029e-02, -9.9471e-02,  9.2546e-01],
        [-1.1545e-02, -9.7657e-02,  2.9813e+00],
        [ 5.6982e-04, -8.8272e-02,  4.4607e-01],
        [ 1.1900e-02, -7.9401e-02,  1.9440e-01],
        [-4.4961e-03, -9.2221e-02,  6.4560e-01],
        [ 2.0457e-02, -7.2921e-02,  1.6358e-01],
        [-2.8467e-02, -1.1088e-01, -2.3608e-01],
        [-5.5469e-03, -9.3064e-02,  7.6717e-01],
        [-2.9741e-02, -1.1132e-01, -7.3476e-02],
        [-8.4817e-03, -9.5310e-02,  1.9641e+00],
        [ 7.3426e-03

In [38]:
labels['target_uvd_29'][1].reshape(29,3)[:24]

tensor([[ 0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000],
        [-0.0116,  0.0855,  0.0000],
        [-0.1308, -0.0091,  0.0000],
        [ 0.0000,  0.0000,  0.0000],
        [-0.0825,  0.2082,  0.0000],
        [-0.2700,  0.1245,  0.0000],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.2052, -0.1024,  0.0000],
        [ 0.1120, -0.1894,  0.0000],
        [ 0.0713, -0.0323,  0.0000],
        [-0.0402, -0.1529,  0.0000],
        [ 0.0309,  0.0400,  0.0000],
        [-0.0925, -0.0502,  0.0000],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000]], device='cuda:0')

In [33]:
labels['target_weight_29'][:, :24*3][1].reshape(24,3)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], device='cuda:0')

In [34]:
output_protores.pred_xyz_jts_24_struct[0].reshape(24,3)

tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 1.9611e-03,  3.5017e-03,  2.0573e-02],
        [-1.1244e-02,  1.2105e-02, -6.1533e-03],
        [ 2.1186e-02,  1.0198e-02, -1.4764e-02],
        [ 2.3703e-04,  1.8503e-02,  1.0682e-02],
        [ 2.5340e-03,  1.9809e-02, -7.7835e-03],
        [ 3.1032e-03,  6.4696e-03, -1.2500e-02],
        [ 7.2176e-03,  5.6351e-03,  1.3921e-02],
        [ 1.0676e-03,  8.1474e-03, -2.0640e-02],
        [ 1.1864e-02,  1.9773e-02, -7.2135e-03],
        [-4.2819e-03,  1.0181e-02,  2.1102e-04],
        [ 2.3607e-02, -2.3885e-03, -1.1643e-02],
        [-2.4415e-02, -1.3554e-02,  7.8893e-03],
        [-6.3996e-03,  4.0183e-03, -1.2410e-04],
        [-4.1180e-03, -1.7192e-03,  2.7290e-03],
        [-2.2248e-02, -8.9058e-03,  1.0633e-03],
        [ 5.3140e-03,  6.7322e-05, -8.6654e-03],
        [-8.4888e-03, -1.2231e-02,  6.3956e-03],
        [ 1.4611e-02,  5.7239e-03,  4.0930e-03],
        [-3.1768e-04, -2.5064e-03, -4.0884e-03],
        [ 1.3137e-02

In [35]:
labels['target_xyz_24'][0].reshape(24,3)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], device='cuda:0')

In [18]:
output_protores.pred_xyz_jts_17[0].reshape(17,3)

tensor([[ 0.0000,  0.0000,  0.0000],
        [ 0.0477,  0.0050, -0.0470],
        [ 0.0687,  0.2088, -0.0159],
        [ 0.1324,  0.3863,  0.0412],
        [-0.0472, -0.0046,  0.0471],
        [ 0.0067,  0.1915,  0.0892],
        [ 0.0528,  0.3765,  0.1523],
        [-0.0143, -0.1151, -0.0141],
        [-0.0514, -0.2248, -0.0312],
        [-0.0870, -0.2512, -0.0679],
        [-0.0898, -0.3030, -0.0521],
        [-0.0154, -0.2054, -0.0904],
        [-0.0588, -0.1023, -0.1591],
        [-0.0911, -0.1859, -0.1137],
        [-0.0713, -0.1979,  0.0304],
        [-0.0550, -0.0970,  0.1007],
        [-0.1278, -0.0464,  0.0474]], device='cuda:0')

In [19]:
labels['target_xyz_17'][0].reshape(17,3)

tensor([[-0.0000,  0.0000,  0.0000],
        [ 0.0412, -0.0004, -0.0532],
        [ 0.0630,  0.2037, -0.0161],
        [ 0.1341,  0.3838,  0.0363],
        [-0.0412,  0.0011,  0.0529],
        [ 0.0071,  0.1963,  0.0966],
        [ 0.0560,  0.3797,  0.1494],
        [-0.0174, -0.1163, -0.0157],
        [-0.0559, -0.2259, -0.0298],
        [-0.0945, -0.2500, -0.0623],
        [-0.0921, -0.3033, -0.0494],
        [-0.0182, -0.2048, -0.0850],
        [-0.0594, -0.1072, -0.1481],
        [-0.0848, -0.1914, -0.1037],
        [-0.0746, -0.1995,  0.0333],
        [-0.0605, -0.0977,  0.1045],
        [-0.1317, -0.0427,  0.0592]], device='cuda:0')

In [55]:
def weighted_l1_loss(input, target, weights, size_average):
    input = input * 64
    target = target * 64
    out = torch.abs(input - target)
    out = out * weights
    if size_average and weights.sum() > 0:
        return out.sum() / weights.sum()
    else:
        return out.sum()

pred_uvd = output_protores.pred_uvd_jts
target_uvd = labels['target_uvd_29'][:, :pred_uvd.shape[1]]
target_uvd_weight = labels['target_weight_29'][:, :pred_uvd.shape[1]]

for i in range(pred_uvd.shape[0]):
    uvd_loss = weighted_l1_loss(pred_uvd[[i]], target_uvd[[i]],
                                weights=target_uvd_weight[[i]], size_average=True)
    print(i, uvd_loss)

0 tensor(219.9721, device='cuda:0')
1 tensor(86.9777, device='cuda:0')
2 tensor(57.6795, device='cuda:0')
3 tensor(29.3383, device='cuda:0')
4 tensor(0.0008, device='cuda:0')
5 tensor(31.4037, device='cuda:0')
6 tensor(307.6479, device='cuda:0')
7 tensor(62.9032, device='cuda:0')
8 tensor(0.0014, device='cuda:0')
9 tensor(0.0013, device='cuda:0')
10 tensor(0.0012, device='cuda:0')
11 tensor(0.0014, device='cuda:0')
12 tensor(12.2110, device='cuda:0')
13 tensor(3.9092, device='cuda:0')
14 tensor(66.4815, device='cuda:0')
15 tensor(13.7375, device='cuda:0')
16 tensor(214.5781, device='cuda:0')
17 tensor(0.0011, device='cuda:0')
18 tensor(58.9240, device='cuda:0')
19 tensor(0.0015, device='cuda:0')
20 tensor(96.8480, device='cuda:0')
21 tensor(79.2419, device='cuda:0')
22 tensor(25.4064, device='cuda:0')
23 tensor(6.3708, device='cuda:0')
24 tensor(7.5052, device='cuda:0')
25 tensor(181.4329, device='cuda:0')
26 tensor(0.0012, device='cuda:0')
27 tensor(71.0691, device='cuda:0')
28 tensor

In [56]:
target_uvd_weight[0]

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0.,
        1., 1., 0., 1., 1., 0., 1., 1., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0.],
       device='cuda:0')

In [41]:
labels.keys()

dict_keys(['trans_inv', 'intrinsic_param', 'joint_root', 'depth_factor', 'target_uvd_29', 'target_xyz_24', 'target_weight_24', 'target_weight_29', 'target_xyz_17', 'target_weight_17', 'target_theta', 'target_beta', 'target_smpl_weight', 'target_theta_weight', 'target_twist', 'target_twist_weight'])

In [1]:
!nvidia-smi

Mon Jul 18 17:28:37 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    59W / 400W |  23010MiB / 40536MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA A100-SXM...  Off  | 00000000:00:05.0 Off |                    0 |
| N/A   34C    P0    73W / 400W |      3MiB / 40536MiB |      0%      Default |
|       

In [9]:
gt_val_dataset_3dpw = PW3D(cfg=cfg,
                           ann_file='3DPW_test_new.json',
                           train=False)

loading annotations into memory...
Done (t=9.58s)
creating index...
index created!


In [10]:
np.random.seed(12345)
gt_val_dataset_3dpw_subset = Subset(gt_val_dataset_3dpw, 
                                    indices=np.random.choice(np.arange(len(gt_val_dataset_3dpw)), 
                                                             size=int(0.1*len(gt_val_dataset_3dpw)), replace=False))

gt_val_loader = DataLoader(gt_val_dataset_3dpw, 
                           batch_size=4, 
                           shuffle=False, 
                           num_workers=8, 
                           drop_last=False)

In [154]:
cfg.MODEL.TRY_LOAD = 'model_files/pretrained/pretrained_res34.pth'

m = preset_model(cfg, detach=True, device='cuda')
m.eval()

criterion = builder.build_loss(cfg.LOSS)

m



  if OmegaConf.is_none(config):


Loading model from model_files/pretrained/pretrained_res34.pth...


DeepposeTransformerNeSMPL24(
  (preact): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, aff

In [308]:
output_protores = m(inps.cuda(), trans_inv, intrinsic_param, root, depth_factor, None, ik_option='protores')

In [313]:
labels


criterion(output_protores, dict_to_device(labels, 'cuda'))

(tensor(19.2637, device='cuda:0'),
 {'loss_tot': tensor(19.2637, device='cuda:0'),
  'loss_beta': tensor(0.0002, device='cuda:0'),
  'loss_theta': tensor(0.0417, device='cuda:0'),
  'loss_uvd': tensor(19.2631, device='cuda:0'),
  'loss_xyz_smpl24': tensor(8.1972, device='cuda:0'),
  'loss_xyz_smpl17': tensor(9.9651, device='cuda:0')})

In [297]:
labels.keys()

dict_keys(['trans_inv', 'intrinsic_param', 'joint_root', 'depth_factor', 'target_uvd_29', 'target_xyz_24', 'target_weight_24', 'target_weight_29', 'target_xyz_17', 'target_weight_17', 'target_theta', 'target_beta', 'target_smpl_weight', 'target_theta_weight', 'target_twist', 'target_twist_weight'])

In [300]:
labels['intrinsic_param']

tensor([[[ 0.0000e+00,  0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00]],

        [[ 8.7332e-04,  0.0000e+00, -4.4762e-01],
         [ 0.0000e+00,  8.7429e-04, -4.5066e-01],
         [ 0.0000e+00,  0.0000e+00,  1.0000e+00]],

        [[ 6.6769e-04,  0.0000e+00, -6.8419e-01],
         [ 0.0000e+00,  6.6796e-04, -7.0229e-01],
         [ 0.0000e+00,  0.0000e+00,  1.0000e+00]],

        [[ 0.0000e+00,  0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00]],

        [[ 8.7332e-04,  0.0000e+00, -4.4762e-01],
         [ 0.0000e+00,  8.7429e-04, -4.5066e-01],
         [ 0.0000e+00,  0.0000e+00,  1.0000e+00]],

        [[ 0.0000e+00,  0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00]],

        [[ 0.0000e+00,  0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0

In [307]:
labels['depth_factor']

tensor([[2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.],
        [2000.]])

In [284]:
sample_id = 0

trans_inv = labels['trans_inv'].cuda()
intrinsic_param = labels['intrinsic_param'].cuda()
root = labels['joint_root'].cuda()
depth_factor = labels['depth_factor'].cuda()


output = m.smpl.forward(
                pose_axis_angle=labels['target_theta'].cuda(), 
                betas=labels['target_beta'].cuda(), 
                transl=None,
                global_orient=None,
                return_verts=True
            )
pred_vertices = output.vertices.float()
pred_xyz_jts_24_struct = output.joints.float() / 2.0
pred_xyz_jts_17 = output.joints_from_verts.float() / 2.0

pred_uvd_jts_24 = m.cam_to_uvd(pred_xyz_jts_24_struct, trans_inv, intrinsic_param, root, depth_factor)


In [285]:
pred_uvd_jts_24[0]

tensor([[ 0.0149, -0.0388,  0.0000],
        [-0.0167, -0.0025, -0.0034],
        [ 0.0353, -0.0004,  0.0243],
        [ 0.0263, -0.0795, -0.0247],
        [-0.0210,  0.1539,  0.0153],
        [ 0.0356,  0.1346,  0.1225],
        [ 0.0285, -0.1386, -0.0373],
        [ 0.0099,  0.3121,  0.0527],
        [ 0.0680,  0.2927,  0.1023],
        [ 0.0246, -0.1616, -0.0291],
        [-0.0203,  0.3093,  0.1045],
        [ 0.0677,  0.2945,  0.1656],
        [ 0.0335, -0.2586, -0.0662],
        [-0.0024, -0.2154, -0.0671],
        [ 0.0631, -0.2140, -0.0428],
        [ 0.0270, -0.2911, -0.0517],
        [-0.0418, -0.2052, -0.0761],
        [ 0.0984, -0.2028, -0.0188],
        [-0.0710, -0.1009, -0.0334],
        [ 0.1049, -0.1015,  0.0275],
        [-0.1047, -0.0372,  0.0511],
        [ 0.0972, -0.0238,  0.1102],
        [-0.1105, -0.0099,  0.0741],
        [ 0.0939,  0.0048,  0.1298]], device='cuda:0')

In [286]:
labels['target_uvd_29'].reshape(-1, 29, 3)[0]

tensor([[ 1.4932e-02, -3.8804e-02,  0.0000e+00],
        [-1.6659e-02, -2.5053e-03, -3.3872e-03],
        [ 3.5293e-02, -3.4532e-04,  2.4351e-02],
        [ 2.6300e-02, -7.9473e-02, -2.4659e-02],
        [-2.0970e-02,  1.5396e-01,  1.5279e-02],
        [ 3.5581e-02,  1.3463e-01,  1.2256e-01],
        [ 2.8535e-02, -1.3861e-01, -3.7340e-02],
        [ 9.9413e-03,  3.1204e-01,  5.2709e-02],
        [ 6.8013e-02,  2.9257e-01,  1.0229e-01],
        [ 2.4635e-02, -1.6159e-01, -2.9095e-02],
        [-2.0312e-02,  3.0919e-01,  1.0450e-01],
        [ 6.7685e-02,  2.9451e-01,  1.6569e-01],
        [ 3.3479e-02, -2.5860e-01, -6.6225e-02],
        [-2.3775e-03, -2.1541e-01, -6.7052e-02],
        [ 6.3113e-02, -2.1397e-01, -4.2832e-02],
        [ 2.6997e-02, -2.9114e-01, -5.1722e-02],
        [-4.1759e-02, -2.0518e-01, -7.6046e-02],
        [ 9.8422e-02, -2.0277e-01, -1.8816e-02],
        [-7.0982e-02, -1.0085e-01, -3.3412e-02],
        [ 1.0490e-01, -1.0152e-01,  2.7555e-02],
        [-1.0470e-01

In [289]:
pred_uvd_jts_24[0] - labels['target_uvd_29'].reshape(-1, 29, 3)[:, :24][0].cuda()

tensor([[ 5.9605e-08,  2.9802e-08,  0.0000e+00],
        [-1.1027e-06,  5.7817e-06,  6.5917e-06],
        [ 3.2783e-06, -1.3381e-05, -9.0320e-06],
        [-1.6093e-06, -6.2883e-06, -4.3958e-06],
        [-4.1425e-06, -2.2471e-05, -5.6149e-06],
        [-9.2983e-06, -3.1471e-05, -2.9542e-05],
        [ 1.9073e-06, -4.3213e-06, -5.6177e-06],
        [-6.6161e-06,  3.9220e-05,  9.7640e-06],
        [ 2.9743e-05,  7.9393e-05, -2.0757e-05],
        [-4.1127e-06, -7.5102e-06, -7.5698e-06],
        [-5.6326e-06,  8.1182e-05,  3.3200e-05],
        [ 1.3053e-05, -1.1921e-06, -4.7132e-05],
        [-3.2187e-06, -1.5736e-05,  7.3016e-07],
        [-1.7881e-07, -4.2021e-06, -6.3479e-06],
        [-5.2452e-06, -4.2021e-06, -6.5938e-06],
        [-5.0068e-06, -4.7684e-07, -7.5698e-06],
        [-7.3016e-06, -1.5914e-05, -1.0252e-05],
        [-8.6427e-06, -7.7784e-06, -1.9532e-05],
        [ 7.4506e-07, -2.6226e-06,  7.3388e-07],
        [-1.8477e-05,  1.0610e-05, -9.5200e-06],
        [ 2.8104e-05

In [228]:
trans_inv = labels['trans_inv'].cuda()
intrinsic_param = labels['intrinsic_param'].cuda()
root = labels['joint_root'].cuda()
depth_factor = labels['depth_factor'].cuda()


cam_jts = m.uvd_to_cam(uvd_jts=labels['target_uvd_29'].reshape(-1, 29, 3).cuda(), 
             trans_inv=trans_inv, 
             intrinsic_param=intrinsic_param, 
             joint_root=root, 
             depth_factor=depth_factor)


inverted_uvd_29 = m.cam_to_uvd(cam_jts.reshape(-1, 29, 3), 
             trans_inv=trans_inv, 
             intrinsic_param=intrinsic_param, 
             joint_root=root, 
             depth_factor=depth_factor)


print(labels['target_uvd_29'].reshape(-1, 29, 3)[1])
print(inverted_uvd_29[1])


print(inverted_uvd_29[1] - labels['target_uvd_29'].reshape(-1, 29, 3)[1].cuda())

tensor([[ 0.0336, -0.0410,  0.0000],
        [-0.0205, -0.0103, -0.0036],
        [ 0.0418,  0.0146,  0.0245],
        [ 0.0650, -0.0857, -0.0246],
        [-0.0932,  0.1783,  0.0166],
        [-0.0161,  0.1788,  0.1232],
        [ 0.0931, -0.1568, -0.0371],
        [-0.1231,  0.3842,  0.0545],
        [-0.0438,  0.3854,  0.1040],
        [ 0.0980, -0.1866, -0.0289],
        [-0.1586,  0.3678,  0.1064],
        [-0.0451,  0.3877,  0.1674],
        [ 0.1499, -0.3014, -0.0655],
        [ 0.0879, -0.2638, -0.0668],
        [ 0.1672, -0.2344, -0.0421],
        [ 0.1559, -0.3439, -0.0510],
        [ 0.0358, -0.2679, -0.0769],
        [ 0.2059, -0.2061, -0.0183],
        [-0.0446, -0.1514, -0.0387],
        [ 0.1722, -0.0785,  0.0257],
        [-0.1141, -0.0860,  0.0440],
        [ 0.1258,  0.0107,  0.1089],
        [-0.1330, -0.0549,  0.0664],
        [ 0.1086,  0.0433,  0.1290],
        [ 0.2045, -0.4528, -0.0715],
        [-0.1503, -0.0227,  0.1029],
        [ 0.0803,  0.0740,  0.1553],
 

In [148]:
inverted_uvd_29[:,:24].max()

tensor(0.3593, device='cuda:0')

In [152]:
labels['target_xyz_24'].min()

tensor(-0.3015)

In [153]:
cam_jts[:,:24].min()

tensor(-0.3015, device='cuda:0')

In [None]:
!nvidia-smi

In [25]:
m.protores_model.net.encoder.blocks[0]

FCBlock(
  (forward_projection): Linear(in_features=1024, out_features=1024, bias=True)
  (backward_projection): Linear(in_features=82, out_features=1024, bias=True)
  (fc_layers): ModuleList(
    (0): Linear(in_features=82, out_features=1024, bias=True)
    (1): Dropout(p=0.01, inplace=False)
    (2): Linear(in_features=1024, out_features=1024, bias=True)
    (3): Linear(in_features=1024, out_features=1024, bias=True)
  )
  (relu_layers): ModuleList(
    (0): LeakyReLU(negative_slope=0.01, inplace=True)
    (1): Identity()
    (2): LeakyReLU(negative_slope=0.01, inplace=True)
    (3): LeakyReLU(negative_slope=0.01, inplace=True)
  )
)

In [20]:
# import torch.distributed as dist

# dist.init_process_group(backend='nccl', init_method='tcp://172.17.0.4:23456',
#                         world_size=1, rank=0)
# torch.cuda.set_device(0)

In [10]:
# m = builder.build_sppe(cfg.MODEL)

# m.load_state_dict(torch.load('./model_files/pretrained/pretrained_res34.pth', map_location='cpu'), strict=False)

# m = m.cuda()
# m = torch.nn.parallel.DistributedDataParallel(m, device_ids=[0])
# m

In [293]:
from deeppose.collections.common.utils.geometry.metric_utils import calc_mpjpe
from deeppose.collections.common.utils.geometry.metric_utils import calc_pampjpe

class MPJPE():
    def __init__(self):
        self.reset()
        
    def reset(self):
        self.accumulated = 0.0
        self.count = 0.0

    def update(self, preds: torch.Tensor, target: torch.Tensor, align_inds=[0]):
        assert preds.shape == target.shape
        assert preds.dim() == 3
        self.accumulated += torch.sum(calc_mpjpe(preds, target, align_inds)).item() 
        self.count += target.shape[0]

    def compute(self):
        return (self.accumulated / self.count)

class PA_MPJPE():
    def __init__(self):
        self.reset()
        
    def reset(self):
        self.accumulated = 0.0
        self.count = 0.0

    def update(self, preds: torch.Tensor, target: torch.Tensor):
        assert preds.shape == target.shape
        assert preds.dim() == 3
        self.accumulated += torch.sum(calc_pampjpe(preds, target)).item() 
        self.count += target.shape[0]

    def compute(self):
        return (self.accumulated / self.count)

pampjpe = PA_MPJPE()
mpjpe = MPJPE()
pampjpe.reset()
mpjpe.reset()

with torch.no_grad():
    mpjpe_np = []
    for batch in tqdm(gt_val_loader):

        inps, labels, img_ids, bboxes = batch

        trans_inv = labels['trans_inv'].cuda()
        intrinsic_param = labels['intrinsic_param'].cuda()
        root = labels['joint_root'].cuda()
        depth_factor = labels['depth_factor'].cuda()

        target_xyz_17 = labels['target_xyz_17'].reshape(-1, 17, 3)
        target_xyz_17 = target_xyz_17 - target_xyz_17[:,[gt_val_dataset_3dpw.root_idx_17]]
        target_xyz_17 = target_xyz_17[:, gt_val_dataset_3dpw.EVAL_JOINTS]

        output_protores = m(inps.cuda(), trans_inv, intrinsic_param, root, depth_factor, None, ik_option='protores')
#         output_protores = m(inps.cuda(), trans_inv, intrinsic_param, root, depth_factor, None)

        pred_pos = output_protores.pred_xyz_jts_17.reshape(-1, 17, 3)
        pred_pos = pred_pos - pred_pos[:,[gt_val_dataset_3dpw.root_idx_17]]
        pred_pos = pred_pos[:, gt_val_dataset_3dpw.EVAL_JOINTS].cpu()
        
        pred_pos = torch.tensor(pred_pos.cpu().numpy())
        target_xyz_17 = torch.tensor(target_xyz_17.cpu().numpy())

        pampjpe.update(preds=pred_pos, target=target_xyz_17)
        # in EVAL_JOINTS 2,3 are left and right hip
        mpjpe.update(preds=pred_pos, target=target_xyz_17, align_inds=[2,3])


#         joint_relative_17 = labels['labels']['joint_relative_17']
#         joint_relative_17 = joint_relative_17 - joint_relative_17[:,[gt_val_dataset_3dpw.root_idx_17]]
#         joint_relative_17 = joint_relative_17[:, gt_val_dataset_3dpw.EVAL_JOINTS]

#         mpjpe_np.append(np.sqrt(np.sum((joint_relative_17.detach().numpy() - 2*pred_pos.detach().numpy())**2, 2)))
        
        break
    
print("PA-MPJPE", pampjpe.compute())
print("MPJPE", mpjpe.compute())

HBox(children=(FloatProgress(value=0.0, max=8879.0), HTML(value='')))


PA-MPJPE 130.1100616455078
MPJPE 289.6127624511719


In [92]:
joint_relative_17 = labels['labels']['joint_relative_17']
joint_relative_17 = joint_relative_17 - joint_relative_17[:,[gt_val_dataset_3dpw.root_idx_17]]
joint_relative_17 = joint_relative_17[:, gt_val_dataset_3dpw.EVAL_JOINTS]
joint_relative_17[0]

tensor([[-0.2335,  0.8106,  0.3826],
        [-0.1847,  0.4151,  0.1586],
        [-0.1222, -0.0547,  0.0952],
        [ 0.1213,  0.0553, -0.0927],
        [ 0.0380,  0.4825,  0.0999],
        [ 0.0078,  0.8674,  0.3304],
        [-0.2374,  0.0041,  0.1032],
        [-0.1573, -0.2497,  0.0900],
        [ 0.0024, -0.4638, -0.0500],
        [ 0.1851, -0.3586, -0.2624],
        [ 0.1386, -0.0693, -0.3049],
        [ 0.0176,  0.1608, -0.2573],
        [ 0.1008, -0.4632, -0.1786],
        [ 0.1612, -0.6418, -0.2315]])

In [13]:
target_xyz_17[0]*2

tensor([[-0.2335,  0.8106,  0.3826],
        [-0.1847,  0.4151,  0.1586],
        [-0.1222, -0.0547,  0.0952],
        [ 0.1213,  0.0553, -0.0927],
        [ 0.0380,  0.4825,  0.0999],
        [ 0.0078,  0.8674,  0.3304],
        [-0.2374,  0.0041,  0.1032],
        [-0.1573, -0.2497,  0.0900],
        [ 0.0024, -0.4638, -0.0500],
        [ 0.1851, -0.3586, -0.2624],
        [ 0.1386, -0.0693, -0.3049],
        [ 0.0176,  0.1608, -0.2573],
        [ 0.1008, -0.4632, -0.1786],
        [ 0.1612, -0.6418, -0.2315]])

In [99]:
np.mean(np.concatenate(mpjpe_np)) * 1000

80.66204190254211

In [58]:
kpt_all_pred = {}
for r in range(4):
    with open(os.path.join('exp', f'pw3d_test_gt_kpt_rank_{r}.pkl'), 'rb') as fid:
        kpt_pred = pk.load(fid)
        kpt_all_pred.update(kpt_pred)

In [59]:
sample =  5075 # 5494 #

with open(os.path.join('exp', f'test_gt_kpt_debug_{sample}.pkl'), 'rb') as fid: 
    kpt_debug = pk.load(fid)
# kpt_debug[5075].keys()

In [60]:
kpt_debug[sample]['output']

{'pred_shape': tensor([-2.9585e-01,  5.8200e-01,  2.8378e-01,  6.6612e-01,  3.7148e-01,
          1.7889e-01, -5.4302e-01, -3.8432e-04,  1.4790e+00,  5.1426e-01]),
 'pred_theta_mats': tensor([-0.0559,  0.9331,  0.1232, -0.3331,  0.9977,  0.0500,  0.0382, -0.0238,
          0.9994, -0.0091, -0.0299, -0.0137,  0.9929,  0.1178,  0.0164,  0.0082,
          0.9999,  0.0089,  0.0033, -0.0080,  0.9977,  0.0519, -0.0281, -0.0345,
          0.9978, -0.0652, -0.0064, -0.0095,  0.9780, -0.1690,  0.1171,  0.0358,
          0.9512,  0.0673,  0.2528,  0.1639,  0.9971,  0.0632, -0.0419, -0.0021,
          0.9975, -0.0417,  0.0418,  0.0382,  0.9985, -0.0410, -0.0171, -0.0304,
          0.9979, -0.0393, -0.0517,  0.0035,  0.9788,  0.0229, -0.1723, -0.1088,
          0.9905, -0.0046,  0.0896,  0.1039,  0.9995, -0.0284, -0.0057, -0.0085,
          0.8558,  0.0523, -0.2515, -0.4491,  0.8604,  0.0576,  0.0488,  0.5040,
          0.9850,  0.1264, -0.0948, -0.0700,  0.9839,  0.0457,  0.1165,  0.1277,
       

In [61]:
output_protores.pred_shape[0]

tensor([-0.3819,  0.5224,  0.2994,  0.6081,  0.1764,  0.2996, -0.5099,  0.0025,
         1.4406,  0.6506], device='cuda:0')

In [62]:
output_hybrik = m(kpt_debug[sample]['inps'][None].cuda(), 
                  kpt_debug[sample]['trans_inv'][None].cuda(), 
                  kpt_debug[sample]['intrinsic_param'][None].cuda(), 
                  kpt_debug[sample]['joint_root'][None].cuda(), 
                  kpt_debug[sample]['depth_factor'][None].cuda(), None)

output_hybrik

ModelOutput(pred_shape=tensor([[-0.3819,  0.5224,  0.2994,  0.6081,  0.1764,  0.2996, -0.5099,  0.0025,
          1.4406,  0.6506]], device='cuda:0'), pred_theta_mats=tensor([[-5.8600e-02,  9.5726e-01,  8.5865e-02, -2.6989e-01,  9.9550e-01,
          3.7797e-02,  3.4101e-02, -7.9967e-02,  9.9901e-01, -1.2172e-02,
         -2.9448e-02, -3.1122e-02,  9.9606e-01,  7.3250e-02,  2.0539e-02,
         -4.5588e-02,  9.9916e-01,  3.4977e-02,  9.6067e-03,  1.8973e-02,
          9.9671e-01,  6.3509e-02, -4.2073e-02, -2.7749e-02,  9.9698e-01,
         -1.7561e-02, -4.3282e-02,  6.2047e-02,  9.7923e-01, -1.9606e-01,
          5.0244e-02,  1.2075e-02,  9.5746e-01, -2.0305e-02,  2.5121e-01,
          1.4053e-01,  9.9812e-01,  4.0645e-02, -2.4717e-02, -3.8578e-02,
          9.9757e-01, -4.5395e-02,  4.0220e-02,  3.4324e-02,  9.9866e-01,
         -3.4833e-02, -2.8368e-02, -2.5734e-02,  9.9571e-01, -4.7511e-02,
         -7.7186e-02,  1.8512e-02,  9.6726e-01,  2.2590e-02, -2.0398e-01,
         -1.4930e-0

In [63]:
output_m_debug = kpt_debug[sample]['model'](kpt_debug[sample]['inps'][None], 
                                            kpt_debug[sample]['trans_inv'][None], 
                                            kpt_debug[sample]['intrinsic_param'][None], 
                                            kpt_debug[sample]['joint_root'][None], 
                                            kpt_debug[sample]['depth_factor'][None], None)

output_m_debug

ModelOutput(pred_shape=tensor([[-2.9585e-01,  5.8200e-01,  2.8378e-01,  6.6612e-01,  3.7148e-01,
          1.7889e-01, -5.4302e-01, -3.8485e-04,  1.4790e+00,  5.1426e-01]],
       grad_fn=<AddBackward0>), pred_theta_mats=tensor([[-0.0559,  0.9332,  0.1232, -0.3331,  0.9977,  0.0500,  0.0382, -0.0238,
          0.9994, -0.0091, -0.0299, -0.0137,  0.9929,  0.1178,  0.0164,  0.0082,
          0.9999,  0.0089,  0.0033, -0.0080,  0.9977,  0.0519, -0.0281, -0.0345,
          0.9978, -0.0652, -0.0064, -0.0095,  0.9780, -0.1690,  0.1171,  0.0358,
          0.9512,  0.0673,  0.2528,  0.1639,  0.9971,  0.0632, -0.0419, -0.0021,
          0.9975, -0.0417,  0.0418,  0.0382,  0.9985, -0.0410, -0.0171, -0.0304,
          0.9979, -0.0393, -0.0517,  0.0035,  0.9788,  0.0229, -0.1723, -0.1088,
          0.9905, -0.0046,  0.0896,  0.1039,  0.9995, -0.0284, -0.0057, -0.0085,
          0.8558,  0.0523, -0.2515, -0.4491,  0.8604,  0.0576,  0.0488,  0.5040,
          0.9850,  0.1264, -0.0948, -0.0700,  0.98

In [39]:
m_debug = kpt_debug[sample]['model']

m_parameters = dict(m.named_parameters())

In [53]:
for k, v in m_debug.named_parameters():
    
#     print("debug model", k, v)
    
#     print("notebook model", k, m_parameters[k])
    
    if not torch.isclose(v.cpu(), m_parameters[k].cpu(), 1e-10).all():
        print("mismatching parameters", k)
    
#     break

In [54]:
m_debug

Simple3DPoseBaseSMPL24(
  (preact): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=T

In [55]:
m

Simple3DPoseBaseSMPL24(
  (preact): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=T

In [135]:
labels['target_xyz_17'].reshape(-1, 17, 3) * 2

tensor([[[ 0.0000,  0.0000,  0.0000],
         [ 0.1213,  0.0553, -0.0927],
         [ 0.0380,  0.4825,  0.0999],
         ...,
         [ 0.0024, -0.4638, -0.0500],
         [-0.1573, -0.2497,  0.0900],
         [-0.2374,  0.0041,  0.1032]],

        [[ 0.0000,  0.0000,  0.0000],
         [ 0.0729,  0.0180,  0.1431],
         [ 0.0801,  0.4899,  0.0608],
         ...,
         [-0.1009, -0.4755, -0.0663],
         [-0.1768, -0.2231, -0.1932],
         [-0.1101,  0.0257, -0.2313]],

        [[ 0.0000,  0.0000,  0.0000],
         [ 0.1415,  0.0070, -0.0117],
         [ 0.1294,  0.4215,  0.0527],
         ...,
         [-0.1575, -0.3982, -0.0811],
         [-0.3217, -0.2232, -0.1669],
         [-0.2708, -0.2335, -0.3769]],

        ...,

        [[ 0.0000,  0.0000,  0.0000],
         [ 0.1128,  0.0264, -0.1118],
         [ 0.0210,  0.4939, -0.1126],
         ...,
         [-0.0645, -0.4678,  0.0639],
         [-0.1362, -0.2364,  0.2465],
         [-0.1622,  0.0199,  0.2072]],

        [[

In [122]:
target_xyz_17

tensor([[[-0.1168,  0.4053,  0.1913],
         [-0.0923,  0.2075,  0.0793],
         [-0.0611, -0.0273,  0.0476],
         ...,
         [ 0.0088,  0.0804, -0.1287],
         [ 0.0504, -0.2316, -0.0893],
         [ 0.0806, -0.3209, -0.1158]],

        [[-0.0157,  0.4538, -0.1089],
         [-0.0032,  0.2265, -0.0806],
         [-0.0368, -0.0082, -0.0726],
         ...,
         [ 0.0846,  0.0307,  0.1186],
         [-0.0109, -0.2569,  0.0323],
         [ 0.0026, -0.3495,  0.0450]],

        [[-0.0869,  0.3606,  0.1548],
         [-0.0801,  0.1995,  0.0386],
         [-0.0711, -0.0026,  0.0052],
         ...,
         [ 0.0662, -0.1244, -0.2160],
         [-0.0099, -0.2201, -0.0560],
         [-0.0191, -0.2894, -0.1120]],

        ...,

        [[-0.0046,  0.4385,  0.1293],
         [-0.0448,  0.2257,  0.0573],
         [-0.0573, -0.0129,  0.0571],
         ...,
         [ 0.1182,  0.0288, -0.1225],
         [ 0.0109, -0.2528, -0.0352],
         [-0.0239, -0.3314, -0.0868]],

        [[

In [33]:
batch = next(iter(gt_val_loader))
inps, labels, _, bboxes = batch

trans_inv = labels['trans_inv']
intrinsic_param = labels['intrinsic_param']
root = labels['joint_root']
depth_factor = labels['depth_factor']
label_masks_17 = labels['target_weight_17']

torch.Size([31, 14, 3])

In [10]:
batch_idx = 0

In [11]:
target_xyz_17 = labels['target_xyz_17'][batch_idx].reshape(17, 3)
target_xyz_17 - target_xyz_17[0]

tensor([[ 0.0000,  0.0000,  0.0000],
        [ 0.0657,  0.0111, -0.0457],
        [ 0.0448,  0.2424,  0.0106],
        [ 0.0545,  0.4565,  0.0798],
        [-0.0665, -0.0102,  0.0467],
        [-0.0323,  0.2259,  0.0586],
        [-0.0079,  0.4472,  0.1175],
        [ 0.0151, -0.1267,  0.0030],
        [ 0.0230, -0.2513, -0.0027],
        [ 0.0116, -0.2971, -0.0334],
        [ 0.0244, -0.3511, -0.0179],
        [ 0.0818, -0.2156, -0.0450],
        [ 0.1028, -0.0706, -0.0604],
        [ 0.0825,  0.0588, -0.0523],
        [-0.0357, -0.2264,  0.0481],
        [-0.0732, -0.0856,  0.0773],
        [-0.0814,  0.0464,  0.0641]])

In [12]:
output_hybrik = m(inps, trans_inv, intrinsic_param, root, depth_factor, None, ik_option='hybrik')

In [13]:
output_protores = m(inps, trans_inv, intrinsic_param, root, depth_factor, None, ik_option='protores')

In [14]:
def dict_to_device(input, device):
    output = dict()
    for k, v in input.items():
        output[k] = v.to(device)
    return output

def pack_data(betas, position_data, position_id, rotation_data=None, rotation_id=None):
    
    input_data = {'betas': betas,
                  'gender': 2 * torch.ones(len(betas), 1).to(betas).to(dtype=int),
                  'position_data': position_data,
                  'position_weight': torch.ones(len(betas), position_id.shape[0]).to(betas),
                  'position_tolerance': torch.zeros(len(betas), position_id.shape[0]).to(betas),
                  'position_id': position_id[None].repeat(len(betas), 1).to(betas).to(dtype=int),
                  'rotation_data': torch.empty(len(betas), 0, 6).to(betas),
                  'rotation_weight': torch.empty(len(betas), 0).to(betas),
                  'rotation_tolerance': torch.empty(len(betas), 0).to(betas),
                  'rotation_id': torch.empty(len(betas), 0, dtype=int).to(betas).to(dtype=int),
                  'lookat_data': torch.empty(len(betas), 0, 6).to(betas),
                  'lookat_weight': torch.empty(len(betas), 0).to(betas),
                  'lookat_tolerance': torch.empty(len(betas), 0).to(betas),
                  'lookat_id': torch.empty(len(betas), 0, dtype=int).to(betas).to(dtype=int),
                 }
    
    if rotation_data is not None:
        input_data['rotation_data'] = rotation_data
        input_data['rotation_id'] = rotation_id[None].repeat(len(betas), 1).to(betas).to(dtype=int)
        input_data['rotation_weight'] = torch.ones(len(betas), rotation_id.shape[0]).to(betas)
        input_data['rotation_tolerance'] = torch.zeros(len(betas), rotation_id.shape[0]).to(betas)
    
    return input_data

def deeppose_fk(input_data, deeppose_net):
    input_data = dict_to_device(input_data, device=deeppose_net.device)
    
    predictions = deeppose_net(input_data)

    betas = input_data["betas"]
    gender = input_data["gender"]

    predicted_root_joint_position = predictions["root_joint_position"]
    predicted_joint_rotations = predictions["joint_rotations"]
            
    # compute rotation matrices
    predicted_joint_rotations_mat = rotation_6d_to_matrix(predicted_joint_rotations.view(-1, 6)).view(-1, deeppose_net.nb_joints, 3, 3)
    # apply forward kinematics
    predicted_joint_rotations_quat = matrix_to_quaternion(predicted_joint_rotations_mat)

    predicted_joint_positions_fk, predicted_joint_rotations_fk = deeppose_net.apply_smpl_quat(betas=betas,
                                                                                              joint_rotations_quat=predicted_joint_rotations_quat,
                                                                                              root_position=predicted_root_joint_position,
                                                                                              gender=gender)
    joint_rotations_axis_angle = quaternion_to_axis_angle(predicted_joint_rotations_quat)    
        
    return predicted_joint_positions_fk, predicted_joint_rotations_fk

betas = labels['target_beta']
# betas = output_protores.pred_shape
protores_data = pack_data(betas = betas, 
                          position_data = 2 * output_protores.pred_xyz_jts_24.type(torch.float32).view(-1, 24, 3), 
                          position_id = torch.tensor(np.arange(24)).to(output_protores.pred_shape).to(dtype=int)
#                                   position_id = torch.tensor([0, 7, 8, 12, 20, 21]).to(params_pred['betas']).to(dtype=int),
#                                   rotation_data = params_pred['rotation_data'].view(-1, 6, 6),
#                                   rotation_id = torch.tensor([0, 7, 8, 12, 20, 21]).to(params_pred['betas']).to(dtype=int),
                         )

predicted_joint_positions_fk, predicted_joint_rotations_fk = deeppose_fk(protores_data, m.protores_model)
predicted_joint_positions_fk = predicted_joint_positions_fk / 2
predicted_joint_positions_fk = predicted_joint_positions_fk - predicted_joint_positions_fk[:, [0]]

In [15]:
predicted_joint_positions_fk[batch_idx]

tensor([[ 0.0000,  0.0000,  0.0000],
        [ 0.0333,  0.0519, -0.0082],
        [-0.0349,  0.0452,  0.0218],
        [ 0.0080, -0.0592,  0.0080],
        [ 0.0285,  0.2553, -0.0340],
        [-0.0481,  0.2526,  0.0389],
        [ 0.0106, -0.1253, -0.0131],
        [ 0.0487,  0.4280,  0.0978],
        [ 0.0232,  0.4304,  0.1447],
        [ 0.0054, -0.1493, -0.0319],
        [ 0.0575,  0.4683,  0.0399],
        [-0.0288,  0.4612,  0.1058],
        [ 0.0165, -0.2642, -0.0289],
        [ 0.0516, -0.2121, -0.0412],
        [-0.0240, -0.2182, -0.0079],
        [ 0.0092, -0.2952, -0.0557],
        [ 0.0962, -0.2114, -0.0591],
        [-0.0712, -0.2319,  0.0083],
        [ 0.1294, -0.0797, -0.0389],
        [-0.0855, -0.1182,  0.0797],
        [ 0.0870,  0.0397, -0.0685],
        [-0.1002,  0.0150,  0.0628],
        [ 0.0721,  0.0815, -0.0696],
        [-0.0909,  0.0587,  0.0607]], grad_fn=<SelectBackward0>)

In [16]:
(labels['target_xyz_24'][batch_idx]-predicted_joint_positions_fk[batch_idx].ravel()).abs().mean()

tensor(0.0169, grad_fn=<MeanBackward0>)

In [17]:
(labels['target_xyz_24'][batch_idx]-output_protores.pred_xyz_jts_24_struct[batch_idx]).abs().mean()

tensor(0.0168, grad_fn=<MeanBackward0>)

In [18]:
(labels['target_xyz_24'][batch_idx]-output_hybrik.pred_xyz_jts_24_struct[batch_idx]).abs().mean()

tensor(0.0169, grad_fn=<MeanBackward0>)

In [19]:
output_protores.pred_xyz_jts_24[batch_idx].reshape(24, 3)

tensor([[ 0.0000,  0.0000,  0.0000],
        [ 0.0285,  0.0482, -0.0054],
        [-0.0330,  0.0449,  0.0172],
        [ 0.0126, -0.0526,  0.0054],
        [ 0.0457,  0.2470,  0.0150],
        [-0.0292,  0.2459,  0.0613],
        [ 0.0132, -0.1234, -0.0083],
        [ 0.0525,  0.4244,  0.0727],
        [ 0.0184,  0.4187,  0.1317],
        [ 0.0066, -0.1509, -0.0259],
        [ 0.0424,  0.4385,  0.0326],
        [-0.0136,  0.4499,  0.1105],
        [ 0.0146, -0.2474, -0.0273],
        [ 0.0487, -0.2088, -0.0407],
        [-0.0187, -0.2121, -0.0071],
        [-0.0033, -0.2890, -0.0561],
        [ 0.0778, -0.2103, -0.0648],
        [-0.0564, -0.2206,  0.0020],
        [ 0.1130, -0.0754, -0.0590],
        [-0.0799, -0.1054,  0.0623],
        [ 0.0808,  0.0360, -0.0627],
        [-0.0929,  0.0152,  0.0673],
        [ 0.0668,  0.0765, -0.0635],
        [-0.0955,  0.0613,  0.0608]], grad_fn=<ReshapeAliasBackward0>)

In [20]:
output_protores.pred_xyz_jts_24_struct[batch_idx].reshape(24, 3)

tensor([[ 0.0056,  0.0070, -0.0006],
        [ 0.0369,  0.0539, -0.0011],
        [-0.0241,  0.0478,  0.0222],
        [ 0.0101, -0.0471,  0.0011],
        [ 0.0517,  0.2368,  0.0168],
        [-0.0315,  0.2313,  0.0665],
        [ 0.0145, -0.1153, -0.0141],
        [ 0.0503,  0.4238,  0.0750],
        [ 0.0117,  0.4090,  0.1366],
        [ 0.0107, -0.1390, -0.0310],
        [ 0.0351,  0.4626,  0.0243],
        [-0.0193,  0.4582,  0.1065],
        [ 0.0181, -0.2476, -0.0295],
        [ 0.0525, -0.1979, -0.0410],
        [-0.0172, -0.2024, -0.0085],
        [ 0.0090, -0.2760, -0.0601],
        [ 0.0940, -0.1979, -0.0629],
        [-0.0628, -0.2132,  0.0054],
        [ 0.1165, -0.0722, -0.0427],
        [-0.0720, -0.1011,  0.0600],
        [ 0.0872,  0.0436, -0.0675],
        [-0.0973,  0.0209,  0.0605],
        [ 0.0746,  0.0825, -0.0742],
        [-0.0978,  0.0619,  0.0621]], grad_fn=<ReshapeAliasBackward0>)

In [21]:
output_hybrik.pred_xyz_jts_24_struct[batch_idx].reshape(24, 3)

tensor([[ 0.0000,  0.0000,  0.0000],
        [ 0.0223,  0.0494, -0.0156],
        [-0.0265,  0.0403,  0.0272],
        [ 0.0126, -0.0526,  0.0054],
        [ 0.0437,  0.2304,  0.0124],
        [-0.0290,  0.2268,  0.0581],
        [ 0.0132, -0.1213, -0.0079],
        [ 0.0521,  0.4174,  0.0705],
        [ 0.0150,  0.4051,  0.1264],
        [ 0.0077, -0.1460, -0.0229],
        [ 0.0378,  0.4486,  0.0145],
        [-0.0188,  0.4581,  0.1076],
        [ 0.0143, -0.2545, -0.0300],
        [ 0.0487, -0.2041, -0.0389],
        [-0.0199, -0.2108, -0.0043],
        [-0.0018, -0.2861, -0.0539],
        [ 0.0833, -0.2114, -0.0697],
        [-0.0665, -0.2233,  0.0038],
        [ 0.1108, -0.0854, -0.0598],
        [-0.0792, -0.1118,  0.0591],
        [ 0.0815,  0.0332, -0.0626],
        [-0.0926,  0.0120,  0.0670],
        [ 0.0682,  0.0725, -0.0634],
        [-0.0950,  0.0527,  0.0619]], grad_fn=<ReshapeAliasBackward0>)

In [22]:
hybrik_xyz_17 = output_hybrik.pred_xyz_jts_17[batch_idx].reshape(17, 3)
hybrik_xyz_17 - hybrik_xyz_17[0]

tensor([[ 0.0000,  0.0000,  0.0000],
        [ 0.0513,  0.0128, -0.0434],
        [ 0.0425,  0.2204,  0.0016],
        [ 0.0581,  0.4092,  0.0735],
        [-0.0518, -0.0125,  0.0436],
        [-0.0365,  0.2060,  0.0487],
        [ 0.0111,  0.3862,  0.1311],
        [ 0.0102, -0.1281, -0.0050],
        [ 0.0057, -0.2463, -0.0397],
        [-0.0129, -0.2865, -0.0686],
        [-0.0007, -0.3359, -0.0461],
        [ 0.0701, -0.2163, -0.0622],
        [ 0.1070, -0.0836, -0.0848],
        [ 0.0753,  0.0350, -0.0732],
        [-0.0520, -0.2255, -0.0014],
        [-0.0991, -0.1084,  0.0497],
        [-0.0989,  0.0104,  0.0477]], grad_fn=<SubBackward0>)

In [23]:
protores_xyz_17 = output_protores.pred_xyz_jts_17[batch_idx].reshape(17, 3)
protores_xyz_17 - protores_xyz_17[0]

tensor([[ 0.0000,  0.0000,  0.0000],
        [ 0.0642,  0.0097, -0.0222],
        [ 0.0482,  0.2207,  0.0090],
        [ 0.0523,  0.4100,  0.0788],
        [-0.0646, -0.0093,  0.0221],
        [-0.0411,  0.2049,  0.0550],
        [-0.0010,  0.3858,  0.1430],
        [ 0.0104, -0.1244, -0.0083],
        [ 0.0093, -0.2408, -0.0383],
        [-0.0049, -0.2796, -0.0765],
        [ 0.0022, -0.3317, -0.0578],
        [ 0.0754, -0.2084, -0.0565],
        [ 0.1108, -0.0760, -0.0676],
        [ 0.0763,  0.0390, -0.0756],
        [-0.0527, -0.2204, -0.0014],
        [-0.0955, -0.1029,  0.0483],
        [-0.1018,  0.0158,  0.0419]], grad_fn=<SubBackward0>)

In [24]:
labels.keys()

dict_keys(['type', 'target_theta', 'target_theta_weight', 'target_beta', 'target_smpl_weight', 'target_uvd_29', 'target_xyz_24', 'target_weight_29', 'target_weight_24', 'target_xyz_17', 'target_weight_17', 'trans_inv', 'intrinsic_param', 'joint_root', 'depth_factor', 'target_twist', 'target_twist_weight'])

In [25]:
target_xyz_17 = labels['target_xyz_17'][batch_idx].reshape(17, 3)
target_xyz_17 - target_xyz_17[0]

tensor([[ 0.0000,  0.0000,  0.0000],
        [ 0.0657,  0.0111, -0.0457],
        [ 0.0448,  0.2424,  0.0106],
        [ 0.0545,  0.4565,  0.0798],
        [-0.0665, -0.0102,  0.0467],
        [-0.0323,  0.2259,  0.0586],
        [-0.0079,  0.4472,  0.1175],
        [ 0.0151, -0.1267,  0.0030],
        [ 0.0230, -0.2513, -0.0027],
        [ 0.0116, -0.2971, -0.0334],
        [ 0.0244, -0.3511, -0.0179],
        [ 0.0818, -0.2156, -0.0450],
        [ 0.1028, -0.0706, -0.0604],
        [ 0.0825,  0.0588, -0.0523],
        [-0.0357, -0.2264,  0.0481],
        [-0.0732, -0.0856,  0.0773],
        [-0.0814,  0.0464,  0.0641]])

In [26]:
(target_xyz_17 - (hybrik_xyz_17-hybrik_xyz_17[0])).abs().mean()

tensor(0.0158, grad_fn=<MeanBackward0>)

In [27]:
target_xyz_17

tensor([[ 0.0000,  0.0000,  0.0000],
        [ 0.0657,  0.0111, -0.0457],
        [ 0.0448,  0.2424,  0.0106],
        [ 0.0545,  0.4565,  0.0798],
        [-0.0665, -0.0102,  0.0467],
        [-0.0323,  0.2259,  0.0586],
        [-0.0079,  0.4472,  0.1175],
        [ 0.0151, -0.1267,  0.0030],
        [ 0.0230, -0.2513, -0.0027],
        [ 0.0116, -0.2971, -0.0334],
        [ 0.0244, -0.3511, -0.0179],
        [ 0.0818, -0.2156, -0.0450],
        [ 0.1028, -0.0706, -0.0604],
        [ 0.0825,  0.0588, -0.0523],
        [-0.0357, -0.2264,  0.0481],
        [-0.0732, -0.0856,  0.0773],
        [-0.0814,  0.0464,  0.0641]])

In [28]:
(target_xyz_17 - (protores_xyz_17-protores_xyz_17[0])).abs().mean()

tensor(0.0157, grad_fn=<MeanBackward0>)

In [29]:
loss_protores = criterion(output_protores, labels)
loss_protores

(tensor(3.2438, grad_fn=<AddBackward0>),
 {'loss_tot': tensor(3.2438, grad_fn=<AddBackward0>),
  'loss_beta': tensor(1.8465, grad_fn=<MseLossBackward0>),
  'loss_theta': tensor(0.0487, grad_fn=<MseLossBackward0>),
  'loss_uvd': tensor(1.1440, grad_fn=<DivBackward0>),
  'loss_xyz_smpl24': tensor(1.3166, grad_fn=<DivBackward0>),
  'loss_xyz_smpl17': tensor(1.2114, grad_fn=<DivBackward0>)})

In [30]:
loss_hybrik = criterion(output_hybrik, labels)
loss_hybrik

(tensor(3.2387, grad_fn=<AddBackward0>),
 {'loss_tot': tensor(3.2387, grad_fn=<AddBackward0>),
  'loss_beta': tensor(1.8493, grad_fn=<MseLossBackward0>),
  'loss_theta': tensor(0.0531, grad_fn=<MseLossBackward0>),
  'loss_uvd': tensor(1.1440, grad_fn=<DivBackward0>),
  'loss_xyz_smpl24': tensor(1.2877, grad_fn=<DivBackward0>),
  'loss_xyz_smpl17': tensor(1.1610, grad_fn=<DivBackward0>)})

In [31]:
calc_coord_accuracy(output_hybrik.pred_xyz_jts_24_struct, labels['target_xyz_24'], labels['target_weight_24'], 
                    hm_shape, num_joints=24, root_idx=gt_val_loader.dataset.root_idx_17)

0.78125

In [32]:
calc_coord_accuracy(predicted_joint_positions_fk.reshape(-1, 72), labels['target_xyz_24'], labels['target_weight_24'], 
                    hm_shape, num_joints=24, root_idx=gt_val_loader.dataset.root_idx_17)

0.6927083333333334

In [33]:
calc_coord_accuracy(output_protores.pred_xyz_jts_24_struct, labels['target_xyz_24'], labels['target_weight_24'], 
                    hm_shape, num_joints=24, root_idx=gt_val_loader.dataset.root_idx_17)

0.6927083333333334

In [34]:
calc_coord_accuracy(output_hybrik.pred_xyz_jts_17, labels['target_xyz_17'], label_masks_17, 
                    hm_shape, num_joints=17, root_idx=gt_val_loader.dataset.root_idx_17)

0.7395833333333334

In [37]:
pred_xyz_jts_17_protores = output_protores.pred_xyz_jts_17.reshape(-1, 17, 3)
pred_xyz_jts_17_protores = (pred_xyz_jts_17_protores - pred_xyz_jts_17_protores[:,[0]]).reshape(-1, 51)

calc_coord_accuracy(pred_xyz_jts_17_protores, labels['target_xyz_17'], label_masks_17, 
                    hm_shape, num_joints=17, root_idx=gt_val_loader.dataset.root_idx_17)

0.75

In [46]:
def calc_dist(preds, target, normalize):
    """Calculate normalized distances"""
    preds = preds.astype(np.float32)
    target = target.astype(np.float32)
    dists = np.zeros((preds.shape[1], preds.shape[0]))

    for n in range(preds.shape[0]):
        for c in range(preds.shape[1]):
            if target[n, c, 0] > 1 and target[n, c, 1] > 1:
                normed_preds = preds[n, c, :] / normalize[n]
                normed_targets = target[n, c, :] / normalize[n]
                dists[c, n] = np.linalg.norm(normed_preds - normed_targets)
            else:
                dists[c, n] = -1

    return dists

def dist_acc(dists, thr=0.5):
    """Calculate accuracy with given input distance."""
    dist_cal = np.not_equal(dists, -1)
    num_dist_cal = dist_cal.sum()
    if num_dist_cal > 0:
        return np.less(dists[dist_cal], thr).sum() * 1.0 / num_dist_cal
    else:
        return -1

def calc_coord_accuracy(pred_jts, labels, label_masks, hm_shape, norm='softmax', num_joints=None, root_idx=None):
    """Calculate integral coordinates accuracy."""
    coords = pred_jts.clone().detach().cpu().numpy()
    coords = coords.astype(float)
    
    if num_joints is not None:
        coords = coords.reshape(coords.shape[0], num_joints, -1)
        labels = labels.reshape(labels.shape[0], num_joints, -1)
        label_masks = label_masks.reshape(label_masks.shape[0], num_joints, -1)
        coords = coords[:, :, :3].reshape(coords.shape[0], -1)
        labels = labels[:, :, :3].reshape(coords.shape[0], -1)
        label_masks = label_masks[:, :, :3].reshape(coords.shape[0], -1)
    else:
        num_joints = coords.shape[1] // 3

    hm_width, hm_height, hm_depth = hm_shape
    coords = coords.reshape((coords.shape[0], int(coords.shape[1] / 3), 3))

    coords[:, :, 0] = (coords[:, :, 0] + 0.5) * hm_width
    coords[:, :, 1] = (coords[:, :, 1] + 0.5) * hm_height

    labels = labels.clone().cpu().data.numpy().reshape(pred_jts.shape[0], num_joints, 3)
    label_masks = label_masks.cpu().data.numpy().reshape(pred_jts.shape[0], num_joints, 3)

    labels[:, :, 0] = (labels[:, :, 0] + 0.5) * hm_width
    labels[:, :, 1] = (labels[:, :, 1] + 0.5) * hm_height
    labels[:, :, 2] = (labels[:, :, 2] + 0.5) * hm_depth

    coords[:, :, 2] = (coords[:, :, 2] + 0.5) * hm_depth

    if root_idx is not None:
        labels = labels - labels[:, root_idx, :][:, None, :]
#         coords = coords - coords[:, root_idx, :][:, None, :]

    coords = coords * label_masks
    labels = labels * label_masks

    norm = np.ones((pred_jts.shape[0], 3)) * np.array([hm_width, hm_height, hm_depth]) / 10

    dists = calc_dist(coords, labels, norm)

    acc = 0
    sum_acc = 0
    cnt = 0
    for i in range(num_joints):
        acc = dist_acc(dists[i])
        if acc >= 0:
            sum_acc += acc
            cnt += 1

    if cnt > 0:
        return sum_acc / cnt
    else:
        return 0
    
calc_coord_accuracy(output_protores.pred_xyz_jts_17 + 0.002, labels['target_xyz_17'], label_masks_17, 
                    hm_shape, num_joints=17, root_idx=train_loader.dataset.root_idx_17)


(target_xyz_17 - protores_xyz_17).abs().mean()

NameError: name 'train_loader' is not defined

In [90]:
(target_xyz_17 - protores_xyz_17).mean(axis=0)

tensor([-0.0024,  0.0547,  0.0039], grad_fn=<MeanBackward1>)

In [84]:
labels['target_xyz_17'][3].reshape(17,3)

tensor([[-0.0000,  0.0000,  0.0000],
        [-0.0643, -0.0033,  0.0291],
        [-0.0162,  0.1818,  0.1205],
        [-0.0658,  0.3751,  0.1050],
        [ 0.0637,  0.0043, -0.0291],
        [ 0.0521,  0.2088,  0.0199],
        [ 0.0138,  0.4052,  0.0476],
        [-0.0165, -0.1137, -0.0377],
        [-0.0104, -0.2272, -0.0475],
        [-0.0007, -0.2880, -0.0258],
        [-0.0153, -0.3349, -0.0543],
        [-0.0780, -0.1998, -0.0175],
        [-0.1170, -0.0778,  0.0345],
        [-0.1022,  0.0191,  0.0970],
        [ 0.0532, -0.1934, -0.0734],
        [ 0.1090, -0.0653, -0.0529],
        [ 0.1409,  0.0374, -0.0040]])

In [91]:
protores_xyz_17

tensor([[-0.0566,  0.0636, -0.1009],
        [-0.1862,  0.0663, -0.0396],
        [-0.0782,  0.1364,  0.3452],
        [-0.0554,  0.2945,  0.0454],
        [ 0.0724,  0.0606, -0.1614],
        [ 0.3270,  0.2154,  0.1008],
        [ 0.0306,  0.3036,  0.0253],
        [-0.0320, -0.1601, -0.0914],
        [-0.0036, -0.3857, -0.0198],
        [ 0.0033, -0.4507,  0.0532],
        [-0.0118, -0.5521,  0.0151],
        [-0.1437, -0.3388,  0.0071],
        [-0.2465, -0.1015,  0.0151],
        [-0.0841,  0.0471,  0.0119],
        [ 0.1245, -0.3319, -0.0722],
        [ 0.2302, -0.0884, -0.1089],
        [ 0.0961,  0.0195,  0.0195]], grad_fn=<ViewBackward>)

In [92]:
target_xyz_17 - target_xyz_17[0]

tensor([[ 0.0000,  0.0000,  0.0000],
        [-0.0643, -0.0033,  0.0291],
        [-0.0162,  0.1818,  0.1205],
        [-0.0658,  0.3751,  0.1050],
        [ 0.0637,  0.0043, -0.0291],
        [ 0.0521,  0.2088,  0.0199],
        [ 0.0138,  0.4052,  0.0476],
        [-0.0165, -0.1137, -0.0377],
        [-0.0104, -0.2272, -0.0475],
        [-0.0007, -0.2880, -0.0258],
        [-0.0153, -0.3349, -0.0543],
        [-0.0780, -0.1998, -0.0175],
        [-0.1170, -0.0778,  0.0345],
        [-0.1022,  0.0191,  0.0970],
        [ 0.0532, -0.1934, -0.0734],
        [ 0.1090, -0.0653, -0.0529],
        [ 0.1409,  0.0374, -0.0040]])

In [68]:
labels['target_xyz_17'].shape

torch.Size([32, 51])

In [69]:
target_xyz_17.requires_grad

False