In [1]:
import time

import torch

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import sys
import errno

from common.camera import *
from common.visualization import *
from common.utils import *
from common.generators import ChunkedGenerator, UnchunkedGenerator
from common.h36m_dataset import Human36mDataset, preprocess_Human36m
from common.visualization import *
from common.model import *
from common.xianhui_dataset import *
import matplotlib
import glob
import plotly
import json
%matplotlib inline
plotly.offline.init_notebook_mode(connected=True)

from scripts.build_model import *
from scripts.train import *
from scripts.eval import *
from scripts.data_preprocessing_cmu_mocap import *

In [2]:
# Directory handed to load_and_preprocess_cmu_mocap() in the following cell.
# Relative path: resolved against the notebook's working directory.
data_root_path = "../wild_data_cmu/output_human/" 

In [3]:
# Load the dataset object and its 2D keypoints from data_root_path.
# The loader reports four stages while running: load 3d data, process 3d data,
# load 2d keypoints, process 2d keypoints.
dataset, keypoints = load_and_preprocess_cmu_mocap(data_root_path)

 load 3d data 
 processing 3d data 
 load 2d keypoints 
 processing 2d keypoints 


In [4]:
# Deterministic split over the sorted subject ids:
#   first 15        -> subjects_train
#   next 8 (15..22) -> subjects_semi
#   everything else -> subjects_test (fetched as the *_valid arrays below)
subjects = sorted(dataset.subjects())
train_end, semi_end = 15, 23
subjects_train = subjects[:train_end]
subjects_semi = subjects[train_end:semi_end]
subjects_test = subjects[semi_end:]

In [5]:
def _fetch_split(split_subjects):
    """Run fetch() for one subject list with the settings shared by every split.

    Returns whatever fetch() returns; the callers below unpack it as
    (cameras, 3D poses, 2D poses).
    """
    return fetch(dataset=dataset,
                 keypoints=keypoints,
                 subjects=split_subjects,
                 stride=1)


# Validation arrays come from the held-out test subjects.
cameras_valid, poses_valid, poses_valid_2d = _fetch_split(subjects_test)

cameras_train, poses_train, poses_train_2d = _fetch_split(subjects_train)

# The 3D poses of the semi split are discarded on purpose (only cameras + 2D are kept).
cameras_semi, _, poses_semi_2d = _fetch_split(subjects_semi)

In [6]:
# NOTE(review): `resume` is set here but not read anywhere in this cell.
resume = True
filter_widths = [3] * 5
# Argument meaning of build_models(17, 2, 17, ...) is not visible from this notebook;
# presumably (joints in, features per joint, joints out) -- confirm in scripts.build_model.
(model_pos_train,
 model_pos,
 model_traj,
 model_traj_train) = build_models(17, 2, 17, filter_widths)

# Alternative checkpoint: "../checkpoint/pretrained_h36m_cpn.bin"
chk_filename = "checkpoints/epoch_80.bin"
print('Loading checkpoint', chk_filename)
# The map_location callable returns the storage unchanged, so tensors are not
# moved to the device they were saved from.
# NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
checkpoint = torch.load(chk_filename, map_location=lambda storage, loc: storage)
print('This model was trained for {} epochs'.format(checkpoint['epoch']))

# The train and eval copies of each network receive the same weights.
pos_state = checkpoint['model_pos']
traj_state = checkpoint['model_traj']
model_pos_train.load_state_dict(pos_state)
model_pos.load_state_dict(pos_state)
model_traj_train.load_state_dict(traj_state)
# Last expression of the cell: its result (the key-compatibility report) is displayed.
model_traj.load_state_dict(traj_state)

Loading checkpoint checkpoints/epoch_80.bin
This model was trained for 80 epochs


IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [7]:
# Query the pose model for its temporal receptive field, in frames
# (243 for the checkpoint loaded above, per the recorded output).
receptive_field = model_pos.receptive_field()
print('INFO: Receptive field: {} frames'.format(receptive_field))
# Half window around the centre frame; integer division, so an even
# receptive field would round down.
pad = (receptive_field - 1) // 2 # Padding on each side

INFO: Receptive field: 243 frames


In [8]:
# Generator over the FIRST validation sequence only: each input list is cut with [:1].
# `pad` is the half receptive field computed in the previous cell; no causal shift
# and no augmentation are requested.
test_generator = UnchunkedGenerator(cameras_valid[:1], 
                                    poses_valid[:1], 
                                    poses_valid_2d[:1], 
                                    pad=pad, 
                                    causal_shift=0, 
                                    augment=False)

In [9]:
# Pull batches from the generator and keep the LAST one converted, as float32 tensors,
# in the globals inputs_3d / inputs_2d / cam / target used by the following cells.
#
# NOTE(review): the `c > 2` check runs AFTER the conversions, so up to four batches are
# converted before the loop stops. The generator was built from [:1] slices, so it
# presumably yields a single batch and the counter never triggers -- confirm, and
# consider replacing the loop with a single next(...) if one batch is all that is wanted.
# NOTE(review): if the generator yields nothing, none of these names get defined.
c = 0
for cam, batch, batch_2d in test_generator.next_epoch():
    inputs_3d = torch.from_numpy(batch.astype('float32'))
    inputs_2d = torch.from_numpy(batch_2d.astype('float32'))
    cam = torch.from_numpy(cam.astype('float32'))
    # Drop `pad` frames from both ends of axis 1 and keep the first two components of
    # the last axis. With pad == 0 the slice `pad:-pad` would be empty.
    target = inputs_2d[:, pad:-pad, :, :2].contiguous()

    if c > 2:
        break
    c += 1

In [10]:
# Joint-0 slice, kept with a singleton joint axis so it broadcasts over the others.
trajectory = inputs_3d[:, :, :1, :]
# Copy of the 3D batch in which every joint except joint 0 has the joint-0
# coordinates added; joint 0 itself is left as is.
# Presumably this turns root-relative joints into camera-frame positions -- confirm
# against the dataset preprocessing.
input_3d_cam = inputs_3d.clone()
input_3d_cam[:, :, 1:, :] = inputs_3d[:, :, 1:, :] + trajectory

In [11]:
# Convert the first sequence of the batch to homogeneous coordinates.
# The joint count is read from the data instead of being hard-coded to 17, so a
# pose with a different number of joints neither raises nor (for a single joint)
# silently broadcasts into 17 copies.
pose_cam = input_3d_cam[0].numpy()
nframe, njoint = pose_cam.shape[:2]
# Start from all ones so the 4th (homogeneous) component is already 1.
pts_3d_cam_homo = np.ones((nframe, njoint, 4))
pts_3d_cam_homo[..., :3] = pose_cam
# Flatten frames and joints into one axis and transpose to a (4, nframe * njoint)
# layout: one column per point.
pts_3d_cam_homo = pts_3d_cam_homo.reshape(-1, 4).T

In [12]:
# Camera -> screen projection.
# Intrinsics are taken from the first camera of subject "01" and viewed as a 3x3 matrix.
# NOTE(review): the poses come from the test subjects; this assumes they share the
# camera of subject "01" -- confirm.
ref_camera = dataset.cameras()["01"][0]
intrinsics_3x3 = ref_camera["intrinsics"].reshape(3, 3)
# Only the first three rows are passed on (the homogeneous row is dropped); the result
# is regrouped as (frames, 17 joints, 2).
proj_2d = camera2screen(pts_3d_cam_homo[:3], intrinsics_3x3).reshape(-1, 17, 2)

In [13]:
# Normalise the projected coordinates using the resolution stored with the
# first camera of subject "01".
ref_camera = dataset.cameras()["01"][0]
proj_2d_normal = normalize_screen_coordinates(
    proj_2d[..., :2],
    w=ref_camera['res_w'],
    h=ref_camera['res_h'],
)

In [16]:
# Shape check: expected (n_frames, 17, 2) after the reshape in the projection cell.
proj_2d.shape

(1618, 17, 2)

In [14]:
# Sum of squared differences between the generator's 2D target (first batch item)
# and the re-projected, normalised 3D pose. A value near zero means the two agree.
reproj_residual = target.numpy()[0] - proj_2d_normal
np.power(reproj_residual, 2).sum()

2.7869874966051185e-12

In [15]:
# Display the projected coordinates as they were before normalize_screen_coordinates;
# numpy elides the middle of large arrays in the repr.
proj_2d

array([[[316.27837911, 288.84749774],
        [320.8831498 , 295.39540606],
        [322.32539771, 318.97239582],
        ...,
        [327.55985877, 265.58752069],
        [327.08234312, 292.07778584],
        [327.06340056, 297.41170575]],

       [[316.28363337, 288.84865445],
        [320.89759464, 295.38955891],
        [322.32648375, 318.96602944],
        ...,
        [327.56105675, 265.5882192 ],
        [327.08055313, 292.07128147],
        [327.06022159, 297.40146007]],

       [[316.28858139, 288.84808202],
        [320.91086909, 295.38308118],
        [322.32581063, 318.95995217],
        ...,
        [327.56551435, 265.58380006],
        [327.08846942, 292.07543901],
        [327.06848866, 297.40955259]],

       ...,

       [[315.50595593, 289.45162848],
        [320.045638  , 295.66635778],
        [327.34291324, 317.77558798],
        ...,
        [323.53979776, 264.97679403],
        [325.85379739, 291.38331217],
        [326.95059554, 296.68853589]],

       [[315.67