In [1]:
%load_ext autoreload
%autoreload 2

import pickle
import os
import numpy as np
import cv2
import time
import sys
import re
from copy import deepcopy
from collections import defaultdict
from itertools import islice
from tqdm import tqdm_notebook
from time import time
from easydict import EasyDict
from IPython.core.debugger import set_trace
from matplotlib import pyplot as plt
from warnings import filterwarnings

import torch
from torch import nn
import torch.nn.functional as F
from torch import autograd
from torch.utils.data import DataLoader
from torch.utils.data import DataLoader

from mvn.datasets.human36m import Human36MMultiViewDataset, Human36MSingleViewDataset
from mvn.utils.img import image_batch_to_numpy, denormalize_image,to_numpy
from mvn.models.triangulation import VolumetricTriangulationNet
from mvn.models.volumetric_temporal import VolumetricTemporalNet
from mvn.utils.multiview import project_3d_points_to_image_plane_without_distortion
from mvn.utils.vis import draw_2d_pose
from mvn.utils import img
from mvn.utils import multiview
from mvn.utils import volumetric
from mvn.utils import op
from mvn.utils import vis
from mvn.utils import misc
from mvn.utils import cfg
from mvn.datasets import utils as dataset_utils
from mvn.datasets.human36m import Human36MMultiViewDataset, Human36MSingleViewDataset

from train import setup_human36m_dataloaders

from mvn.models.temporal import Seq2VecRNN,\
                                FeaturesAR_RNN,\
                                FeaturesAR_CNN1D,\
                                FeaturesAR_CNN2D_UNet,\
                                FeaturesAR_CNN2D_ResNet

from mvn.models.volumetric_temporal import VolumetricTemporalNet,\
                                           VolumetricTemporalAdaINNet,\
                                           VolumetricFRAdaINNet



%matplotlib inline

# Human3.6M metadata: subjects, camera ids and the two takes of every action.
retval = {
    'subject_names': ['S1', 'S5', 'S6', 'S7', 'S8', 'S9', 'S11'],
    'camera_names': ['54138969', '55011271', '58860488', '60457274'],
    # Every action comes in two takes, listed as "<Action>-1", "<Action>-2".
    'action_names': [
        '{}-{}'.format(action, take)
        for action in (
            'Directions', 'Discussion', 'Eating', 'Greeting', 'Phoning',
            'Posing', 'Purchases', 'Sitting', 'SittingDown', 'Smoking',
            'TakingPhoto', 'Waiting', 'Walking', 'WalkingDog', 'WalkingTogether',
        )
        for take in (1, 2)
    ],
}

# Per-layer channel widths: they grow from 16 up to 128 and shrink back to 32.
# The explicit literal is kept so the exact number of layers stays visible.
CHANNELS_LIST = [
    16, 32, 32, 32, 32, 32, 32, 32, 32, 32, 64, 64, 64, 64, 64, 128, 128, 128, 128, 128,
    128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
    128, 128, 128, 128, 128, 64, 64, 64, 32, 32, 32, 32, 32,
]

# Use the first GPU when CUDA is available; otherwise fall back to the CPU so
# that the notebook still runs (slowly) on a machine without a GPU instead of
# failing at the first `.to(device)` call.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# cuDNN settings; they only take effect when the computation runs on a GPU.
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.enabled = True

In [2]:
# Experiment configuration for the "vol_temporal_adain" model. The path is
# relative, so it is resolved against the notebook's current working directory.
config_path = './experiments/human36m/train/human36m_vol_temporal_adain.yaml'
config = cfg.load_config(config_path)
# Override the loaded value for this session; every later cell sees 32.
config.model.volume_features_dim = 32

In [3]:
# Available model variants keyed by their short names.
# Note that "vol_temporal_lstm_v2v" maps to the same class as "vol_temporal".
model_classes = {
    "vol": VolumetricTriangulationNet,
    "vol_temporal": VolumetricTemporalNet,
    "vol_temporal_adain": VolumetricTemporalAdaINNet,
    "vol_temporal_fr_adain": VolumetricFRAdaINNet,
    "vol_temporal_lstm_v2v": VolumetricTemporalNet,
}
model_name = "vol_temporal_adain"

# The instance is deliberately bound to the name `self`: the cells below access
# `self.backbone`, `self.encoder`, ... (presumably the model's forward pass pasted
# step by step for debugging — confirm against the model source).
self = model_classes[model_name](config, device=device)
self = self.to(device)

Loading pretrained weights from: ./data/pose_resnet_4.5_pixels_human36m.pth
Parameters [{'final_layer.weight', 'final_layer.bias'}] were not inited
Successfully loaded pretrained weights for backbone


In [4]:
train_loader, val_loader, _ = setup_human36m_dataloaders(config, is_train=True, distributed_train=False)

# Only the first training batch is needed for the step-by-step run below.
# Fetch it explicitly and fail right here when the loader yields nothing, rather
# than leaving `batch` and friends undefined and hitting a NameError (or a stale
# value from a previous kernel run) in a later cell.
batch = next(iter(train_loader), None)
if batch is None:
    raise RuntimeError('train_loader did not yield a usable first batch')

(images_batch,
 keypoints_3d_gt,
 keypoints_3d_validity_gt,
 proj_matricies_batch) = dataset_utils.prepare_batch(batch, device)


In [5]:
# NOTE: this cell overwrites `images_batch` and `proj_matricies_batch`, so it is
# not idempotent -- re-run the batch-loading cell before running it again.

# Remember the (batch, time) layout, then fold both axes together for the backbone.
batch_size, dt = images_batch.shape[0], images_batch.shape[1]
image_shape = images_batch.shape[-2:]
images_batch = images_batch.view(-1, 3, *image_shape)

# Run the backbone on all frames at once.
(heatmaps,
 features,
 alg_confidences,
 vol_confidences,
 bottleneck) = self.backbone(images_batch)

# Restore the time axis and keep only the last frame (the pivot) as a length-1 axis.
images_batch = images_batch.view(batch_size, dt, 3, *image_shape)[:, -1:, ...]

# Channel count and spatial size of the backbone feature maps.
features_channels = features.shape[1]
features_shape = features.shape[-2:]

# Work on a copy of the cameras and update each of them for the change from
# image resolution to feature-map resolution.
new_cameras = deepcopy(batch['cameras'])
for view_i in range(dt):
    for batch_i in range(batch_size):
        new_cameras[view_i][batch_i].update_after_resize(image_shape, features_shape)

# Stack the projection matrices view by view, then swap the first two axes.
per_view_projections = []
for camera_batch in new_cameras:
    per_view_projections.append(
        torch.stack([torch.from_numpy(camera.projection) for camera in camera_batch], dim=0)
    )
proj_matricies_batch = torch.stack(per_view_projections, dim=0).transpose(1, 0)  # shape (batch_size, dt, 3, 4)

# Move to the target device and keep only the pivot's matrix (length-1 axis).
proj_matricies_batch = proj_matricies_batch.float().to(device)[:, -1:, ...]

# Split the features into the pivot frame and the frames used for the style vector.
features = features.view(batch_size, dt, features_channels, *features_shape)
if self.include_pivot:
    style_features = features
else:
    style_features = features[:, :-1, ...].contiguous()
pivot_features = self.process_features(features[:, -1, ...]).unsqueeze(1)

# Encode every style frame into a feature vector. Depending on `encoder_type`,
# the encoder consumes either the backbone bottleneck or the backbone features.
# In both branches the encoder input is cut off from the backbone graph unless
# `style_grad_for_backbone` is set.
if self.encoder_type == 'backbone':

    bottleneck_shape = bottleneck.shape[-2:]
    bottleneck_channels = bottleneck.shape[1]

    # Restore the time axis so that the pivot (last frame) can be dropped.
    bottleneck = bottleneck.view(batch_size, dt, bottleneck_channels, *bottleneck_shape)
    if not self.include_pivot:
        bottleneck = bottleneck[:,:-1,...].contiguous()
    # Fold (batch, frames) together again; frames is dt, or dt-1 without the pivot.
    bottleneck = bottleneck.view(-1,
                                 bottleneck_channels,
                                 *bottleneck_shape)

    if not self.style_grad_for_backbone:
        bottleneck = bottleneck.detach()

    encoded_features = self.encoder(bottleneck)
else:
    # Fold (batch, frames) together; frames is dt, or dt-1 without the pivot.
    style_features = style_features.view(-1,
                                         features_channels,
                                         *features_shape)
    # Same gating as in the bottleneck branch above: detach only when gradients
    # must NOT reach the backbone (the original condition here was inverted).
    if not self.style_grad_for_backbone:
        style_features = style_features.detach()
    encoded_features = self.encoder(style_features)

# One encoded vector per style frame: (batch_size, frames, encoded_feature_space).
encoded_features = encoded_features.view(batch_size,
                                         -1,
                                         self.encoded_feature_space)

# Collapse the per-frame sequence into a single style vector per sample.
style_vector = self.features_sequence_to_vector(encoded_features, device=device) # [batch_size, 512]

# The 3D keypoints come either from precalculated predictions or from the
# ground truth; no other source is handled here.
if not (self.use_precalculated_pelvis or self.use_gt_pelvis):
    raise RuntimeError('In absence of precalculated pelvis or gt pelvis, self.use_volumetric_pelvis should be True') 

# Precalculated predictions take precedence over the ground truth.
keypoints_key = 'pred_keypoints_3d' if self.use_precalculated_pelvis else 'keypoints_3d'
tri_keypoints_3d = torch.from_numpy(np.array(batch[keypoints_key])).float().to(device)

In [7]:
# Compute the coordinate volumes (plus their cuboids and base points) from the
# 3D keypoints selected above.
coord_volumes, cuboids, base_points = op.get_coord_volumes(
    self.kind,
    self.training,
    self.rotation,
    self.cuboid_side,
    self.volume_size,
    device,
    keypoints=tri_keypoints_3d,
)

# Lift the pivot feature map into the volumes and aggregate.
volumes = op.unproject_heatmaps(
    pivot_features,
    proj_matricies_batch,
    coord_volumes,
    volume_aggregation_method=self.volume_aggregation_method,
    vol_confidences=vol_confidences,
)
print(volumes[0].shape)

> /nfs/hpc2_storage/ibulygin/learnable-triangulation-pytorch/mvn/utils/op.py(228)unproject_heatmaps()
    226             set_trace()
    227             # prepare to F.grid_sample
--> 228             grid_coord_proj = grid_coord_proj.unsqueeze(1).unsqueeze(0)
    229             current_volume = F.grid_sample(heatmap, grid_coord_proj)
    230 

ipdb> grid_coord_proj.shape
torch.Size([262144, 2])
ipdb> q


BdbQuit: 