In [1]:
# Reference only: this notebook reproduces the evaluation path of the stock script, e.g.:
# python train.py \
# --config configs/pose3d/MB_train_h36m.yaml \
# --evaluate checkpoint/pose3d/MB_train_h36m/best_epoch.bin

In [2]:
import getpass

# The MotionBERT checkout is expected under the current user's home directory;
# the login name is looked up at runtime so the path is not tied to one account.
user = getpass.getuser()
motionbert_root = f'/home/{user}/codes/MotionBERT'

In [3]:
import os
import numpy as np
import argparse
import errno
import math
import pickle
import tensorboardX
from tqdm import tqdm
from time import time
import copy
import random
import prettytable

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

os.chdir(motionbert_root)

from lib.utils.tools import *
from lib.utils.learning import *
from lib.utils.utils_data import flip_data
from lib.data.dataset_motion_2d import PoseTrackDataset2D, InstaVDataset2D
from lib.data.dataset_motion_3d import MotionDataset3D
from lib.data.augmentation import Augmenter2D
from lib.data.datareader_aihub import DataReaderAIHUB
from lib.model.loss import *

from train import set_random_seed, save_checkpoint

In [4]:
# Name of the YAML file (without extension) under configs/pose3d/ that is loaded
# into `args` in the next cell.
config = 'MB_ft_tr_aihub_sport_ts_30'
# Name of the checkpoint sub-directory under checkpoint/pose3d/; also used as the
# prefix of every result file this notebook writes.
model_name = 'FT-MB_ft_h36m-MB_ft_tr_aihub_sport_ts_30'
# Alternative checkpoint kept for quick switching:
#model_name = 'MB_train_h36m'

In [5]:
import easydict

# Stand-in for the command-line options of train.py, built as a plain dict first
# and then wrapped so the fields are reachable as attributes (opts.config, ...).
_cli_options = dict(
    config="configs/pose3d/{}.yaml".format(config),
    checkpoint='checkpoint',
    pretrained='checkpoint',
    resume='',
    evaluate='checkpoint/pose3d/{}/best_epoch.bin'.format(model_name),
    selection='best_epoch.bin',
    seed=0,
)
opts = easydict.EasyDict(_cli_options)

# Seed first, then read the experiment configuration from the YAML file.
set_random_seed(opts.seed)
args = get_config(opts.config)

In [6]:
# Make sure the checkpoint directory exists, then open a TensorBoard writer that
# logs into <checkpoint>/logs.
#
# `exist_ok=True` makes the call a no-op when the directory is already there, so
# no errno inspection is needed. Any remaining OSError (permissions, a regular
# file occupying the path, ...) is re-raised as RuntimeError with a readable
# message and the original error chained as its cause.
try:
    os.makedirs(opts.checkpoint, exist_ok=True)
except OSError as e:
    raise RuntimeError(
        'Unable to create checkpoint directory: {}'.format(opts.checkpoint)
    ) from e
train_writer = tensorboardX.SummaryWriter(os.path.join(opts.checkpoint, "logs"))

In [7]:
# Inspect the batch size read from the config; it is passed to both data loaders below.
args.batch_size

16

In [8]:
# Inspect which dataset subsets the config selects; this list is handed to MotionDataset3D below.
args.subset_list

['AIHUB_tr_SPORT_ts_30']

In [9]:
print('Loading dataset...')

# Settings common to both loaders; only `shuffle` differs between train and test.
_shared_loader_params = {
    'batch_size': args.batch_size,
    'num_workers': 12,
    'pin_memory': True,
    'prefetch_factor': 4,
    'persistent_workers': True,
}
trainloader_params = dict(_shared_loader_params, shuffle=True)
testloader_params = dict(_shared_loader_params, shuffle=False)

# Build the train/test splits of the configured subsets and wrap them in loaders.
train_dataset = MotionDataset3D(args, args.subset_list, 'train')
test_dataset = MotionDataset3D(args, args.subset_list, 'test')
train_loader_3d = DataLoader(train_dataset, **trainloader_params)
test_loader = DataLoader(test_dataset, **testloader_params)

Loading dataset...


In [10]:
# Reader over the raw dataset file. It is used further down to denormalise the
# predictions and to fetch per-frame test annotations and clip indices.
# The test stride equals the clip length.
datareader = DataReaderAIHUB(
    n_frames=args.clip_len,
    sample_stride=args.sample_stride,
    data_stride_train=args.data_stride,
    data_stride_test=args.clip_len,
    dt_root='data/motion3d',
    dt_file=args.dt_file,
)

In [11]:
min_loss = 100000
model_backbone = load_backbone(args)

# Total element count over every parameter tensor of the backbone. Note that
# nothing is filtered on requires_grad here, despite the "Trainable" label.
model_params = sum(parameter.numel() for parameter in model_backbone.parameters())
print('INFO: Trainable parameter count:', model_params)

# When a GPU is present, wrap the model for multi-GPU use and move it to CUDA.
if torch.cuda.is_available():
    model_backbone = nn.DataParallel(model_backbone).cuda()

INFO: Trainable parameter count: 42466317


In [12]:
# Inspect the fine-tune flag and the resume/evaluate checkpoint paths before loading weights.
args.finetune, opts.resume, opts.evaluate

(True,
 '',
 'checkpoint/pose3d/FT-MB_ft_h36m-MB_ft_tr_aihub_sport_ts_30/best_epoch.bin')

In [13]:
# Prefer the evaluation checkpoint; fall back to the resume path when it is empty.
chk_filename = opts.evaluate or opts.resume
print('Loading checkpoint', chk_filename)

# The map_location callback returns each storage unchanged instead of moving it
# to the device tag recorded in the file.
checkpoint = torch.load(chk_filename, map_location=lambda storage, loc: storage)

# `model_pos` is another name for the same backbone object; the weights are
# loaded strictly, so every key has to match.
model_pos = model_backbone
model_pos.load_state_dict(checkpoint['model_pos'], strict=True)

Loading checkpoint checkpoint/pose3d/FT-MB_ft_h36m-MB_ft_tr_aihub_sport_ts_30/best_epoch.bin


In [14]:
# Inspect the partial_train setting (no output is recorded for this cell).
args.partial_train

In [15]:
# Confirm which checkpoint file the weights above were loaded from.
opts.evaluate

'checkpoint/pose3d/FT-MB_ft_h36m-MB_ft_tr_aihub_sport_ts_30/best_epoch.bin'

#### Evaluate

In [16]:
# Objects used by the evaluation cells below: args, model_pos, test_loader, datareader

In [17]:
# Inspect the evaluation-related config flags. Of these, only args.flip is read
# by the inference cell visible in this notebook.
args.no_conf, args.flip, args.rootrel, args.gt_2d

(False, True, True, False)

In [18]:
# Force flip test-time augmentation on for the inference loop
# (the recorded output of the previous cell shows it is already True).
args.flip = True

In [19]:
# Check GPU availability; the inference loop moves inputs to CUDA only when this is True.
torch.cuda.is_available()

True

In [20]:
# Run the model over the whole test loader and collect the denormalised
# predictions in `results_all`.
results_all = []
model_pos.eval()
_on_gpu = torch.cuda.is_available()

with torch.no_grad():
    for batch_input, batch_gt in tqdm(test_loader):
        # First two dims of the ground-truth batch; not used further in this cell.
        N, T = batch_gt.shape[:2]
        if _on_gpu:
            batch_input = batch_input.cuda()

        if not args.flip:
            predicted_3d_pos = model_pos(batch_input)
        else:
            # Flip test-time augmentation: predict on the input and on its
            # flipped copy, flip the second prediction back, and average the two.
            batch_input_flip = flip_data(batch_input)
            predicted_3d_pos_1 = model_pos(batch_input)
            predicted_3d_pos_flip = model_pos(batch_input_flip)
            predicted_3d_pos_2 = flip_data(predicted_3d_pos_flip)
            predicted_3d_pos = (predicted_3d_pos_1 + predicted_3d_pos_2) / 2

        results_all.append(predicted_3d_pos.cpu().numpy())

# Stack the per-batch arrays along the first axis, then let the data reader
# undo its normalisation.
results_all = datareader.denormalize(np.concatenate(results_all))

100%|██████████| 1/1 [00:02<00:00,  2.04s/it]


In [21]:
# Sanity-check the prediction array; presumably (clips, frames per clip, joints, xyz) — TODO confirm.
results_all.shape

(11, 243, 17, 3)

In [22]:
# Persist the denormalised predictions; the next cell reloads them from this same path.
np.save('custom_codes/Inference_and_evaluation/{}_result_denormalized.npy'.format(model_name), results_all)

In [23]:
# Reload the predictions written by the previous cell.
results_all = np.load('custom_codes/Inference_and_evaluation/{}_result_denormalized.npy'.format(model_name))

# Per-frame annotations of the test split, plus the frame indices of every test clip.
_, split_id_test = datareader.get_split_id()
_test_split = datareader.dt_dataset['test']
actions = np.array(_test_split['action'])
factors = np.array(_test_split['2.5d_factor'])
gts = np.array(_test_split['joints_2.5d_image'])
sources = np.array(_test_split['source'])

num_test_frames = len(actions)
frames = np.arange(num_test_frames)


def _gather_clips(per_frame_values):
    """Index a per-frame array with each clip's frame indices and stack the results."""
    return np.array([per_frame_values[clip_ids] for clip_ids in split_id_test])


action_clips = _gather_clips(actions)
factor_clips = _gather_clips(factors)
source_clips = _gather_clips(sources)
frame_clips = _gather_clips(frames)
gt_clips = _gather_clips(gts)
assert len(results_all) == len(action_clips)

# Per-frame accumulators: summed MPJPE, summed P-MPJPE, and the number of clips
# that contributed to each frame.
e1_all = np.zeros(num_test_frames)
e2_all = np.zeros(num_test_frames)
oc = np.zeros(num_test_frames)
action_names = sorted(set(_test_split['action']))

# Clips whose source name appears here are left out of the evaluation.
block_list = [
    's_09_act_05_subact_02',
    's_09_act_10_subact_02',
    's_09_act_13_subact_01',
]

for idx in range(len(action_clips)):
    source = source_clips[idx][0]
    if source in block_list:
        continue
    frame_list = frame_clips[idx]  # numpy.ndarray of global test-frame indices
    action = action_clips[idx][0]
    factor = factor_clips[idx][:, np.newaxis, np.newaxis]
    gt = gt_clips[idx]

    # Scale a private copy of the prediction in place (keeps the array's dtype).
    pred = results_all[idx].copy()
    pred *= factor

    # Root-relative errors: express every joint relative to joint 0.
    pred = pred - pred[:, :1, :]  # (243, 17, 3)
    gt = gt - gt[:, :1, :]  # (243, 17, 3)
    err1 = mpjpe(pred, gt)  # (243,)
    err2 = p_mpjpe(pred, gt)  # (243,)

    e1_all[frame_list] += err1  # a numpy.ndarray can be used as an index
    e2_all[frame_list] += err2
    oc[frame_list] += 1  # per-frame contribution count

In [24]:
# Aggregate the per-frame errors accumulated above into per-action means and
# print a summary table (P1 = MPJPE, P2 = P-MPJPE).
results = {action: [] for action in action_names}
results_procrustes = {action: [] for action in action_names}

for idx in range(num_test_frames):
    # Only frames that at least one evaluated clip contributed to are averaged.
    # The coverage counter `oc` is the authoritative test for that; checking
    # `e1_all[idx] > 0` instead would also drop frames whose error is exactly
    # zero and would silently hide NaN errors.
    if oc[idx] > 0:
        err1 = e1_all[idx] / oc[idx]
        err2 = e2_all[idx] / oc[idx]
        action = actions[idx]
        results[action].append(err1)
        results_procrustes[action].append(err2)

final_result = []
final_result_procrustes = []
summary_table = prettytable.PrettyTable()
summary_table.field_names = ['test_name'] + action_names
for action in action_names:
    # NOTE: an action with no evaluated frames yields NaN here (mean of an empty list).
    final_result.append(np.mean(results[action]))
    final_result_procrustes.append(np.mean(results_procrustes[action]))
summary_table.add_row(['P1'] + final_result)
summary_table.add_row(['P2'] + final_result_procrustes)
print(summary_table)

# Overall scores are the unweighted mean over actions, not over frames.
e1 = np.mean(np.array(final_result))
e2 = np.mean(np.array(final_result_procrustes))
print('Protocol #1 Error (MPJPE):', e1, 'mm')
print('Protocol #2 Error (P-MPJPE):', e2, 'mm')
print('----------')

+-----------+--------------------+
| test_name |         30         |
+-----------+--------------------+
|     P1    | 67.40624741414243  |
|     P2    | 46.555171046777424 |
+-----------+--------------------+
Protocol #1 Error (MPJPE): 67.40624741414243 mm
Protocol #2 Error (P-MPJPE): 46.555171046777424 mm
----------


### Visualization

In [25]:
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.gridspec as gridspec
from custom_codes.test_utils import *

# Use the Tk GUI backend for figures (this needs a display; NOTE(review): confirm
# it is intended when the notebook runs on a headless machine).
plt.switch_backend('TkAgg')
# Font type 42 makes matplotlib embed TrueType fonts in PDF and PostScript output.
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

In [26]:
# Shapes of `pred` and `gt` as left behind by the evaluation loop (its last processed clip).
pred.shape, gt.shape

((243, 17, 3), (243, 17, 3))

In [27]:
# Shape of a single frame of the prediction.
pred[0].shape

(17, 3)

In [28]:
# Plot prediction and ground truth for one frame of a clip. `pred` and `gt` are
# not defined in this cell: they are the root-relative arrays left over from the
# last clip processed by the evaluation loop.
# visualize_3d_pose is presumably provided by custom_codes.test_utils — TODO confirm.
frame = 200
visualize_3d_pose([pred[frame], gt[frame]])

In [29]:
# Shapes of the per-clip source names and per-clip global frame indices.
source_clips.shape, frame_clips.shape

((11, 243), (11, 243))

In [30]:
# Source name and global test-frame index of frame `frame` within the last clip.
source_clips[-1][frame], frame_clips[-1][frame]

('res_30_F170D_5', 3142)

### Visualize one clip

In [31]:
# Clip to visualise; corresponds to AIHUB_tr_SPORT_ts_30/test/00000010.pkl
idx = 10
factor = factor_clips[idx][:, np.newaxis, np.newaxis]

# Work on private copies so the stored clips and predictions stay untouched.
gt = gt_clips[idx].copy()
pred = results_all[idx].copy()

# Unlike the evaluation cell (which multiplies the prediction by the factor),
# here the ground truth is divided by it, in place.
gt /= factor

# Make both root-relative: every joint expressed relative to joint 0.
pred = pred - pred[:, :1, :]  # (243, 17, 3)
gt = gt - gt[:, :1, :]  # (243, 17, 3)

#### Save frames

In [37]:
# Render every frame of the selected clip (prediction and ground truth) to an image
# file, rotating the camera a little on each frame.

# Fixed, symmetric axis limits so the view does not rescale between frames.
xlim=(-512, 512)
ylim=(-512, 512)
zlim=(-512, 512)
# One template figure/axes is configured once ...
fig = plt.figure(0, figsize=(10, 10))
ax = plt.axes(projection="3d")
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_zlim(zlim)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
for i in tqdm(range(pred.shape[0])):
    # ... and deep-copied for each frame so drawing never touches the template.
    # NOTE(review): nothing in this cell closes the copied figures; confirm that
    # visualize_multiple_3d_pose does, otherwise memory grows with the frame count.
    _ax = copy.deepcopy(ax)
    # Azimuth starts at 80 degrees and advances one degree per frame.
    _ax.view_init(elev=12., azim=80+i)
    # Presumably draws both poses on _ax and writes <save_path>/<name>; the helper
    # is not visible here (the video cell later reads its images from this directory).
    visualize_multiple_3d_pose([pred[i], gt[i]], _ax, save=True, save_path='./custom_codes/Inference_and_evaluation/{}_idx{}_result'.format(model_name, idx), name='{}.jpg'.format(i), i=i)

100%|██████████| 243/243 [00:59<00:00,  4.10it/s]


#### Make the video

In [49]:
import imageio
from natsort import natsorted
# cv2 was previously only in scope through a star import; import it explicitly.
import cv2

# Assemble the frames exported for clip `idx` into an MP4 stored next to them.
frames_dir = './custom_codes/Inference_and_evaluation/{}_idx{}_result'.format(model_name, idx)
video_path = os.path.join(frames_dir, 'video.mp4')

# Only pick up the exported frame images. The video is written into this same
# directory, so an unfiltered listing would include video.mp4 on every re-run
# and the image decoding below would fail on it.
img_list = natsorted(
    [name for name in os.listdir(frames_dir) if name.lower().endswith('.jpg')]
)

# The context manager closes (and finalises) the video even if a frame fails.
with imageio.get_writer(video_path, fps=30) as videowriter:
    for img_name in img_list:
        img_path = os.path.join(frames_dir, img_name)
        print(img_path)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            raise IOError('Unable to read frame image: {}'.format(img_path))
        # OpenCV decodes to BGR; convert to RGBA before appending the frame.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGBA)
        videowriter.append_data(img)



./custom_codes/Inference_and_evaluation/FT-MB_ft_h36m-MB_ft_tr_aihub_sport_ts_30_idx10_result/0.jpg




./custom_codes/Inference_and_evaluation/FT-MB_ft_h36m-MB_ft_tr_aihub_sport_ts_30_idx10_result/1.jpg
./custom_codes/Inference_and_evaluation/FT-MB_ft_h36m-MB_ft_tr_aihub_sport_ts_30_idx10_result/2.jpg
./custom_codes/Inference_and_evaluation/FT-MB_ft_h36m-MB_ft_tr_aihub_sport_ts_30_idx10_result/3.jpg
./custom_codes/Inference_and_evaluation/FT-MB_ft_h36m-MB_ft_tr_aihub_sport_ts_30_idx10_result/4.jpg
./custom_codes/Inference_and_evaluation/FT-MB_ft_h36m-MB_ft_tr_aihub_sport_ts_30_idx10_result/5.jpg
./custom_codes/Inference_and_evaluation/FT-MB_ft_h36m-MB_ft_tr_aihub_sport_ts_30_idx10_result/6.jpg
./custom_codes/Inference_and_evaluation/FT-MB_ft_h36m-MB_ft_tr_aihub_sport_ts_30_idx10_result/7.jpg
./custom_codes/Inference_and_evaluation/FT-MB_ft_h36m-MB_ft_tr_aihub_sport_ts_30_idx10_result/8.jpg
./custom_codes/Inference_and_evaluation/FT-MB_ft_h36m-MB_ft_tr_aihub_sport_ts_30_idx10_result/9.jpg
./custom_codes/Inference_and_evaluation/FT-MB_ft_h36m-MB_ft_tr_aihub_sport_ts_30_idx10_result/10.jpg