In [None]:
import os
import sys
import random
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import cv2
from tqdm import tqdm
from pathlib import Path
from torch.utils.data import DataLoader

# Repository root, taken as the parent of the current working directory
# (the notebook is expected to be started from a sub-directory of the repo).
WORK_DIR = Path.cwd().parent
# Make the `src` package importable. Guarded so that re-running this cell
# does not keep appending duplicate entries to sys.path.
if str(WORK_DIR) not in sys.path:
    sys.path.append(str(WORK_DIR))
from src.datasets import get_dataset, get_dataloader
from src.utils import parse_data_cfg, IMG, FPHA, LMDB, DATA_DIR

In [None]:
# Experiment selection: which config file to read, and which checkpoint
# epoch / data split the rest of the notebook inspects.
exp_fold, cfgname = 'exp2', 'fpha_hpo_2hand_1_data_rand_alt'
epoch, data_split = 200, 'test'

# The config is read from <WORK_DIR>/data_cfg/<exp_fold>/<cfgname>.cfg
cfg = parse_data_cfg(WORK_DIR.joinpath('data_cfg', exp_fold, '{}.cfg'.format(cfgname)))

exp_dir = cfg['exp_dir']
# Name of the split to load, looked up under the '<data_split>_set' key
split_set = cfg['{}_set'.format(data_split)]

# Visualize Dataloader

In [None]:
def collate_fn(batch):
    """
    Collate a batch, mirroring the whole batch left-right about half the time.

    The flip decision is drawn once per call, so every sample of a batch ends
    up on the same hand side.

    Args:
        batch   : list of (img, uvd_gt) pairs
        img     : PIL image (needs ``transpose`` and ``np.asarray`` support)
        uvd_gt  : array indexed as [row, column]; column 0 is mirrored as
                  ``0.999 - u`` (presumably u normalised to [0, 1) -- TODO
                  confirm against the dataset)
    Out:
        new_img   : FloatTensor of the images, divided by 255 and passed
                    through IMG.imgshape2torch
        new_uvd   : FloatTensor of the (possibly mirrored) annotations
        hand_side : 0 = left (batch was flipped), 1 = right (not flipped)
    """
    # Same parity draw as before, so seeded runs keep their flip sequence
    flip        = random.randint(1, 10000)%2
    # 0 = left, 1 = right
    hand_side   = 0 if flip else 1

    new_img     = []
    new_uvd     = []
    for i, u in batch:
        if flip:
            i       = i.transpose(Image.FLIP_LEFT_RIGHT)
            # Mirror a copy: writing into `u` directly would modify the array
            # handed out by the dataset, so a dataset that keeps its
            # annotations in memory would be flipped back and forth.
            u       = np.array(u)
            u[:, 0] = 0.999 - u[:, 0]
        i = np.asarray(i)/255.0
        new_img.append(IMG.imgshape2torch(i))
        new_uvd.append(u)

    # Stack once with numpy; building a tensor from a Python list of arrays
    # converts element by element and is much slower.
    new_img     = torch.from_numpy(np.stack(new_img)).float()
    new_uvd     = torch.from_numpy(np.stack(new_uvd)).float()
    return new_img, new_uvd, hand_side

In [None]:
# Dataset for the selected split
dataset_kwargs = dict(split_set=split_set)
dataset = get_dataset(cfg, dataset_kwargs)

# Loader options; batches go through the flipping collate_fn.
# NOTE(review): batch_size / num_workers are cast with int() but `shuffle` is
# passed through exactly as read from cfg -- confirm parse_data_cfg yields a
# real bool there (a non-empty string would always be truthy).
sampler = None
shuffle = cfg['shuffle']
kwargs = dict(
    batch_size=int(cfg['batch_size']),
    shuffle=shuffle,
    num_workers=int(cfg['num_workers']),
    pin_memory=True,
    collate_fn=collate_fn,
)
data_loader = get_dataloader(dataset, sampler, kwargs)

In [None]:
# Pull batch number `idx` out of the loader and convert it to numpy for plotting.
idx = 0
for i, (img, uvd_gt, hand_side) in enumerate(data_loader):
    if i != idx:
        continue
    print('left' if hand_side == 0 else 'right')
    batch_size = img.shape[0]
    # Move axis 1 to the end: (B, d1, d2, d3) -> (B, d2, d3, d1), the same
    # result as the former swapaxes(2, 3) followed by swapaxes(1, 3).
    img = np.transpose(img.cpu().numpy(), (0, 2, 3, 1))
    img = IMG.scale_img_255(img)
    uvd_gt = uvd_gt.cpu().numpy()
    # Drop singleton axes but never the batch axis: a bare np.squeeze would
    # also remove axis 0 for a batch of one and break uvd_gt[k] indexing below.
    singleton_axes = tuple(a for a in range(1, uvd_gt.ndim) if uvd_gt.shape[a] == 1)
    if singleton_axes:
        uvd_gt = np.squeeze(uvd_gt, axis=singleton_axes)
    break
else:
    # Loader exhausted before reaching `idx`: fail loudly instead of leaving
    # stale (or undefined) img / uvd_gt for the plotting cell.
    raise IndexError('data_loader has no batch with index {}'.format(idx))

In [None]:
# Show up to 16 samples of the fetched batch with their ground-truth joints.
fig, ax = plt.subplots(4, 4, figsize=(15, 15))
# Panels are filled row by row; zip stops at the end of the batch, so surplus
# panels simply stay empty. No global `idx` counter is touched here.
for panel, cur_img, u in zip(ax.flat, img, uvd_gt):
    panel.imshow(cur_img)
    # scale_points_WH takes (width, height), as in the (1920, 1080) calls of
    # the evaluation cells. An image array is (rows, cols) = (height, width),
    # so the two shape entries have to be swapped (this only shows for
    # non-square images).
    img_h, img_w = cur_img.shape[0], cur_img.shape[1]
    u = IMG.scale_points_WH(u, (1,1), (img_w, img_h))
    FPHA.visualize_joints_2d(panel, u[FPHA.REORDER_IDX], joint_idxs=False)

# Evaluation

In [None]:
# Ground truth for the selected split: keys, xyz joints and their uvd projection
keys = LMDB.get_keys(os.path.join(DATA_DIR, split_set + "_keys_cache.p"))
xyz_gt = LMDB.read_all_lmdb_dataroot(keys, os.path.join(DATA_DIR, split_set + '_xyz_gt.lmdb'), 'float32', (21, 3))
uvd_gt = FPHA.xyz2uvd_color(xyz_gt)


def _load_side_predictions(side):
    """
    Load the saved predictions of one hand side for `epoch` / `data_split`.

    Args:
        side : 'left' or 'right', used as the suffix of the prediction files
    Out:
        pred_uvd  : (N, 21, 3) joints, u/v scaled from a (1, 1) frame to
                    (1920, 1080) and depth multiplied by 1000 (presumably
                    metres -> millimetres, TODO confirm)
        pred_xyz  : pred_uvd converted with FPHA.uvd2xyz_color
        pred_conf : confidences as loaded from the *_conf_<side>.txt file
    """
    uvd_file = os.path.join(DATA_DIR, exp_dir, 'predict_{}_{}_uvd_{}.txt'.format(epoch, data_split, side))
    pred_uvd = np.reshape(np.loadtxt(uvd_file), (-1, 21, 3))
    pred_uvd = IMG.scale_points_WH(pred_uvd, (1, 1), (1920, 1080))
    pred_uvd[..., 2] *= 1000
    pred_xyz = FPHA.uvd2xyz_color(pred_uvd)
    conf_file = os.path.join(DATA_DIR, exp_dir, 'predict_{}_{}_conf_{}.txt'.format(epoch, data_split, side))
    pred_conf = np.loadtxt(conf_file)
    return pred_uvd, pred_xyz, pred_conf


# Left hand
pred_uvd_left, pred_xyz_left, pred_conf_left = _load_side_predictions('left')

# Right hand
pred_uvd_right, pred_xyz_right, pred_conf_right = _load_side_predictions('right')

# When the predictions were made on left-flipped images, mirror the ground
# truth the same way: normalise to a (1, 1) frame, mirror u as 0.999 - u,
# scale back to (1920, 1080) and recompute xyz from the mirrored uvd.
if cfg['pred_img_side'] == 'left':
    uvd_gt = IMG.scale_points_WH(uvd_gt, (1920, 1080), (1,1))
    uvd_gt[: , :, 0] = 0.999 - uvd_gt[:, :, 0]
    uvd_gt = IMG.scale_points_WH(uvd_gt, (1,1), (1920, 1080))
    xyz_gt = FPHA.uvd2xyz_color(uvd_gt)

In [None]:
# Frame to inspect (other frames looked at earlier: 15427, 21664)
idx = 42863
print(keys[idx])

img = Image.open(os.path.join(DATA_DIR, 'First_Person_Action_Benchmark', 'Video_files', keys[idx]))
# Mirror the frame when the predictions were made on left-flipped images
if cfg['pred_img_side'] == 'left':
    img = img.transpose(Image.FLIP_LEFT_RIGHT)
img = np.asarray(img)

fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(img)

# red = left-hand prediction, green = right-hand prediction, blue = ground truth
for joints, colour in ((pred_uvd_left, 'r'), (pred_uvd_right, 'g'), (uvd_gt, 'b')):
    FPHA.visualize_joints_2d(ax, joints[idx][FPHA.REORDER_IDX], joint_idxs=False, c=colour)

In [None]:
# Hand side evaluated by the cells below; anything other than 'right'
# selects the left-hand predictions.
side = 'left'
pred_uvd, pred_xyz, pred_conf = (
    (pred_uvd_right, pred_xyz_right, pred_conf_right)
    if side == 'right'
    else (pred_uvd_left, pred_xyz_left, pred_conf_left)
)

In [None]:
import matplotlib.patches as patches

# Confidence heat-map of frame `idx` (uses `idx` and the numpy `img` of the
# single-frame cell above, plus pred_uvd / pred_conf of the selected side).
n_per_cell = 5      # consecutive confidence entries that belong to one 2D cell
grid       = 13     # the map is drawn as grid x grid cells ...
cell_px    = 32     # ... of cell_px pixels each on the 416x416 image (13 * 32 = 416)
conf       = pred_conf[idx]

# get the best idx for each 2D cell
max_idx = [g*n_per_cell + np.argmax(conf[g*n_per_cell:(g + 1)*n_per_cell])
           for g in range(len(conf)//n_per_cell)]

fig, ax = plt.subplots(figsize=(5,5))
pred_uvd_416 = IMG.scale_points_WH(pred_uvd[idx], (1920, 1080), (416, 416))
# Ground truth at the same scale; computed but not drawn in this figure
uvd_gt_416 = IMG.scale_points_WH(uvd_gt[idx], (1920, 1080), (416, 416))
FPHA.visualize_joints_2d(ax, pred_uvd_416[FPHA.REORDER_IDX], joint_idxs=False, c='r')
img_rsz = IMG.resize_img(img, (416, 416))
ax.imshow(img_rsz.astype('uint32'))

# red    : cell whose confidence equals the global maximum
# yellow : confidence above 0.8
# blue   : confidence of 0.8 or below
# The confidence also sets the opacity of each cell.
best_conf = np.amax(conf)   # loop-invariant, computed once
for cell, best in enumerate(max_idx):
    y, x = np.unravel_index(cell, (grid, grid))
    al = conf[best]
    if al == best_conf:
        c = 'r'
    elif al <= 0.8:
        c = 'b'
    else:
        c = 'y'
    # matplotlib rejects alpha outside [0, 1]; clamp so an out-of-range
    # confidence cannot abort the plot.
    rect = patches.Rectangle((x*cell_px, y*cell_px), cell_px, cell_px, linewidth=1,
                             edgecolor=c, facecolor=c, fill=True,
                             alpha=float(np.clip(al, 0.0, 1.0)))
    ax.add_patch(rect)

In [None]:
# Mean L2 error over all predicted frames (ground truth is cut to the number
# of predictions), then the per-frame UVD error ranking.
n_pred = len(pred_uvd)
print('%s UVD mean_l2_error: ' %data_split, FPHA.mean_L2_error(uvd_gt[:n_pred], pred_uvd))
print('%s XYZ mean_l2_error: ' %data_split, FPHA.mean_L2_error(xyz_gt[:n_pred], pred_xyz))

# zip stops at the shorter of predictions / ground truth
error = np.asarray([FPHA.mean_L2_error(uvd, pred) for pred, uvd in zip(pred_uvd, uvd_gt)])
min_error_idx = np.argmin(error)
max_error_idx = np.argmax(error)
print('Best Pose id:', min_error_idx, 'uvd_error:', error[min_error_idx])
print('Worst Pose id:', max_error_idx, 'uvd_error:', error[max_error_idx])

# Frame ids from lowest to highest error. The loop variable is deliberately
# not called `idx`: that name holds the frame selected for the visualisation
# cells, and overwriting it here silently changed what they show on re-run.
for pose_id in np.argsort(error):
    print(pose_id)

In [None]:
# Fraction-of-frames-within-error curve on the XYZ predictions, and its AUC
n_pred_frames = len(pred_uvd)
pck = FPHA.percentage_frames_within_error_curve(xyz_gt[:n_pred_frames], pred_xyz)

# One line of comma-separated values (every entry is followed by ', ')
pck_str = ''.join(str(value) + ', ' for value in pck)
print(pck_str)

# Thresholds 0, 5, ..., 80 handed to calc_auc together with the curve
thresholds = np.arange(start=0, stop=85, step=5)
print('AUC:', FPHA.calc_auc(pck, thresholds))

# Create GIF

In [None]:
from moviepy.editor import ImageSequenceClip
from tqdm import tqdm
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure


# Render one sequence as a GIF: each frame with left (red) / right (green)
# predictions and the ground truth (blue) drawn on top.
seq = 'Subject_3/put_salt/3'
# seq = 'Subject_3/squeeze_paper/3'
# seq = 'Subject_2/charge_cell_phone/1'
SAVE_DIR = Path(DATA_DIR)/'acv-data'/'gifs'
# write_gif does not create missing directories
SAVE_DIR.mkdir(parents=True, exist_ok=True)

# Match the sequence as a whole path component: a plain `seq in k` would also
# pick up sibling sequences that merely start with the same digits
# (e.g. '.../3' matching '.../30').
seq_prefix = seq.rstrip('/') + '/'
seq_keys_list = [(i, k.split('/')) for i, k in enumerate(keys) if seq_prefix in k]
if not seq_keys_list:
    raise ValueError('no keys found for sequence {!r}'.format(seq))

index_list, seq_list = zip(*seq_keys_list)

# Frame number is the integer between the last '_' and the extension of the
# file name; sort the key indices by it so the frames play in order.
seq_list = [int(parts[-1].split('_')[-1].split('.')[0]) for parts in seq_list]
index_list = np.asarray(index_list)[np.argsort(seq_list)]

frames = []
# Loop names are kept distinct from the notebook-level `idx` / `img` so the
# visualisation cells above still see their own frame after this cell ran.
for frame_idx in tqdm(index_list):
    frame_idx = int(frame_idx)
    frame = Image.open(os.path.join(DATA_DIR, 'First_Person_Action_Benchmark', 'Video_files', keys[frame_idx]))
    if cfg['pred_img_side'] == 'left':
        frame = frame.transpose(Image.FLIP_LEFT_RIGHT)
    frame = np.asarray(frame)

    fig, ax = plt.subplots()
    ax.imshow(frame)
    ax.axis('off')
    FPHA.visualize_joints_2d(ax, pred_uvd_left[frame_idx][FPHA.REORDER_IDX], joint_idxs=False, c='r')
    FPHA.visualize_joints_2d(ax, pred_uvd_right[frame_idx][FPHA.REORDER_IDX], joint_idxs=False, c='g')
    FPHA.visualize_joints_2d(ax, uvd_gt[frame_idx][FPHA.REORDER_IDX], joint_idxs=False, c='b')
    fig.canvas.draw()
    # np.frombuffer replaces the deprecated binary mode of np.fromstring; it
    # returns a read-only view, hence the copy after reshaping to (H, W, 3).
    # NOTE(review): tostring_rgb is itself deprecated in recent Matplotlib
    # releases -- confirm the installed version before upgrading.
    data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
    data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,)).copy()
    frames.append(data)

    # Close this figure explicitly so figures do not pile up over the loop
    plt.close(fig)

segment_clip = ImageSequenceClip(frames, fps=24)
name = SAVE_DIR/('{}_{}_{}_{}.gif'.format(cfgname, epoch, data_split, seq.replace('/', '_')))
segment_clip.write_gif(str(name), fps=24)
from IPython.display import Image as IPythonImage
# NOTE(review): the bytes are a GIF but are declared as 'png'; left unchanged
# -- confirm the installed IPython accepts format='gif' before switching.
with open(name,'rb') as f:
    display(IPythonImage(data=f.read(), format='png'))

# Detect

In [None]:
import torch

from src.models import get_model
from src.utils import EK

# Build the model used by the detection cells below from `cfg` and `epoch`
# (presumably loading the weights saved at that epoch -- TODO confirm).
# NOTE(review): the meaning of the positional False / None / None arguments
# is defined by src.models.get_model; check there before changing them.
# Uncomment to override the device entry of the config before building:
# cfg['device'] = 2
model = get_model(cfg, False, epoch, None, None)

In [None]:
# Image: run the detector on a single frame resolved by EK.get_img_path.
# Alternative input, a frame addressed through `keys`:
#   idx = 1000
#   img = Image.open(os.path.join(DATA_DIR, 'First_Person_Action_Benchmark', 'Video_files', keys[idx]))
img = Image.open(EK.get_img_path(22, 16, 11616))

# detect() returns a pair that unpacks as (left, right); the pair itself is
# kept as `pred` too
pred_left, pred_right = pred = model.detect(img)

In [None]:
# Overlay both detections on the input image
fig, ax = plt.subplots()
ax.imshow(img)
# red = left-hand prediction, green = right-hand prediction
for hand_pred, colour in ((pred_left, 'r'), (pred_right, 'g')):
    FPHA.visualize_joints_2d(ax, hand_pred[FPHA.REORDER_IDX], joint_idxs=False, c=colour)

In [None]:
# Video: choose the frames the video-GIF cell below runs the detector on.
# dom / modality / data_split_vid are forwarded to EK.get_video_frames and
# also become part of the output GIF file name.
dom = 'action'
modality = 'rgb'
data_split_vid = 'train'

# Single video: all frame paths of one video index
vid_idx = 7119
all_img_path = EK.get_video_frames(vid_idx, dom=dom, modality=modality, data_split=data_split_vid)

# Multiple videos (alternative to the single-video block above): concatenate
# the frame paths of videos start_vid..end_vid, inclusive
# start_vid = 54
# end_vid = 57
# vid_idx = '{}-{}'.format(start_vid, end_vid)
# all_img_path = []
# for idx in range(start_vid, end_vid + 1):
#     cur_img_paths = EK.get_video_frames(idx, dom=dom, modality=modality, data_split=data_split_vid)
#     all_img_path += cur_img_paths

# print(len(all_img_path))

In [None]:
from moviepy.editor import ImageSequenceClip
from tqdm import tqdm
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure
import time
# Run the detector on every selected video frame, time it, and save the
# annotated frames as a GIF.
SAVE_DIR = Path(DATA_DIR)/'acv-data'/'gifs'
# write_gif does not create missing directories
SAVE_DIR.mkdir(parents=True, exist_ok=True)

frames = []
total_time = 0
# Loop names are kept distinct from the notebook-level `img` / `pred_left` /
# `pred_right` so the single-image cells above keep their results.
for img_path in tqdm(all_img_path):
    frame = Image.open(img_path)
    # Only the detect() call is timed; image loading and rendering are not
    start = time.perf_counter()
    left_joints, right_joints = model.detect(frame)
    total_time += time.perf_counter() - start

    fig, ax = plt.subplots()
    ax.imshow(frame)
    ax.axis('off')
    FPHA.visualize_joints_2d(ax, left_joints[FPHA.REORDER_IDX], joint_idxs=False, c='r')
    FPHA.visualize_joints_2d(ax, right_joints[FPHA.REORDER_IDX], joint_idxs=False, c='g')
    fig.canvas.draw()
    # np.frombuffer replaces the deprecated binary mode of np.fromstring; it
    # returns a read-only view, hence the copy after reshaping to (H, W, 3).
    data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
    data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,)).copy()
    frames.append(data)
    # Close this figure explicitly so figures do not pile up over the loop
    plt.close(fig)


# Guard against an empty frame list / zero measured time
if total_time > 0:
    print('fps:', len(all_img_path)/total_time)
else:
    print('fps: n/a (no detection time measured)')

# NOTE(review): the clip is assembled at 60 fps but written at 6 fps; with
# moviepy this presumably samples the clip at the lower rate (dropping most
# frames) -- values kept as they were, confirm this is intended.
segment_clip = ImageSequenceClip(frames, fps=60)
name = SAVE_DIR/('{}_{}_{}_{}_{}_{}.gif'.format(cfgname, epoch, vid_idx, dom, modality, data_split_vid))
segment_clip.write_gif(str(name), fps=6)
from IPython.display import Image as IPythonImage
# NOTE(review): the bytes are a GIF but are declared as 'png'; left unchanged
# -- confirm the installed IPython accepts format='gif' before switching.
with open(name,'rb') as f:
    display(IPythonImage(data=f.read(), format='png'))