In [None]:
import cv2 as cv
import numpy as np
import scipy.io
from tqdm import tqdm
from itertools import product

In [None]:
# Dataset location on disk; every path template in the notebook starts here.
root = 'D:/data/MPI-INF-3DHP'

# Subject and sequence numbers to load; later cells format them as
# 'S<n>' and 'Seq<n>' when building file paths.
available_subject = [1, 2]
available_sequence = [1, 2]
# Camera indices 0..13, used for the 'video_<n>.avi' file names.
available_camera = list(range(14))

In [None]:
class SequentialDictionary:
    '''Nested dictionary whose missing keys are created on first access.

    Reading a key that does not exist yet stores and returns a new, empty
    SequentialDictionary under that key, so multi-level assignments such as
    x['1st dim']['2nd dim'] = 2 work without creating the intermediate
    levels by hand.

    Because of that, a plain read of a missing key (e.g. x['typo']) inserts
    an empty child as a side effect. Use `key in x` to test for presence
    without modifying the container.
    '''

    def __init__(self):
        self.data = dict()

    def __getitem__(self, index):
        '''Return the value stored under index, creating an empty child if absent.'''
        if index not in self.data:
            self.data[index] = SequentialDictionary()
        return self.data[index]

    def __setitem__(self, index, value):
        '''Store value under index at this level, replacing any previous value.'''
        self.data[index] = value

    def __contains__(self, index):
        '''Return True if index is a key of this level; never creates entries.

        Without this method, `index in x` would fall back to calling
        __getitem__ with 0, 1, 2, ... which auto-creates children and never
        raises IndexError, i.e. it would loop forever.
        '''
        return index in self.data

    def __iter__(self):
        '''Iterate over the keys of this level, like a regular dict.

        Defined explicitly for the same reason as __contains__: the implicit
        __getitem__-based iteration protocol would never terminate here.
        '''
        return iter(self.data)

    def __len__(self):
        '''Return the number of leaf values stored at this level and below.

        Nested SequentialDictionary children contribute their own length,
        so empty children count as zero.
        '''
        return sum(
            len(value) if isinstance(value, SequentialDictionary) else 1
            for value in self.data.values()
        )

In [None]:
# Video
# Names of the per-sequence sub-directories that hold each kind of video;
# they fill the {format} slot of video_path below.
VIDEO_RGB = 'imageSequence'
VIDEO_MASK_HUMAN_AND_CHAIR = 'FGmasks'
VIDEO_MASK_CHAIR = 'ChairMasks'

available_format = [
    VIDEO_RGB,
    VIDEO_MASK_HUMAN_AND_CHAIR,
    VIDEO_MASK_CHAIR,
]

video_path = '{root}/{subject}/{sequence}/{format}/video_{camera}.avi'
# video[subject][sequence][format][camera] -> cv.VideoCapture
video = SequentialDictionary()

# Materialise the combinations so the progress-bar total is derived from the
# same data that is iterated, instead of a hand-maintained product of lengths.
available_video = list(product(
    available_subject,
    available_sequence,
    available_format,
    available_camera,
))
total = len(available_video)

# cv.VideoCapture does not raise when a file is missing or unreadable, so
# collect the failures here and report them once, next to their cause.
unopened_video = []

# The loop variable is deliberately not called `format`, to keep the builtin usable.
for subject, sequence, video_format, camera in tqdm(available_video, total=total):
    path = video_path.format(
        root=root,
        subject='S%d' % subject,
        sequence='Seq%d' % sequence,
        format=video_format,
        camera=camera,
    )
    capture = cv.VideoCapture(path)
    if not capture.isOpened():
        unopened_video.append(path)
    video[subject][sequence][video_format][camera] = capture

if unopened_video:
    print('WARNING: %d video(s) could not be opened:' % len(unopened_video))
    for path in unopened_video:
        print('  ' + path)

In [None]:
# annotation
# Variable names inside annot.mat, used as keys into the dict returned by
# scipy.io.loadmat. Their meaning is presumably what the constant names say
# (2D / 3D in camera space, 3D in world space, calibration) -- TODO confirm
# against the dataset documentation.
ANNOT_CAMERA_2D = 'annot2'
ANNOT_CAMERA_3D = 'annot3'
ANNOT_WORLD_3D = 'univ_annot3'
ANNOT_CAMERA_CALI = 'cameras'

annot_path = '{root}/{subject}/{sequence}/annot.mat'
# annot[subject][sequence] -> dict loaded from that sequence's annot.mat
annot = SequentialDictionary()

available_annot = list(product(
    available_subject,
    available_sequence,
))
total = len(available_annot)

# tqdm advances the bar by itself while iterating; no manual update is needed
# (the previous `progress.update(1)` referenced an undefined name).
for subject, sequence in tqdm(available_annot, total=total):
    annot[subject][sequence] = scipy.io.loadmat(annot_path.format(
        root=root,
        subject='S%d' % subject,
        sequence='Seq%d' % sequence,
    ))

In [None]:
# camera parameters
# Keywords that start the matrix lines of camera.calibration; also used as
# the innermost keys of camera_parameter.
CAMERA_INTRINSIC = 'intrinsic'
CAMERA_EXTRINSIC = 'extrinsic'

camera_path = '{root}/{subject}/{sequence}/camera.calibration'
# camera_parameter[subject][sequence][camera_index][CAMERA_INTRINSIC] -> 3x3 array
# camera_parameter[subject][sequence][camera_index][CAMERA_EXTRINSIC] -> 3x4 array
camera_parameter = SequentialDictionary()

available_camera_parameter = list(product(
    available_subject,
    available_sequence,
))
total = len(available_camera_parameter)

for subject, sequence in tqdm(available_camera_parameter, total=total):
    # Index of the camera whose section is being read; updated by every
    # 'name' line and applied to the matrix lines that follow it.
    camera_index = -1
    with open(camera_path.format(
        root=root,
        subject='S%d' % subject,
        sequence='Seq%d' % sequence,
    ), 'r') as file:
        for line in file:
            word = line.split()  # split on whitespace, dropping leading/trailing blanks

            # A blank line yields no tokens; skip it instead of failing on word[0].
            if not word:
                continue

            if word[0] == 'name':
                camera_index = int(word[-1])

            elif word[0] == CAMERA_INTRINSIC:
                # 16 values form a 4x4 matrix; keep its upper-left 3x3 part.
                # np.float64 replaces the np.float alias removed in NumPy 1.24.
                mat = np.reshape(np.asarray(word[1:], dtype=np.float64), (4, 4))
                mat = mat[0:3, 0:3]
                camera_parameter[subject][sequence][camera_index][CAMERA_INTRINSIC] = mat

            elif word[0] == CAMERA_EXTRINSIC:
                # 16 values form a 4x4 matrix; keep its top three rows (3x4).
                mat = np.reshape(np.asarray(word[1:], dtype=np.float64), (4, 4))
                mat = mat[0:3, 0:4]
                camera_parameter[subject][sequence][camera_index][CAMERA_EXTRINSIC] = mat

In [None]:
# Which recording and frame to visualise.
subject = 2
sequence = 2
camera = 8
frame = 572
# Each keypoint is drawn as a filled square covering [-10, 10) pixels around it.
marker_half_size = 10

# Grab the selected frame from every video kind.
image = SequentialDictionary()
for video_format in available_format:
    capture = video[subject][sequence][video_format][camera]
    capture.set(cv.CAP_PROP_POS_FRAMES, frame)
    success, image[video_format] = capture.read()
    assert success, 'could not read frame %d of %s (camera %d)' % (frame, video_format, camera)

# One row per keypoint: (x, y, z). Presumably expressed in the coordinate
# frame of `camera`, going by the ANNOT_CAMERA_3D name -- TODO confirm.
in_3D = np.reshape(annot[subject][sequence][ANNOT_CAMERA_3D][camera, 0][frame], (-1, 3))

num_keypoints = len(in_3D)

# reshape for easy matrix multiplication: homogeneous points, one per column (4 x N)
in_3D = np.concatenate((in_3D, np.ones(shape=(num_keypoints, 1))), axis=1).transpose(1, 0)
# [I | 0]: identity rotation and ZERO translation. A column of ones here
# would shift every keypoint by one unit along each axis before projection.
identity_transform = np.concatenate((np.eye(3), np.zeros(shape=(3, 1))), axis=1)

projected = np.matmul(identity_transform, in_3D)
projected = np.matmul(camera_parameter[subject][sequence][camera][CAMERA_INTRINSIC], projected)
projected = projected / projected[-1, :]  # perspective divide by the last row
projected = projected.transpose(1, 0)     # back to one row per keypoint

height, width = image[VIDEO_RGB].shape[:2]
for x, y in projected[:, :2]:
    # A keypoint with zero depth divides to inf/nan; there is nothing to draw.
    if not (np.isfinite(x) and np.isfinite(y)):
        continue

    column = int(np.floor(x))
    row = int(np.floor(y))

    # Clip the square to the image. Both bounds are clamped to [0, size] so a
    # marker lying fully outside the image yields an empty slice.
    x0, x1 = np.clip([column - marker_half_size, column + marker_half_size], 0, width)
    y0, y1 = np.clip([row - marker_half_size, row + marker_half_size], 0, height)

    # OpenCV frames are in BGR channel order, so [255, 0, 0] is drawn as blue.
    image[VIDEO_RGB][y0:y1, x0:x1, :] = [255, 0, 0]


# Write each frame (the RGB one now carries the markers) to '<format>.jpg'
# in the current working directory.
for video_format in available_format:
    success = cv.imwrite('{format}.jpg'.format(format=video_format), image[video_format])
    assert success, 'could not write %s.jpg' % video_format

In [None]:
# Release every capture opened by the video-loading cell so the underlying
# files are closed.
available_video = list(product(
    available_subject,
    available_sequence,
    available_format,
    available_camera,
))

# The loop variable is not called `format`, so the builtin stays usable in
# the kernel after this cell has run.
for subject, sequence, video_format, camera in available_video:
    video[subject][sequence][video_format][camera].release()

In [None]:
# print('Video:', video_path)
# print('Open:', video.isOpened())
# print('Resolution:', '%dx%d' % (video.get(cv.CAP_PROP_FRAME_WIDTH), video.get(cv.CAP_PROP_FRAME_HEIGHT)))
# print('Total frames:', video.get(cv.CAP_PROP_FRAME_COUNT))
# print('Frame-rate:', video.get(cv.CAP_PROP_FPS))
# print('OpenCV:', cv.__version__)