In [1]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt

In [115]:
# Maps each column name of the ground-truth matrix to its column index
key = dict(
    (name, idx)
    for idx, name in enumerate(
        'camera ID frame left top width height worldX worldY feetX feetyY'.split()
    )
)

# Cameras are synchronised with respect to camera #5; these are the per-camera
# frame offsets (entry i belongs to camera i + 1)
start_frame_nums = [
    5543, 3607, 27244, 31182,
    1, 22402, 18968, 46766,
]

# Number of pixels along the longer side of the original videos
original_longer = 1920

# Folder with all frames, relative to the data folder
frames_folder = 'frames'

In [114]:
# Ground-truth file. The loading cell reads it with h5py when the name
# contains 'Raw' and with scipy.io otherwise.
gt_path = 'trainvalRaw.mat'
# gt_path = 'trainval.mat'

# Data folder of the processed camera; the frames folder is looked up inside it
data_path = 'processed/camera2_2'

# 1-based camera number and the largest number of objects allowed per frame
camera_num = 2
max_n_objects = 3

# Region of interest as (y, x, h, w), in pixels of the downscaled image
roi = (13, 48, 64, 64)

# Fraction of an object's area that has to lie inside the ROI for the object
# to be counted as present
intersection_threshold = 0.05
assert 0. <= intersection_threshold <= 1.

# Number of pixels along the longer side of the downscaled videos
downscaled_longer = 160

In [116]:
# Scale factor from downscaled pixel coordinates to original pixel coordinates
ratio = original_longer / float(downscaled_longer)

# ROI in pixels of the original video, still ordered as (y, x, h, w)
original_roi = np.asarray(roi) * ratio

# `camera_num` is 1-based while the offsets list is indexed from 0
start_frame_num = start_frame_nums[camera_num - 1]

# NOTE: this rebinds `frames_folder`, so re-running this cell without first
# re-running the configuration cell prepends `data_path` a second time.
frames_folder = os.path.join(data_path, frames_folder)

In [17]:
print 'Loading "{}"'.format(os.path.basename(gt_path))

if 'Raw' in path:
    import h5py
    f = h5py.File(gt_path, 'r')
    data = f['trainData'][()].T
else:
    import scipy.io
    f = scipy.io.loadmat(gt_path)
    data = f['trainData']

Loading "trainvalRaw.mat"


In [86]:
# Select data for the specified camera and shift frame nums by the starting frame num
camera_idx = np.equal(data[:, 0], camera_num)
frames, left, top, width, height = np.split(data[camera_idx, 2:7], 5, -1)

boxes = np.concatenate([top, left, height, width], -1)
frames -= start_frame_num

objects = np.concatenate([frames, boxes], -1) # frame, bbox

print 'Total number of objects:', objects.shape[0]
print 'Frames from {} to {}'.format(frames.min(), frames.max())

Total number of objects: 738050
Frames from 40518.0 to 220327.0


In [70]:
# Select only the objects that are within the region of interest
def intersection(bbox, roi):
    r"""Computes area of intersection between boxes in `bbox` and boxes in `roi`.
    Dimensions in both should be given as (y, x, h, w).

    A single box of shape [4] and plain Python sequences are accepted as well;
    `roi` is matched against `bbox` by NumPy broadcasting.

    :param bbox: array-like of shape [N, 4] (or [4] for a single box)
    :param roi: array-like of shape [4] or [k, 4] where k \in {1, N}
    :return: np.array of shape [N] (0-d for a single box); boxes that do not
        overlap the roi get an area of 0
    """

    # Accept lists/tuples too; arrays pass through unchanged.
    bbox = np.asarray(bbox)
    roi = np.asarray(roi)

    # Edges of the candidate overlap rectangle. Broadcasting lines up a [4]
    # roi with every row of `bbox`, so no explicit axis padding is needed.
    y_top = np.maximum(bbox[..., 0], roi[..., 0])
    x_left = np.maximum(bbox[..., 1], roi[..., 1])
    y_bottom = np.minimum(bbox[..., 0] + bbox[..., 2], roi[..., 0] + roi[..., 2])
    x_right = np.minimum(bbox[..., 1] + bbox[..., 3], roi[..., 1] + roi[..., 3])

    # The boxes are disjoint when the overlap rectangle is inverted on either
    # axis. The product alone cannot be trusted: it is positive when both
    # axes are inverted.
    disjoint = np.logical_or(x_right < x_left, y_bottom < y_top)

    # The intersection of two axis-aligned bounding boxes is always an
    # axis-aligned bounding box
    intersection_area = (x_right - x_left) * (y_bottom - y_top)

    # np.where also handles the 0-d result of a single box, where item
    # assignment on the NumPy scalar would raise.
    return np.where(disjoint, 0, intersection_area)

intersection_area = intersection(objects[..., 1:], original_roi)
object_area = objects[..., 3] * objects[..., 4]
fraction_of_object_area_in_roi = intersection_area / (object_area + 1e-8)
objects_within_roi = np.greater(fraction_of_object_area_in_roi, intersection_threshold)

objects = objects[objects_within_roi]

print 'Total number of objects within ROI:', objects.shape[0]

Total number of objects within ROI: 420104


In [146]:
# Count objects per frame. Row i of `frames_and_counts` is
# (frame number i + 1, number of objects in that frame); frames without any
# object keep a count of zero.
max_frame = int(objects[:, 0].max())
unique_frames, object_counts = np.unique(objects[..., 0], return_counts=True)

counts_per_frame = np.zeros(max_frame, dtype=np.int32)
# Frame numbers are 1-based, rows are 0-based
counts_per_frame[unique_frames.astype(np.int32) - 1] = object_counts.astype(np.int32)

frames_and_counts = np.column_stack([
    np.arange(1, max_frame + 1, dtype=np.int32),
    counts_per_frame,
])

In [147]:
# Find frames with up to `max_n_objects`
up_to_n_objects = np.less_equal(frames_and_counts[:, 1], max_n_objects)
no_objects = np.equal(frames_and_counts[:, 1], 0)


allowed_frames = frames_and_counts[up_to_n_objects, 0]
print 'There are {} frames where the number of objects is between 1 and {}.'.format(up_to_n_objects.sum() - no_objects.sum(), max_n_objects)
print 'There are {} frames with zero objects.'.format(no_objects.sum())

There are 82279 frames where the number of objects is between 1 and 3.
There are 48235 frames with zero objects.


In [137]:
# Find all frames we have
def find_frames(path):
    """Maps frame numbers to the paths of the frame images found in `path`.

    Every directory entry whose name ends with 'jpeg' is treated as a frame.
    Its frame number is the text between the last '_' of the file name (or the
    start of the name when there is no '_') and the first '.' after it.

    :param path: folder that contains the frame images
    :return: dict of {frame number (int): path of the image, joined with `path`}
    :raises ValueError: if that part of a matching file name is not an integer
    """
    frames = {}
    for filename in os.listdir(path):
        if not filename.endswith('jpeg'):
            continue
        # Parse the file name only. Parsing the joined path let underscores in
        # directory names (e.g. 'camera2_2') leak into the number whenever the
        # file name itself had no '_'.
        frame_num = int(filename.split('_')[-1].split('.')[0])
        frames[frame_num] = os.path.join(path, filename)
    return frames

frames = find_frames(frames_folder)
frame_nums = sorted(frames.keys())
print frame_nums[:10]

[30, 60, 90, 120, 150, 180, 210, 240, 270, 300]


In [162]:
allowed_and_present_frames = set(frame_nums).intersection(allowed_frames)
allowed_and_present_frames = np.asarray(sorted(list(allowed_and_present_frames)))

print 'There are {} present frames that have less than {} objects.'.format(len(allowed_and_present_frames), max_n_objects)

There are 4351 present frames that have less than 3 objects.


In [179]:
object_counts_in_present_frames = frames_and_counts[allowed_and_present_frames - 1, 1]
print len(object_counts_in_present_frames)
for i in xrange(0, len(object_counts_in_present_frames), 30):
    print object_counts_in_present_frames[i:i+30]

4351
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 