# VIRAT Dataset Preprocessing Report

In [1]:
EXAMPLE                = 'VIRAT_S_000201_07_001485_001581'
SELECTED_OBJECT_TYPE   = [ 2, 3]  # only pick pedastrian, car or vehicles
object_anno_fields     = ['object_id', 'object_duration', 'current_frame','left_top_x','left_top_y', 'width', 'height', 'object_type']

DEFAULT_ANNOTATION_DIR = './annotations'
DEFAULT_VIDEO_DIR      = './videos_original'


In [2]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
class Color(object):
    '''Simple Color Mapper'''
    red    = [255, 0, 0]
    green  = [0, 255, 0]
    blue   = [0, 0, 255]
    yellow = [255, 255, 0]
    def random(self): return np.random.randint(0, 255, size=3)
    
def generate_object_trajectory(image, boxes_list, color, opacity=50):
    ''' Visualize object path on video using list of boxes from ground truths'''
    copied_img = np.copy(image)
    transparency = opacity/100.
    for (p1, p2) in boxes_list:
        # Calculate centroid
        x = p1[0] + (p2[0] - p1[0])/2.
        y = p1[1] + (p2[1] - p1[1])/2.
        centroid = (int(x), int(y))
        
        drawed_img = cv2.circle(copied_img, centroid, radius=3, color=color)
        # apply the overlay
        cv2.addWeighted(copied_img, 1 - transparency, 
                        drawed_img, transparency, 0, copied_img)
    return copied_img

def generate_focused_area_mask(image, boxes_list, offset, color, thickness=5):
    '''Gernate focused area'''
    
    bboxes = np.array(boxes_list)
    height, width, _ = image.shape
    
    min_x = max(min(filter_ouliner(bboxes[:, 0, 0] - offset)), 0.)
    min_y = max(min(filter_ouliner(bboxes[:, 0, 1] - offset)), 0.)
    max_x = min(max(bboxes[:, 1, 0] + offset), width)
    max_y = min(max(bboxes[:, 1, 1] + offset), height)
    
    
    mask = np.zeros_like(image)
    mask = cv2.rectangle(mask, (int(min_x), int(min_y)), (int(max_x), int(max_y)), color, thickness=5)

    return mask

def filter_ouliner(data,m = 2):
    data = data[abs(data - np.mean(data)) < m * np.std(data)]
    if len(data) > 0:
        return data
    else:
        return [np.mean(data)]


## Proccess Object Annotation File 
File format: `*viratdata.objects.txt`

|Column| Type |Comment|
|---|:------|:--------|
|1| Object id|a unique identifier of an object track. Unique within a file.|
|2| Object duration | duration of the object track|
|3| Currnet frame|corresponding frame number|
|4| bbox lefttop x|horizontal x coordinate of the left top of bbox, origin is lefttop of the frame|
|5| bbox lefttop y|vertical y coordinate of the left top of bbox, origin is lefttop of the frame|
|6| bbox width|horizontal width of the bbox|
|7| bbox height|vertical height of the bbox|
|8| Object Type|object type|

Object Type ID (for column 8 above for object files)

ID | Type|
---|:----|
1| person
2| car              (usually passenger vehicles such as sedan, truck)
3| vehicles         (vehicles other than usual passenger cars. Examples include construction vehicles)
4| object           (neither car or person, usually carried objects)
5| bike, bicylces   (may include engine-powered auto-bikes)

In [4]:
import os
import pandas as pd

annotation_path = os.path.join(DEFAULT_ANNOTATION_DIR, EXAMPLE + '.viratdata.objects.txt')

    
# Load file into data frames
df = pd.read_csv(annotation_path, delim_whitespace =True, names=object_anno_fields)

# Group each objects appeared in the video by object id
separated_objects_by_id = df.groupby('object_id')

# Generate a list of _boxes for each objects appear in the video
object_bboxes = []
temp_frames = []

for id, obj in separated_objects_by_id:
    # Only ke
    if obj['object_type'].values[0] not in SELECTED_OBJECT_TYPE:
        print('Object is not in selected object type')
        continue
    # Extract the bounding boxes
    upper_left_pts   = zip(obj['left_top_x'].values, obj['left_top_y'].values)
    width_height_lst = zip(obj['width'].values,      obj['height'].values)
    lower_right_pts  = [(px + w, py + h) for (px, py),(w, h) in zip(upper_left_pts, width_height_lst)]
    bbox_lst = [[(px1, py1), (px2, py2)] for (px1, py1), (px2, py2) in zip(upper_left_pts, lower_right_pts)]
    object_bboxes.append(bbox_lst)
    temp_frames.append(obj)
  
print "Number of annotated objects in the video %s"%len(object_bboxes)
grouped_objects = pd.concat(temp_frames).groupby('object_id')

Object is not in selected object type
Object is not in selected object type
Number of annotated objects in the video 3


In [5]:
# pick random object in grouped object
import random
SELECTED_OBJECT =  random.choice(grouped_objects.groups.keys()) # ID of object in the video
OBJECT_IDX      = grouped_objects.groups.keys().index(SELECTED_OBJECT)
print SELECTED_OBJECT
print grouped_objects.groups.keys()

3
[3, 4, 5]


In [6]:
# Download FFMEG to display video if not available
import imageio
imageio.plugins.ffmpeg.download()
from moviepy.editor import *

# Get path
video_path = os.path.join(DEFAULT_VIDEO_DIR, EXAMPLE+'.mp4')

# Construct video
videoclip = VideoFileClip(video_path)

# In this example, we only show the video when the selected object appears
appear_frames = grouped_objects.get_group(SELECTED_OBJECT)['current_frame'].values
start_frame = np.min(appear_frames)
end_frame = np.max(appear_frames)

# Select duration object appears
clip = videoclip.subclip(start_frame/videoclip.fps, end_frame/videoclip.fps)

# Get object bounding boxes\
bboxes_of_object = object_bboxes[OBJECT_IDX]
print "Number of required frames: %s"%len(bboxes_of_object)

Number of required frames: 2875


In [7]:
# Split each bboxes into chunk for generating focused area
bbox_arr = np.asarray(bboxes_of_object)
num_chunk =  len(bbox_arr) / 50
bbox_chunks = np.array_split(bbox_arr,num_chunk)

# Generate a image mask, size of the clip's dimension, contains path of the object
empty_mask = np.zeros_like(clip.get_frame(0))

object_trajectory = generate_object_trajectory(empty_mask, 
                                               bboxes_of_object, 
                                               color=Color.green, 
                                               opacity=60)
    
# For each frames, draw all the appear bounding boxes:
processed_frames = []
prev_frames = 0

for bbox_chunk in bbox_chunks:
    
    prev_frames += len(bbox_chunk)
    focused_area_mask =  generate_focused_area_mask(empty_mask, 
                                                    bbox_chunk, 
                                                    color=Color.yellow, 
                                                    offset=45)
    for idx, (p1, p2) in enumerate(bbox_chunk):
        # Get current frame from clip
        frame = clip.get_frame(float(idx+prev_frames-60)/clip.fps)
        
        # Draw bounding box of the object on current frame
        frame = cv2.rectangle(frame, (p1[0], p1[1]), (p2[0], p2[1]), color=Color.green, thickness=4)

        # Add object flow mask to current frame
        flow_frame = cv2.addWeighted(frame, 1.0, object_trajectory, 0.5, 0)
        
        # Add focused area to frame
        new_frame = cv2.addWeighted(flow_frame, 1.0, focused_area_mask, 1.0, 0)

        processed_frames.append(new_frame)
        del new_frame, frame

ValueError: cannot convert float NaN to integer

In [None]:
# Generate new video clip
new_clip = ImageSequenceClip(processed_frames, fps=clip.fps)
# ipython_display(new_clip, width=320)
new_clip.write_videofile('processed_clip_car.mp4', fps=clip.fps)