Put all videos into a single directory, data/A1_A2_videos/. There should be 60 ".MP4" files in this directory.

The processed annotations are in data/annotations/annotation_A1.edited.csv.


### 1. Generate files for training on A1 videos

In [1]:
import argparse
import os
import decord
from collections import defaultdict
import numpy as np

In [39]:
# Read train data
# pandas is not imported by the notebook's import cell, so bring it into scope
# here; without this the cell fails with NameError on `pd`.
import pandas as pd

# header=0 consumes the CSV's own header row; the column labels are replaced by
# the names given below.
df = pd.read_csv(
    '../data/annotations/annotation_A1.edited.csv',
    names=['userID', 'fileName', 'cameraView', 'activityType',
           'startTime', 'endTime', 'labelClass', 'appearanceBlock'],
    header=0,
)

In [41]:
# Encode the camera view as a string code in a new `cView` column.
# The pairs are applied in order, so a later keyword overrides an earlier one
# on rows that match both. Rows matching none of them keep no value.
for view_keyword, view_code in (("Dashboard", "1"), ("Right", "2"), ("Rear", "3")):
    matches_view = df['cameraView'].str.contains(view_keyword, na=False)
    df.loc[matches_view, 'cView'] = view_code

In [53]:
# The raw view and activity-type columns are no longer needed once `cView` exists.
df = df.drop(columns=['cameraView', 'activityType'])

In [54]:
# Capture the current column labels as an array and display them, to check the
# column order before re-ordering.
old_cols = df.columns.values 
old_cols

array(['userID', 'fileName', 'startTime', 'endTime', 'labelClass',
       'appearanceBlock', 'cView'], dtype=object)

In [55]:
# Target column order; columns not listed here (e.g. appearanceBlock) are dropped
# by the reindex.
new_cols = [
    'fileName',
    'startTime',
    'endTime',
    'userID',
    'cView',
    'labelClass',
]
df = df.reindex(new_cols, axis="columns")

In [56]:
# Display the re-ordered annotation table.
df

Unnamed: 0,fileName,startTime,endTime,userID,cView,labelClass
0,Rightside_user_id_24491_1,0:00,0:17,24491,2,
1,Rightside_user_id_24491_1,0:18,0:45,24491,2,3
2,Rightside_user_id_24491_1,0:45,0:54,24491,2,14
3,Rightside_user_id_24491_1,1:14,1:45,24491,2,2
4,Rightside_user_id_24491_1,1:53,1:57,24491,2,8
...,...,...,...,...,...,...
554,Rightside_user_id_49381_1,06:41,07:05,49381,2,16
555,Rightside_user_id_49381_1,07:15,07:31,49381,2,10
556,Rightside_user_id_49381_1,07:41,07:58,49381,2,5
557,Rightside_user_id_49381_1,08:08,08:23,49381,2,11


In [2]:
def time2int(time_str):
    """Convert a clock-style time string to a whole number of seconds.

    Accepts "M:SS" / "MM:SS" (e.g. "00:18" -> 18) and also "H:MM:SS"
    (e.g. "1:02:03" -> 3723).

    Args:
        time_str: colon-separated time string with two or three integer fields.

    Returns:
        The total number of seconds as an int.

    Raises:
        ValueError: if the string does not have two or three fields, or a
            field is not an integer.
    """
    fields = time_str.split(":")
    if len(fields) not in (2, 3):
        raise ValueError("expected MM:SS or H:MM:SS, got %r" % (time_str,))

    # Each further field is worth 1/60th of the previous one, so fold left.
    total = 0
    for field in fields:
        total = total * 60 + int(field)
    return total

def int2time(secs):
    """Format a number of seconds as "MM:SS.mmm".

    Args:
        secs: seconds as an int or float.

    Returns:
        A string with zero-padded minutes and seconds and three decimals,
        e.g. 75 -> "01:15.000", 5 -> "00:05.000".
    """
    m, s = divmod(secs, 60)
    # Round to the printed precision *before* padding: otherwise a value such
    # as 9.9996 is classified as "< 10", formatted as "10.000" and then padded
    # to the malformed "010.000".
    s = round(s, 3)
    if s >= 60:
        # Rounding pushed the seconds up to a full minute; carry it over.
        m += 1
        s -= 60
    # %06.3f zero-pads to "SS.mmm" (6 characters), covering both s < 10 and s >= 10.
    return "%02d:%06.3f" % (m, s)

def process_file_name(file_name, user_id, view):
    """Build the on-disk video base name for one annotation row.

    The annotation's file name lacks the "NoAudio" part and the full camera
    view that the video files carry, e.g.
    ("Rightside_user_id_24491_1", "24491", "Rightside_window")
        -> "Rightside_window_user_id_24491_NoAudio_1".
    Some users' videos spell the view differently or use "User_id" with a
    capital U (e.g. "Dashboard_User_id_24026_NoAudio_3"); those cases are
    handled by the per-user tables below.

    Args:
        file_name: file name from the annotation; only its last character is
            used, as the trailing recording index.
        user_id: user id string from the annotation.
        view: camera view string from the annotation.

    Returns:
        The video file name without extension.
    """
    recording_idx = file_name[-1]

    # This id is remapped to "38058" before any lookup.
    if user_id == "38508":
        user_id = "38058"

    if user_id in ("24026", "38058"):
        # These users' files use "User_id" and underscore-separated view names.
        template = "%s_User_id_%s_NoAudio_%s"
        view_renames = {
            "Rightside_window": "Right_side_window",
            "Rearview": "Rear_view",
            "Rightside window": "Right_side_window",
        }
    else:
        template = "%s_user_id_%s_NoAudio_%s"
        if user_id == "35133":
            view_renames = {
                "Rearview": "Rear_view",
                "Rightside window": "Rightside_window",
            }
        elif user_id == "49381":
            view_renames = {
                "Rear_view": "Rearview_mirror",
                "Rightside_window": "Right_window",
            }
        else:
            view_renames = {}

    # Views without an entry are used unchanged.
    view = view_renames.get(view, view)
    return template % (view, user_id, recording_idx)

In [13]:
# Input: the edited A1 annotation CSV, read by processing_anno_original().
anno_file = "../data/annotations/annotation_A1.edited.csv"
# Directory with the original videos; each one is opened to count its frames
# and derive the video's total length in seconds.
video_path = "../data/A1_A2_videos/"
# Output: one "<clip file name> <label id>" line per clip.
out_anno_file = "../data/annotations/processed_anno_original.csv"
clip_cmds = "A1_cut.sh"  # ffmpeg bash file for cutting the videos into clips
# Directory the generated ffmpeg commands write the clips into.
target_path = "../data/A1_clips/"
# Inserted after "-vf scale=" in every generated ffmpeg command.
# NOTE(review): presumably width:height for ffmpeg's scale filter — confirm.
resolution = "-2:540"

#### Create processed_anno_original.csv: generate the processed annotations and the video-cutting commands



In [18]:
def processing_anno_original():
    """Turn the raw annotation CSV into a clip list and ffmpeg cutting commands.

    Reads the module-level settings `anno_file`, `video_path`,
    `out_anno_file`, `clip_cmds`, `target_path` and `resolution`, and:

    1. Parses every annotation row (after the header) into per-video segments
       and prints label counts and segment-length statistics.
    2. Opens each video to get its length, then collects the un-annotated
       gaps between consecutive segments as "empty" segments.
    3. Writes `out_anno_file` with one "<clip name> <label>" line per
       segment, where "NA" becomes -1 and "empty" becomes -2.
    4. Writes `clip_cmds` with one ffmpeg command per clip.

    Raises:
        AssertionError: if the same (start, end) pair appears twice for a video.
        SystemExit: if two consecutive segments of a video overlap.
    """
    data = defaultdict(list)  # video_file to segments
    users = set()
    action_lengths = []
    action_id_to_count = defaultdict(int)
    vid_to_seg = defaultdict(dict)  # video_file to segment, make sure no duplicates

    # Read everything up front so the file handle is closed before the
    # (slow) video probing below.
    with open(anno_file, "r") as f:
        anno_lines = f.readlines()[1:]  # first line is the header

    # compute some stats
    # 1. the action id num, the length stats
    for line in anno_lines:
        if not line.strip():
            # tolerate blank lines (e.g. a trailing newline at end of file)
            continue
        user_id, video_file_name, view, _, start, end, action_id, _block = line.strip().split(",")
        users.add(user_id)
        # original video has "NoAudio" but annotation does not
        video_file_name = "%s.MP4" % process_file_name(
            video_file_name.strip(), user_id.strip(), view.strip())

        start = time2int(start)
        end = time2int(end)

        # action_id is kept as a string here: it can be a class number or "NA"
        action_id = action_id.strip()
        action_id_to_count[action_id] += 1

        # the same (start, end) must not be annotated twice for one video
        assert (start, end) not in vid_to_seg[video_file_name], line
        vid_to_seg[video_file_name][(start, end)] = 1

        action_lengths.append(end - start)

        data[video_file_name].append((user_id, video_file_name, start, end, action_id))

    print(action_id_to_count)
    # user num: 5, action length min/max/median: 3, 38, 20.0
    print("user num: %s, action length min/max/median: %s, %s, %s" % (
        len(users),
        min(action_lengths), max(action_lengths), np.median(action_lengths)))

    # get the max length of each video, and check non-annotated segment length
    total_empty, total_length = 0, 0
    data_empty = {}  # video_file -> empty segments
    for video_file in data:
        video = os.path.join(video_path, video_file)
        vcap = decord.VideoReader(video)
        num_frame = len(vcap)
        # NOTE(review): hard-codes 30 frames per second — confirm for all videos.
        max_length = int(num_frame / 30.0)
        # Uses the last row's end time, i.e. assumes the rows of a video are
        # listed in chronological order.
        anno_max_length = data[video_file][-1][3]
        user_id = data[video_file][0][0]

        # Sentinel segment ending at 0 so a gap before the first annotation is found.
        anno_segments = [(None, None, 0, 0, 0)] + data[video_file]

        if max_length > anno_max_length:
            print("%s anno ends on %s, has %s total" % (video_file, anno_max_length, max_length))
            # Sentinel segment starting at the video end so the tail gap is found.
            anno_segments += [(None, None, max_length, 0, 0)]
        elif max_length < anno_max_length:
            print("warning for %s, %s, %s" % (video_file, anno_segments[-1], max_length))
            # some annotation might be longer than the video

        empty_segments = []
        for s1, s2 in zip(anno_segments[0:-1], anno_segments[1:]):
            last_end = s1[3]
            next_start = s2[2]

            gap = next_start - last_end
            if gap > 0:
                empty_segments.append((user_id, video_file, last_end, next_start, "empty"))
                total_empty += gap
            elif gap < 0:
                print(s1, s2)
                # Abort on overlapping segments. Raising SystemExit directly
                # avoids depending on `sys`, which this notebook never imports.
                raise SystemExit("overlapping annotation segments in %s" % video_file)

        data_empty[video_file] = empty_segments
        total_length += max_length
    print("total length %s, empty %s" % (total_length, total_empty))

    # write the annotation file
    video_clips = []  # (source video, start time, end time, clip file name)
    with open(out_anno_file, "w") as f:
        for video_file in data:
            anno_segs = data[video_file]
            empty_segs = data_empty[video_file]
            for user_id, _, start, end, action_id in anno_segs + empty_segs:
                # clip name: <video base name>.<user_id>.<start>.<end>.MP4
                video_id = "%s.%s.%d.%d.MP4" % (
                    os.path.splitext(video_file)[0],
                    user_id, start, end)
                if action_id == "NA":
                    action_id = -1
                elif action_id == "empty":
                    action_id = -2
                action_id = int(action_id)
                video_clips.append((video_file, int2time(start), int2time(end), video_id))

                f.write("%s %d\n" % (video_id, action_id))

    # write the cutting command
    with open(clip_cmds, "w") as f:
        for ori_video, start, end, target_clip in video_clips:
            f.write("ffmpeg -nostdin -y -i %s -vf scale=%s -c:v libx264 -ss %s -to %s %s\n" % (
                os.path.join(video_path, ori_video),
                resolution,
                start, end,
                os.path.join(target_path, target_clip)))

In [19]:
#processing_anno_original()

Cut the videos (you can also directly run bash).

mkdir data/A1_clips

parallel -j 4 < A1_cut.sh


### 2. Make annotation splits (without empty segments, see paper for details)

In [21]:
# Input for annotationsplits(): the "<clip file name> <label id>" file.
# NOTE: this rebinds the module-level `anno_file` that
# processing_anno_original() also reads, so re-running that function after this
# cell would make it read this file instead of the raw annotation CSV.
anno_file = "../data/annotations/processed_anno_original.csv"
# Output directory; one "splits_<n>" sub-directory is created per user.
out_path = "../data/annotations/pyslowfast_anno_na0" 
method = 1 # 1: NA (-1) becomes class 0 and empty (-2) segments are dropped; any other value: NA and empty both become class 0


In [22]:

def annotationsplits():
    """Write leave-one-user-out train/val splits of the processed annotations.

    Reads the module-level settings `anno_file` (lines of
    "<clip file name> <label id>"), `out_path` and `method`. The user id is
    the second dot-separated field of the clip file name.

    Label handling for the special ids -1 (NA) and -2 (empty):
      * method == 1: -1 becomes class 0, -2 clips are skipped;
      * any other method: both become class 0.

    For the n-th user (1-based, in order of first appearance),
    "<out_path>/splits_<n>/val.csv" gets that user's clips and
    "<out_path>/splits_<n>/train.csv" gets every other user's clips.

    Raises:
        AssertionError: if a label id is outside 0..17 after the mapping above.
    """
    data = defaultdict(list)  # user_id -> [(clip file name, class id), ...]
    # Iterate the file inside a `with` so the handle is closed deterministically.
    with open(anno_file) as f:
        for line in f:
            line = line.strip()
            if not line:
                # tolerate blank lines instead of failing on the unpack below
                continue
            video_file, action_id = line.split()
            user_id = video_file.split(".")[1]
            action_id = int(action_id)
            if action_id in (-1, -2):
                if method == 1 and action_id == -2:
                    continue
                action_id = 0

            assert action_id in range(18), action_id
            data[user_id].append((video_file, action_id))

    print("total user %s" % len(data))

    # each user as a validation set
    for split_idx, user_id in enumerate(data, start=1):
        target_path = os.path.join(out_path, "splits_%s" % split_idx)
        val_data = data[user_id]
        train_data = [
            clip
            for t_user_id, clips in data.items()
            if t_user_id != user_id
            for clip in clips
        ]
        print("train %s, val %s" % (len(train_data), len(val_data)))

        os.makedirs(target_path, exist_ok=True)
        for csv_name, rows in (("train.csv", train_data), ("val.csv", val_data)):
            with open(os.path.join(target_path, csv_name), "w") as f:
                f.writelines("%s %s\n" % (name, label) for name, label in rows)

In [23]:
# Write the per-user train/val split files under `out_path`.
annotationsplits()

total user 5
train 448, val 111
train 441, val 118
train 451, val 108
train 451, val 108
train 445, val 114


#### Merging annotation files for training on the whole A1 set

In [None]:
# -p: do not fail if the directory already exists (safe to re-run).
mkdir -p data/annotations/pyslowfast_anno_na0/full

# train + val of any one split together cover every A1 clip; concatenate them
# into the full training list. The ">" redirect is required: without it cat
# treats full/train.csv as a third input file instead of writing to it.
cat data/annotations/pyslowfast_anno_na0/splits_1/train.csv data/annotations/pyslowfast_anno_na0/splits_1/val.csv \
> data/annotations/pyslowfast_anno_na0/full/train.csv
# Reuse split 1's validation list as the validation file for the full run.
cp data/annotations/pyslowfast_anno_na0/splits_1/val.csv \
data/annotations/pyslowfast_anno_na0/full/