In [None]:
# Copyright 2017 The PARSE-ego4D Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [None]:
def load_spreadsheet(end_idx=None):
  """Placeholder for the spreadsheet loader; intentionally left unimplemented.

  `load_data` calls this with `end_idx` and then selects the columns
  "Line", "Video ID", "Batch idx" and "Batch size" from the result, so a
  real implementation presumably returns a pandas dataframe with at least
  those columns. As written, this stub does nothing and returns None.
  """
  return None

def load_ego4d_narration_data():
  """Placeholder for the Ego4D narration loader; intentionally left unimplemented.

  `load_narration_data` indexes the result by video ID and reads
  `["narration_pass_1"]["narrations"][i]["timestamp_sec"]` from each entry,
  so a real implementation presumably returns a mapping with that layout.
  As written, this stub does nothing and returns None.
  """
  return None

def copy_ego4d_video_to_local(remove_video_path, local_video_path):
  """Placeholder for the remote-to-local video copy; intentionally left unimplemented.

  The trimming loop calls this positionally with the remote path of a source
  video and the local temporary path it should be copied to.
  NOTE(review): `remove_video_path` looks like a typo for "remote"; the name
  is kept so that keyword callers keep working.
  As written, this stub does nothing and returns None.
  """
  return None

def get_video_file_size_mb(video_id):
  """Placeholder for the video file-size lookup; intentionally left unimplemented.

  Callers store the result under 'File size (MiB)', format it as a number and
  compare it against a numeric limit, so a real implementation presumably
  returns the size of the video identified by `video_id` as a number.
  As written, this stub does nothing and returns None.
  """
  return None

def get_video_duration_from_video_id(video_id):
  """Placeholder for the video duration lookup; intentionally left unimplemented.

  `add_video_duration` stores the result under "Duration (sec)".
  As written, this stub does nothing and returns None.
  """
  return None

def get_processed_video_fnames():
  """Placeholder for listing already-processed videos; intentionally left unimplemented.

  The notebook takes `len()` of the result and tests trimmed file names for
  membership in it, so a real implementation presumably returns a collection
  of file names. As written, this stub does nothing and returns None.
  """
  return None

def save_local_file_to_drive(local_video_path, local_video_fname):
  """Placeholder for the drive upload; intentionally left unimplemented.

  The trimming loop passes the local path of a trimmed video and the file
  name to store it under, keeps the return value as the file ID, and catches
  any exception raised here.
  As written, this stub does nothing and returns None.
  """
  return None

In [None]:
from functools import partial
from tqdm.notebook import tqdm
import cv2
import json
import numpy as np
import os

########################################################
## Loading data
########################################################

def load_data(end_idx=None):
  """Load the Ego4D suggestion rows as a de-duplicated, sorted dataframe.

  Args:
    end_idx: Forwarded unchanged to `load_spreadsheet`.

  Returns:
    A dataframe restricted to the columns Line, Video ID, Batch idx and
    Batch size, without duplicate rows, sorted by Video ID, Batch idx,
    Batch size and Line (ascending), with a fresh 0..n-1 index.
  """
  key_columns = ["Line", "Video ID", "Batch idx", "Batch size"]
  sort_order = ['Video ID', 'Batch idx', 'Batch size', 'Line']

  suggestions = load_spreadsheet(end_idx=end_idx)[key_columns]
  # Only the key columns remain, so de-duplicating on them compares whole rows.
  suggestions = suggestions.drop_duplicates(subset=key_columns)
  suggestions = suggestions.sort_values(by=sort_order, ascending=True)
  return suggestions.reset_index(drop=True)


def load_narration_data(df):
  """Return a dictionary of video ID -> array of sentence timestamps.

  Args:
    df: Dataframe with a 'Video ID' column; only the videos listed there are
      looked up.

  Returns:
    A dict mapping each video ID to a numpy array holding the `timestamp_sec`
    of every entry in that video's `narration_pass_1` narrations. Videos that
    are missing from the narration data, or that have no `narration_pass_1`
    narrations, are reported on stdout and left out of the dict.
  """
  used_vids = set(df['Video ID'].unique())
  ndata = load_ego4d_narration_data()
  data = {}
  for video_id in used_vids:
    # A video that is absent from the narration data is reported and skipped,
    # just like a video without narrations, instead of aborting the whole load.
    try:
      entry = ndata[video_id]
    except KeyError:
      print(f"Video ID: {video_id} failed. video not found in narration data.")
      continue
    if "narration_pass_1" not in entry or "narrations" not in entry["narration_pass_1"]:
      print(f"Video ID: {video_id} failed. narrations not found.")
      continue
    narrations = entry["narration_pass_1"]["narrations"]
    data[video_id] = np.array([e['timestamp_sec'] for e in narrations])
  return data

########################################################
## Data processing and cleaning
########################################################

def add_timestamps(d, row):
  """Add `t_prestart`, `t_start` and `t_end` to a suggestion row.

  The sentence index is `Line + Batch idx * Batch size`. `t_start` is the
  timestamp of that sentence, `t_prestart` that of the previous sentence and
  `t_end` that of the next one; a neighbour that does not exist yields None.
  An index equal to the number of sentences is clamped to the last sentence.

  All three fields are set to None when no timestamp can be determined: the
  'Line' value is blank, the video has no entry in `d`, or the sentence index
  falls outside the video's timestamp array.

  Args:
    d: Mapping of video ID -> numpy array of sentence timestamps.
    row: Mapping-like row with 'Video ID', 'Line' (a string), 'Batch idx' and
      'Batch size'. It is modified in place.

  Returns:
    The same `row`, with the three timestamp fields added.
  """
  vid = row['Video ID']

  sentence_idx = None
  if row['Line'].strip() != "" and vid in d:
    n_sentences = d[vid].shape[0]
    sentence_idx = int(row['Line']) + int(row['Batch idx']) * int(row['Batch size'])
    if sentence_idx == n_sentences:
      # one past the end refers to the last sentence
      sentence_idx -= 1
    if not 0 <= sentence_idx < n_sentences:
      # Out of range: report "no timestamps" rather than raising IndexError
      # or silently wrapping around with a negative index.
      sentence_idx = None

  if sentence_idx is None:
    row["t_start"] = None
    row["t_end"] = None
    row["t_prestart"] = None
    return row

  timestamps = d[vid]
  row["t_prestart"] = timestamps[sentence_idx-1] if sentence_idx-1 >= 0 else None
  row["t_start"] = timestamps[sentence_idx]
  row["t_end"] = timestamps[sentence_idx+1] if sentence_idx+1 < timestamps.shape[0] else None
  return row


def add_file_size(row):
  """Store the size of the row's video under 'File size (MiB)'.

  The size is whatever `get_video_file_size_mb` returns for the row's
  'Video ID'. The row is modified in place and returned.
  """
  video_id = row['Video ID']
  size = get_video_file_size_mb(video_id)
  row['File size (MiB)'] = size
  return row


def add_video_duration(row):
  """Store the duration of the row's video under "Duration (sec)".

  The duration is whatever `get_video_duration_from_video_id` returns for the
  row's 'Video ID'. The row is modified in place and returned.
  """
  row["Duration (sec)"] = get_video_duration_from_video_id(row['Video ID'])
  return row


def add_start_end_video_trim(row, min_length=None, max_length=None):
  """Compute the `[trim_start, trim_end]` window of the clip for one row.

  The window starts as `[t_start, t_end]`. If it is shorter than the minimum
  length it is widened step by step, re-measuring after every step:
    1. start at `t_prestart` instead of `t_start`;
    2. if still too short, start `min_length` before `t_end` (not below 0);
    3. if still too short (the start was clamped at 0), push the end out to
       `trim_start + min_length`.
  Finally, a window longer than the maximum length keeps only the last
  `max_length` seconds before its end.

  t_end is when the suggestion can be displayed; trim_end can be bigger than
  t_end (step 3 only).

  Args:
    row: Mapping-like row with numeric 't_prestart', 't_start' and 't_end'.
      It is modified in place.
    min_length: Minimum clip length; defaults to the module-level MIN_LENGTH.
    max_length: Maximum clip length; defaults to the module-level MAX_LENGTH.

  Returns:
    The same `row`, with 'trim_start' and 'trim_end' added.
  """
  if min_length is None:
    min_length = MIN_LENGTH
  if max_length is None:
    max_length = MAX_LENGTH

  ts = row['t_start']
  te = row['t_end']
  dur = te - ts
  if dur < min_length:
    ts = row['t_prestart']
    # The duration has to be re-measured after every adjustment; otherwise
    # the checks below are always true and the t_prestart fallback is dead.
    dur = te - ts
    if dur < min_length:
      ts = max(0, te - min_length)
      dur = te - ts
      if dur < min_length:
        # assuming here that every video must be at least ~2*min_length
        te = ts + min_length
        dur = te - ts
  if dur > max_length:
    ts = te - max_length

  row['trim_start'] = ts
  row['trim_end'] = te

  return row


def drop_problematic_timestamp_rows(df_, debug=True):
  """Remove rows whose timestamps are missing or not strictly increasing.

  Three filters are applied in order:
    1. rows with a missing `t_start`, `t_end` or `t_prestart` are dropped;
    2. only rows with `t_start < t_end` are kept;
    3. only rows with `t_prestart < t_start` are kept.
  Both comparisons are strict, so rows with equal timestamps are dropped too.

  Args:
    df_: Dataframe with the columns `t_prestart`, `t_start` and `t_end`.
    debug: When true, print the row count before filtering and the number of
      rows removed by each filter.

  Returns:
    The filtered dataframe.
  """
  def report(n_before, n_after, reason):
    # one progress line per filter, printed only in debug mode
    if debug:
      print(f"Dropped {n_before-n_after:>5}, now: {n_after:>5} - {reason}")

  if debug:
    print(f"Starting with rows: {df_.shape[0]:>5}")

  # filter 1: missing timestamps
  n_rows = df_.shape[0]
  kept = df_.dropna(subset=['t_start', 't_end', 't_prestart'])
  report(n_rows, kept.shape[0], "removed all NaN timestamps")

  # filter 2: start must come strictly before end
  n_rows = kept.shape[0]
  kept = kept[kept['t_start'] < kept['t_end']]
  report(n_rows, kept.shape[0], "removed t_start > t_end")

  # filter 3: pre-start must come strictly before start
  n_rows = kept.shape[0]
  kept = kept[kept['t_prestart'] < kept['t_start']]
  report(n_rows, kept.shape[0], "removed t_prestart > t_start")

  return kept

########################################################
## OpenCV video helpers
########################################################

def trim_video(source_path, output_path, t_start, t_end=None):
  """Copy the frames between `t_start` and `t_end` of a video into a new file.

  Times are converted to frame numbers with the source's frame rate. The
  output is written with the 'mp4v' codec at the source's frame rate and
  frame size.

  Args:
    source_path: Path of the video to read.
    output_path: Path of the trimmed video to write.
    t_start: Start time; multiplied by the frame rate to get the first frame.
    t_end: End time (exclusive, after conversion to a frame number). When
      None, frames are copied up to the source's reported frame count.

  Returns:
    True if at least one frame was written. False (after printing an error)
    if the source cannot be opened, reports no usable frame rate, the output
    cannot be opened for writing, or no frame could be read from the
    requested range.
  """
  cap = cv2.VideoCapture(source_path)
  out = None
  try:
    if not cap.isOpened():
      print("Error: Could not open source video.")
      return False

    fps = cap.get(cv2.CAP_PROP_FPS)
    # `not fps > 0` also rejects NaN, which `fps <= 0` would let through.
    if not fps > 0:
      print("Error: Could not determine source video frame rate.")
      return False
    total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Calculate the starting and ending frames
    start_frame = int(t_start * fps)
    end_frame = int(t_end * fps) if t_end is not None else int(total_frames)

    # Define the codec and create a VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'mp4v') # You can change 'mp4v' to another codec if needed
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    if not out.isOpened():
      print("Error: Could not open output video for writing.")
      return False

    # Set the current video frame to the starting frame
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

    frames_written = 0
    for _ in range(start_frame, end_frame):
      ret, frame = cap.read()
      if not ret:
        break  # End of video
      out.write(frame)
      frames_written += 1

    if frames_written == 0:
      # An empty clip is a failure for the caller, not a usable result.
      print("Error: No frames written for the requested range.")
      return False

    return True
  finally:
    # Release everything, on every exit path (including exceptions)
    cap.release()
    if out is not None:
      out.release()


def get_video_duration(filename):
  """Return the duration of a video as frame count divided by frame rate.

  Args:
    filename: Path of the video to inspect.

  Returns:
    The duration in seconds, or None (after printing an error) if the video
    cannot be opened or reports no usable frame rate.
  """
  cap = cv2.VideoCapture(filename)
  try:
    if not cap.isOpened():
      print("Error: Could not open video.")
      return None

    n_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    fps = cap.get(cv2.CAP_PROP_FPS)
    # A frame rate of 0 would raise ZeroDivisionError below; `not fps > 0`
    # also rejects NaN.
    if not fps > 0:
      print("Error: Could not determine video frame rate.")
      return None
    return n_frames / fps
  finally:
    cap.release()

In [None]:
# Bounds on the length of a trimmed clip, in seconds; they are read by
# add_start_end_video_trim.
MIN_LENGTH, MAX_LENGTH = 5, 20

# Suggestion rows, and the narration timestamps of every video they mention.
df = load_data()
d = load_narration_data(df)

Beginning OAuth2 authorization process ...
Authentication successful.


In [None]:
# Attach the narration timestamps to every row, drop the rows whose timestamps
# are missing or out of order, then compute each row's trim window.
df = df.apply(lambda row: add_timestamps(d, row), axis=1)
df = drop_problematic_timestamp_rows(df, debug=True)
df = df.apply(add_start_end_video_trim, axis=1)
# File sizes are only looked up for small runs (fewer than 200 rows).
if len(df) < 200:
  df = df.apply(add_file_size, axis=1)

Starting with rows: 13809
Dropped   486, now: 13323 - removed all NaN timestamps
Dropped   573, now: 12750 - removed t_start > t_end
Dropped   523, now: 12227 - removed t_prestart > t_start


At this point, `t_end` is the time of the suggestion, and the video should be trimmed to `[trim_start : trim_end]`.

In [None]:
# Summary statistics of the trimmed clip lengths (trim_end - trim_start).
duration = df['trim_end'] - df['trim_start']
total_hours = duration.sum() / 60 / 60
print("Duration")
print(f"min = {duration.min():.2f}s, max = {duration.max():.2f}s")
print(f"median = {duration.median():.2f}s, mean = {duration.mean():.2f}s, std = {duration.std():.2f}s")
print(f"Total: {total_hours:.2f}h for {df.shape[0]} samples")

Duration
min = 5.00s, max = 20.00s
median = 5.00s, mean = 6.92s, std = 4.25s
Total: 23.49h for 12227 samples


Now, we iterate over the dataframe and trim the video for each sample. We store each trimmed video in a gdrive folder, named after the pattern `{video_id}-{line}-{batch_idx}-{batch_size}.mp4`. We don't write a video to the gdrive folder if a video with that file name already exists (there may be duplicates, which we can reuse).

In [None]:
# Bookkeeping for the trimming loop, both keyed by dataframe index:
# map_idx_to_file_id holds the file ID returned for each uploaded clip,
# map_idx_to_debug holds a message for each row that was skipped or failed.
map_idx_to_file_id, map_idx_to_debug = {}, {}
# File names that already exist in the drive folder; the trimming loop skips them.
video_items = get_processed_video_fnames()

In [None]:
# Compare the number of already-processed file names with the dataframe shape.
len(video_items), df.shape

(3420, (12227, 9))

In [None]:
# Folder holding the source videos; the trimming loop joins it with
# "{video_id}.mp4" to build the path passed to copy_ego4d_video_to_local.
remote_video_folder = "/cns/li-d/home/ego4d-data/ego4d/v1/full_scale/"

In [None]:
# Source videos above this size are skipped (same unit as get_video_file_size_mb).
MAX_SOURCE_SIZE_MIB = 2_000

last_video_id = None
tmp_folder = "/tmp/ego4d/"
os.makedirs(tmp_folder, exist_ok=True)
iterator = tqdm(df.iterrows(), total=df.shape[0], desc="Trimming videos")

# Build the lookup once so the per-row "already in drive?" check is O(1).
processed_fnames = set(video_items)
# File size per video ID, so the many rows of one video share a single lookup.
file_size_mib_by_video = {}

for idx, row in iterator:
  # rows uploaded by an earlier run of this cell are not processed again
  if idx in map_idx_to_file_id:
    continue

  video_id = row['Video ID']
  # create filenames and paths
  tmp_video_path = os.path.join(tmp_folder, f"{video_id}.mp4")
  trimmed_fname = f"{video_id}-{row['Line']}-{row['Batch idx']}-{row['Batch size']}.mp4"
  trimmed_tmp_video_path = os.path.join(tmp_folder, trimmed_fname)
  remote_video_path = os.path.join(remote_video_folder, f"{video_id}.mp4")

  # skip if file already exists in drive
  if trimmed_fname in processed_fnames:
    continue

  if video_id not in file_size_mib_by_video:
    file_size_mib_by_video[video_id] = get_video_file_size_mb(video_id)
  fsize_mib = file_size_mib_by_video[video_id]
  fsize = f'{fsize_mib:4.0f} MiB'
  if fsize_mib > MAX_SOURCE_SIZE_MIB:
    map_idx_to_debug[idx] = f"Video {video_id} is too large: {fsize}"
    continue
  iterator.set_postfix_str(f"{video_id.split('-')[0]} -- {row['Batch idx']:>2} @ {row['Line']:>3} ({fsize})")

  # release local disk space: every 10th index, delete everything in the
  # temporary folder except the current source video
  if idx % 10 == 0:
    for tmp_fname in os.listdir(tmp_folder):
      if tmp_fname != f"{video_id}.mp4":
        os.remove(os.path.join(tmp_folder, tmp_fname))
  # remove previous video to free up disk space
  if last_video_id is not None and last_video_id != video_id:
    if os.path.exists(os.path.join(tmp_folder, f"{last_video_id}.mp4")):
      os.remove(os.path.join(tmp_folder, f"{last_video_id}.mp4"))

  # download new video and update pointer
  if last_video_id is None or video_id != last_video_id:
    copy_ego4d_video_to_local(remote_video_path, tmp_video_path)
    last_video_id = video_id

  # trim video
  success = trim_video(source_path=tmp_video_path, output_path=trimmed_tmp_video_path, t_start=row['trim_start'], t_end=row['trim_end'])
  if not success:
    map_idx_to_debug[idx] = f"Failed to trim video: row {idx}, {video_id} from {row['trim_start']} to {row['trim_end']}\n"
  else:
    # save trimmed video file to drive folder; any upload error is recorded
    # for this row and the loop moves on to the next one
    try:
      map_idx_to_file_id[idx] = save_local_file_to_drive(trimmed_tmp_video_path, trimmed_fname)
    except Exception as e:
      map_idx_to_debug[idx] = f"Failed to save file: row {idx}, {video_id} from {row['trim_start']} to {row['trim_end']}\n{e}\n"

  # remove the local trimmed file whether or not trimming/upload succeeded,
  # so failed rows do not pile up on local disk
  if os.path.exists(trimmed_tmp_video_path):
    os.remove(trimmed_tmp_video_path)

Trimming videos:   0%|          | 0/12227 [00:00<?, ?it/s]