## Keyframe / Summarization

In [None]:
import cv2
import json
import numpy as np
import re

from datetime import timedelta
from os import listdir, makedirs, path
from scenedetect import detect, AdaptiveDetector

from PIL import Image as PImage

# Root folder that holds the input videos, grouped into sub-directories.
VIDEO_PATH = "../../vids/0801-500"

# Only sub-directories whose name starts with a digit 0-3, a digit 0-9 and a
# dash are picked up when listing VIDEO_PATH.
DIR_PATTERN = re.compile("^[0-3][0-9]-")

# Per-video JSON output paths are built under this directory; create it up
# front (a no-op when it already exists).
OUT_PATH = "./metadata/keyframe-500"
makedirs(name=OUT_PATH, exist_ok=True)

In [None]:
# Load the video database from its JSON file into `video_data`.
VIDEO_DB_PATH = "./metadata/videos.json"
# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# locale's preferred encoding, which can mis-decode non-ASCII JSON content.
with open(VIDEO_DB_PATH, "r", encoding="utf-8") as f:
  video_data = json.load(f)

### OpenCV

In [None]:
# Threshold on the mean per-corner value returned by calculateDistance().
# That helper returns a SQUARED pixel distance, so this is in pixels squared.
CORNER_DIST_THOLD = 500

# Frame size in pixels; the corner-detection mask is allocated with this shape.
VID_WIDTH, VID_HEIGHT = 500, 282

def frameToTime(fi, fps):
  """Format the timestamp of frame index `fi` as a string.

  The elapsed time `fi / fps` is floored to whole seconds and rendered with
  `str(timedelta)`, e.g. ``"0:01:00"``.
  """
  whole_seconds = (fi / fps) // 1
  elapsed = timedelta(seconds=whole_seconds)
  return str(elapsed)

def calculateDistance(P0, P1):
  """Return the squared Euclidean distance between two 2-D points.

  `P0` and `P1` are arrays that flatten (via `.ravel()`) to exactly two
  values, x then y. No square root is taken, so the result is in squared
  units.
  """
  ax, ay = P0.ravel()
  bx, by = P1.ravel()
  dx = bx - ax
  dy = by - ay
  return dx ** 2 + dy ** 2

# Mask handed to the corner detector through feature_params["mask"]:
# non-zero cells mark the three frame regions enabled below.
mask_features = np.zeros(shape=(VID_HEIGHT, VID_WIDTH), dtype=np.uint8)

# Top-left region: first sixth of the rows and of the columns.
mask_features[0:VID_HEIGHT // 6, 0:VID_WIDTH // 6] = 1

# Bottom-right region. The negation is applied BEFORE the floor division
# (unary minus binds tighter than //), which is made explicit here.
mask_features[(-VID_HEIGHT) // 5:, (-VID_WIDTH) // 10:] = 1

# Region around the frame centre, extending further down/right than up/left.
mask_features[
  (VID_HEIGHT // 2 - VID_HEIGHT // 20):(VID_HEIGHT // 2 + VID_HEIGHT // 10),
  (VID_WIDTH // 2 - VID_WIDTH // 10):(VID_WIDTH // 2 + VID_WIDTH // 5),
] = 1

# Keyword arguments unpacked into cv2.goodFeaturesToTrack (corner detection).
feature_params = {
  "maxCorners": 100,
  "qualityLevel": 0.2,
  "minDistance": 3,
  "blockSize": 7,
  "mask": mask_features,
}

# Keyword arguments unpacked into cv2.calcOpticalFlowPyrLK (Lucas-Kanade
# optical flow). The criteria tuple is (type flags, 10, 0.03).
lk_params = {
  "winSize": (15, 15),
  "maxLevel": 2,
  "criteria": (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03),
}

In [None]:
# Camera-movement detection.
#
# For the first mp4 of the first matching directory, corners are tracked from
# frame to frame with Lucas-Kanade optical flow. A frame counts as "moved"
# when the mean squared corner displacement exceeds CORNER_DIST_THOLD (or when
# there are no corners to track). Moved frames that are at most 15 frames
# apart are merged into one [start, end] range in `camera_movements`.
input_dirs = sorted([d for d in listdir(VIDEO_PATH) if DIR_PATTERN.search(d) is not None])

for io_dir in input_dirs[0:1]:
  input_dir_path = path.join(VIDEO_PATH, io_dir)
  input_files = sorted([f for f in listdir(input_dir_path) if f.endswith("mp4")])

  for io_file in input_files[0:1]:
    print(io_dir, io_file)
    input_file_path = path.join(input_dir_path, io_file)
    # Swap only the extension: str.replace("mp4", "json") would also rewrite
    # any "mp4" that occurs inside the file stem.
    output_file_path = path.join(OUT_PATH, path.splitext(io_file)[0] + ".json")

    vid = cv2.VideoCapture(input_file_path)
    try:
      frame_count = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
      fps = vid.get(cv2.CAP_PROP_FPS)

      vid.set(cv2.CAP_PROP_POS_FRAMES, 0)
      has_frame, prev_frame = vid.read()
      if not has_frame:
        # Unreadable or empty video: nothing to analyse.
        print("could not read first frame:", input_file_path)
        continue

      # The 4th positional argument of cv2.Canny is the `edges` output array,
      # not the aperture, so the aperture is passed by keyword. 3 is the
      # default, i.e. what the earlier positional calls effectively used.
      prev_edges = cv2.Canny(prev_frame, 10, 100, apertureSize=3)
      prev_frame_grey = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
      prev_corners = cv2.goodFeaturesToTrack(prev_frame_grey, **feature_params)

      # NOTE(review): both are reset together on every moved frame and never
      # advanced otherwise, so the "> 5" check below cannot fire yet.
      action_start = 0
      action_end = 0

      camera_movements = []

      # Analyse at most the first 120 seconds of the clip.
      for frameIdx in range(1, int(120 * fps)):
        has_frame, frame = vid.read()
        if not has_frame:
          # Clip is shorter than the analysis window (or decoding failed).
          break
        frame_grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Only consumed by the commented-out edge-difference experiment.
        edges = cv2.Canny(frame, 10, 100, apertureSize=3)

        # Reset every frame so that a stale (or never assigned) result is not
        # reused when there is nothing to track.
        corners = None
        if prev_corners is not None and len(prev_corners) > 0:
          corners, _, _ = cv2.calcOpticalFlowPyrLK(prev_frame_grey, frame_grey, prev_corners, None, **lk_params)

        # Without trackable corners the frame is treated as "moved", which
        # forces a fresh corner detection below.
        corner_avg = 2 * CORNER_DIST_THOLD
        if corners is not None and len(corners) > 0:
          corner_avg = np.array([calculateDistance(p0, p1) for p0, p1 in zip(corners, prev_corners)]).mean()

        corner_moved = corner_avg > CORNER_DIST_THOLD

        if corner_moved:
          # Start a new range unless the previous one ended within 15 frames.
          if not camera_movements or (frameIdx - camera_movements[-1][1]) > 15:
            camera_movements.append([frameIdx, frameIdx])
          else:
            camera_movements[-1][1] = frameIdx

          if abs(action_end - action_start) > 5:
            # TODO: save action range
            pass

          action_start = frameIdx
          action_end = frameIdx

          # Corners are only re-detected on moved frames; otherwise the same
          # corner positions are reused for the next frame pair.
          prev_corners = cv2.goodFeaturesToTrack(frame_grey, **feature_params)

        # edge_diff = edges - prev_edges

        # read() and cvtColor() return fresh arrays each iteration, so the
        # references can be kept without copying.
        prev_frame = frame
        prev_frame_grey = frame_grey
        # prev_edges = edges.copy()

      camera_movement_pairs = [(frameToTime(f0, fps), frameToTime(f1, fps)) for f0, f1 in camera_movements]
      print(camera_movement_pairs)
    finally:
      # Always free the decoder handle, including on early exit or error.
      vid.release()

# display(PImage.fromarray(frame))
# display(PImage.fromarray(frame_grey))
# display(PImage.fromarray(edges))
# display(PImage.fromarray(prev_edges))
# display(PImage.fromarray(edge_diff))

In [None]:
# - reduce size of frames to around 160px (?)

# - threshold difference between consecutive frames
# - compute a perceptual hash for each remaining frame
# - de-duplicate non-neighboring scenes using the hash
# - use canny edge representation to estimate sharpness quality

# - check optical flow vs edge variance
# - check accuracy vs size of image

### Time Tests

In [None]:
# Open the first mp4 of the first matching directory; `vid` is the capture
# that the timing cell reads from.
input_dirs = sorted(d for d in listdir(VIDEO_PATH) if DIR_PATTERN.search(d))
input_dir_path = path.join(VIDEO_PATH, input_dirs[0])

input_files = sorted(f for f in listdir(input_dir_path) if f.endswith("mp4"))
input_file_path = path.join(input_dir_path, input_files[0])

vid = cv2.VideoCapture(input_file_path)
frame_count = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))

# Show the frame rate reported for the opened video.
reported_fps = vid.get(cv2.CAP_PROP_FPS)
print(reported_fps)

In [None]:
%%timeit

# Timing body for the corner-tracking loop (the cell runs under %%timeit).
# It reuses `vid`, `feature_params`, `lk_params`, `calculateDistance` and
# CORNER_DIST_THOLD defined in earlier cells.

# Seed the tracker with corners detected on the very first frame.
vid.set(cv2.CAP_PROP_POS_FRAMES, 0)
_, prev_frame = vid.read()
prev_frame_grey = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
prev_corners = cv2.goodFeaturesToTrack(prev_frame_grey, **feature_params)

# Jump to frame 1000 and process the following 600 frames.
# NOTE(review): the success flag of vid.read() is discarded, so this assumes
# the video has at least 1600 frames - confirm for the clip being timed.
vid.set(cv2.CAP_PROP_POS_FRAMES, 1000)
for frameIdx in range(0, 600):
  _, frame = vid.read()
  # edges = cv2.Canny(frame, 10, 100, 11)

  # 0 doubles as the "no corners to track" marker checked below.
  corner_avg = 0
  if prev_corners is not None:
    # Unlike the detection loop, the colour frames are passed here directly
    # (no greyscale conversion per frame).
    corners, _, _ = cv2.calcOpticalFlowPyrLK(prev_frame, frame, prev_corners, None, **lk_params)
    # Mean of the squared per-corner displacement.
    corner_avg = np.array([calculateDistance(p0, p1) for p0,p1 in zip(corners, prev_corners)]).mean()

  # Re-detect corners when the mean displacement exceeds the threshold, or
  # when corner_avg is exactly 0 (which includes the no-corners case).
  if corner_avg > CORNER_DIST_THOLD or corner_avg == 0:
    frame_grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    prev_corners = cv2.goodFeaturesToTrack(frame_grey, **feature_params)

  prev_frame = frame.copy()

### SceneDetect
https://www.scenedetect.com/

In [None]:
# Directories under VIDEO_PATH whose names match DIR_PATTERN, in sorted order.
input_dirs = sorted([d for d in listdir(VIDEO_PATH) if DIR_PATTERN.search(d) is not None])

# Only the second matching directory is processed (slice [1:2]).
for io_dir in input_dirs[1:2]:
  input_dir_path = path.join(VIDEO_PATH, io_dir)
  # File names ending in "mp4", in sorted order.
  input_files = sorted([f for f in listdir(input_dir_path) if f.endswith("mp4")])
  print(io_dir, input_files)

  # Only the first video of the directory is processed (slice [:1]).
  for io_file in input_files[:1]:
    input_file_path = path.join(input_dir_path, io_file)
    # Output path under OUT_PATH: the file name with every "mp4" occurrence
    # replaced by "json".
    output_file_path = path.join(OUT_PATH, io_file.replace("mp4", "json"))

    # Run PySceneDetect's detect() on the file with an AdaptiveDetector.
    scene_list = detect(input_file_path, AdaptiveDetector())