## Keyframe / Summarization

In [None]:
import cv2
import json
import numpy as np
import re
import scipy.fftpack as fftpack

from datetime import timedelta
from os import listdir, makedirs, path

from imagehash import ImageHash
from PIL import Image as PImage

# Root directory that is scanned for sub-directories containing .mp4 files.
VIDEO_PATH = "../../vids/0801-500"

# Destination root for the per-video keyframe JSON files; created here if missing.
OUT_PATH = "./metadata/keyframe-500"
makedirs(OUT_PATH, exist_ok=True)

# Only directory names starting with a digit 0-3, another digit and a dash are processed.
DIR_PATTERN = re.compile("^[0-3][0-9]-")

### OpenCV

In [None]:
# Threshold on the mean of calculateDistance() over tracked corners. That function
# returns a squared distance, so this value is in squared units as well.
# A mean above the threshold is treated as camera movement.
CORNER_DIST_THOLD = 500

def frameToTime(fi, fps):
  """Format frame index `fi` as "<H:MM:SS>.<frame offset within that second>".

  The part before the dot is a `timedelta` of the whole seconds elapsed; the
  part after it is the (rounded, unpadded) number of frames into that second.
  """
  position = fi / fps
  whole_seconds = int(position)
  leftover_frames = round(fps * (position % 1))
  return f"{timedelta(seconds=whole_seconds)}.{leftover_frames}"

def calculateDistance(P0, P1):
  """Return the squared Euclidean distance between two 2-D points.

  Both arguments are arrays that flatten to exactly two values (x, y).
  No square root is taken, so the result is dx**2 + dy**2.
  """
  ax, ay = P0.ravel()
  bx, by = P1.ravel()
  dx = bx - ax
  dy = by - ay
  return dx ** 2 + dy ** 2

def getFeatureMask(vw, vh):
  """Build a (vh, vw) uint8 mask with three regions set to 1.

  The regions are the top-left corner, the bottom-right corner and a band
  around the centre of the frame; everything else stays 0.
  """
  mask = np.zeros((vh, vw), dtype=np.uint8)

  # top-left corner
  mask[:vh//6, :vw//6] = 1
  # bottom-right corner (negative start indices count from the end)
  mask[-vh//5:, -vw//10:] = 1

  # centre band
  cy, cx = vh//2, vw//2
  half_h, half_w = vh//20, vw//10
  mask[cy - half_h: cy + half_h, cx - half_w: cx + half_w] = 1
  return mask

# Kernel size passed to cv2.blur before frames are differenced.
blur_size = (5,5)

# Parameters for features to track
# (unpacked into cv2.goodFeaturesToTrack as keyword arguments)
feature_params = dict(maxCorners=100,
                      qualityLevel=0.2,
                      minDistance=3,
                      blockSize=7)

# Parameters for lucas kanade optical flow
# (unpacked into cv2.calcOpticalFlowPyrLK as keyword arguments)
lk_params = dict(winSize=(15, 15),
                 maxLevel=2,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

# Keyword arguments for phash(); these match that function's own defaults.
hash_params = dict(
  hash_size=8,
  highfreq_factor=4
)

### helper functions

In [None]:
def phash(im, hash_size=8, highfreq_factor=4):
  """Perceptual (DCT-based) hash of an image, returned as an ImageHash.

  Adapted from vframe:
  https://github.com/vframeio/vframe/blob/master/src/vframe/utils/im_utils.py#L37-L48
  which is itself a rewrite of imagehash's phash:
  https://github.com/JohannesBuchner/imagehash/blob/master/imagehash.py#L197

  The image is resized to a square of side hash_size * highfreq_factor,
  converted to greyscale when it has more than one channel, and run through a
  DCT along each axis. Each coefficient in the top-left hash_size x hash_size
  block becomes one bit, set when it is above the median of that block.
  """
  side = hash_size * highfreq_factor
  small = cv2.resize(im, (side, side), interpolation=cv2.INTER_NEAREST)
  if small.ndim > 2 and small.shape[2] > 1:
    small = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)
  coeffs = fftpack.dct(fftpack.dct(small, axis=0), axis=1)
  low_freq = coeffs[:hash_size, :hash_size]
  return ImageHash(low_freq > np.median(low_freq))

In [None]:
def getCornerDiffs(prev_frame, frame, vw, vh):
  """Measure how much the four border strips changed between two frames.

  Both frames are blurred, differenced, and thresholded (absolute difference
  above 32 becomes 255, everything else 0). The mean of that thresholded image
  is then taken over a thin strip along each border.

  Returns a 4-element array ordered [top, bottom, left, right], each in 0..255.
  """
  before = cv2.blur(prev_frame, blur_size).astype(np.int16)
  after = cv2.blur(frame, blur_size).astype(np.int16)

  changed = (np.abs(after - before) > 32) * 255

  top = changed[:vh//40, :].mean()
  bottom = changed[-vh//40:, :].mean()
  left = changed[:, :vw//60].mean()
  right = changed[:, -vw//60:].mean()
  return np.array([top, bottom, left, right])

In [None]:
def is_bw(frame, thold=1.0):
  """Return whether a 3-channel frame is (close to) greyscale.

  Sums the absolute differences between every pair of the first three channels
  and divides by the pixel count; the frame counts as black & white when that
  per-pixel average is below `thold`.
  """
  # widen to int16 so the channel subtractions cannot wrap around
  c0 = frame[:,:,0].astype(np.int16)
  c1 = frame[:,:,1].astype(np.int16)
  c2 = frame[:,:,2].astype(np.int16)

  total_diff = np.abs(c0 - c1).sum() + np.abs(c0 - c2).sum() + np.abs(c1 - c2).sum()
  pixel_count = frame.shape[0] * frame.shape[1]
  return total_diff / pixel_count < thold

In [None]:
def in_seq(val, seqs):
  """Return True when `val` lies inside any [start, end] pair of `seqs` (bounds inclusive).

  `seqs` is an iterable of 2-item sequences; an empty `seqs` gives False.
  """
  # any() stops at the first matching range instead of building a list
  return any(f0 <= val <= f1 for f0, f1 in seqs)

In [None]:
def duplicate_hash(h0, hf_list, thold=4):
  """Return True when `h0` is within `thold` (exclusive) of any hash in `hf_list`.

  `hf_list` holds (hash, frame) pairs; only the hash is compared. The distance
  is whatever `hash - h0` evaluates to, taken as an absolute value.
  """
  return any(abs(known - h0) < thold for known, _ in hf_list)

### Run Keyframe

In [None]:
%%time
# Walk every matching directory under VIDEO_PATH and write one JSON summary per
# video (camera-movement sequences, action sequences, static frames) to OUT_PATH.
input_dirs = sorted([d for d in listdir(VIDEO_PATH) if DIR_PATTERN.search(d) is not None])

for io_dir in input_dirs:
  output_dir_path = path.join(OUT_PATH, io_dir)
  makedirs(output_dir_path, exist_ok=True)

  input_dir_path = path.join(VIDEO_PATH, io_dir)
  input_files = sorted([f for f in listdir(input_dir_path) if f.endswith("mp4")])

  # a directory without videos has no input_files[0] to probe below
  if not input_files:
    continue

  # the feature mask is sized from the first video and reused for the whole directory
  vid = cv2.VideoCapture(path.join(input_dir_path, input_files[0]))
  vw = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
  vh = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
  feature_mask = getFeatureMask(vw, vh)
  vid.release()

  for io_file in input_files:
    input_file_path = path.join(input_dir_path, io_file)
    output_file_path = path.join(output_dir_path, io_file.replace("mp4", "json"))

    # already processed on an earlier run
    if path.isfile(output_file_path):
      continue

    print(io_dir, io_file)

    vid = cv2.VideoCapture(input_file_path)
    try:
      frame_count = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
      fps = int(vid.get(cv2.CAP_PROP_FPS))

      vid.set(cv2.CAP_PROP_POS_FRAMES, 0)
      ok, prev_frame = vid.read()
      if not ok:
        # unreadable or empty video: skip it and do not write an output file
        print(io_dir, io_file, "could not be read")
        continue

      prev_frame_hash = phash(prev_frame, **hash_params)
      prev_frame_grey = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
      prev_edges = cv2.blur(prev_frame_grey, blur_size).astype(np.int16)
      prev_corners = cv2.goodFeaturesToTrack(prev_frame_grey, mask=feature_mask, **feature_params)

      # thresholds are fractions of the number of bits in the hash
      hash_size = prev_frame_hash.hash.size
      frame_hash_threshold = hash_size // 2
      static_hash_threshold = hash_size // 3

      camera_sequences = []
      action_sequences = []
      static_hashes = []

      for frameIdx in range(1, frame_count):
        ok, frame = vid.read()
        if not ok:
          # the reported frame count can exceed what is actually decodable
          break
        # every frame is read, but only every 5th one is analysed
        if frameIdx % 5 != 0:
          continue
        if is_bw(frame):
          continue

        frame_grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        if prev_corners is not None:
          corners, _, _ = cv2.calcOpticalFlowPyrLK(prev_frame_grey, frame_grey, prev_corners, None, **lk_params)
        else:
          corners = None

        valid_prev_corners = prev_corners is not None and len(prev_corners) > 0
        valid_corners = corners is not None and len(corners) > 0

        if valid_prev_corners and valid_corners:
          corner_avg = np.array([calculateDistance(p0, p1) for p0,p1 in zip(corners, prev_corners)]).mean()
        else:
          # no trackable corners: fall back to how much the frame borders changed
          corner_diffs = getCornerDiffs(prev_frame_grey, frame_grey, vw, vh)
          corner_diffs_count = (corner_diffs > 200).sum()
          if corner_diffs_count > 2:
            corner_avg = 2 * CORNER_DIST_THOLD
          else:
            corner_avg = 0

        camera_moved = corner_avg > CORNER_DIST_THOLD

        if camera_moved:
          # extend the current camera sequence unless the gap exceeds `fps` frames
          if len(camera_sequences) == 0 or (frameIdx - camera_sequences[-1][1]) > fps:
            camera_sequences.append([frameIdx, frameIdx])
          else:
            camera_sequences[-1][1] = frameIdx

          prev_corners = cv2.goodFeaturesToTrack(frame_grey, mask=feature_mask, **feature_params)
        else:
          # grab static frames by hash independent of action
          frame_hash = phash(frame, **hash_params)
          if len(static_hashes) < 1:
            static_hashes.append([frame_hash, frameIdx])
          else:
            frame_hash_diff = frame_hash - prev_frame_hash
            static_hash_diff = frame_hash - static_hashes[-1][0]

            # close to the previous sample, far from the last kept static frame,
            # not an all-zero hash, and not a near-duplicate of any kept frame
            if (frame_hash_diff < frame_hash_threshold and
                static_hash_diff > static_hash_threshold and
                int(str(frame_hash), 16) != 0 and
                (not duplicate_hash(frame_hash, static_hashes))):
              static_hashes.append([frame_hash, frameIdx])

          edges = cv2.blur(frame_grey, blur_size).astype(np.int16)
          edge_diff = np.abs(edges - prev_edges)
          edge_diff_avg = ((edge_diff > 7) * 255).mean()

          if edge_diff_avg > 1.0:
            if len(action_sequences) == 0 or (frameIdx - action_sequences[-1][1]) > fps:
              action_sequences.append([frameIdx, frameIdx])
            else:
              action_sequences[-1][1] = frameIdx

          prev_edges = edges.copy()
          prev_frame_hash = ImageHash(frame_hash.hash)

        prev_frame = frame.copy()
        prev_frame_grey = frame_grey.copy()
    finally:
      # release the capture on every exit path, including skips and errors
      vid.release()

    # drop sequences made of a single sampled frame
    camera_sequences = [[f0, f1] for f0,f1 in camera_sequences if f0 != f1]
    action_sequences = [[f0, f1] for f0,f1 in action_sequences if f0 != f1]

    representative_frames = set()
    foi_sum = 0

    for _,f in static_hashes:
      # DON'T skip static frames that are also in action seqs
      # if not in_seq(f, action_sequences):
      representative_frames.add(f)
    for f0,f1 in action_sequences:
      # skip action seqs that are also in camera seqs
      if not (in_seq(f0, camera_sequences) and in_seq(f1, camera_sequences)):
        representative_frames.add(f0)
        representative_frames.add(f1)
        foi_sum += abs(f1 - f0)

    # Build the payload before opening the file so a failure here cannot leave
    # an empty JSON behind that the isfile() check above would then skip forever.
    frame_data = {
      "camera_movement_sequences": camera_sequences,
      "foi_sequences": action_sequences,
      "foi_count_frames": foi_sum,
      "foi_count_seconds": foi_sum / fps if fps > 0 else 0.0,
      "foi_pct": foi_sum / frame_count if frame_count > 0 else 0.0,
      "static_frames": sorted([f for _,f in static_hashes]),
      "static_frames_count": len(static_hashes),
      "representative_frames": sorted(representative_frames),
      "representative_frames_count": len(representative_frames),
    }

    with open(output_file_path, "w") as of:
      json.dump(frame_data, of, sort_keys=True)

### Post-Process (add to metadata)

In [None]:
import json
import re

from os import listdir, path

# Existing video database that the keyframe results are merged into.
VIDEO_DB_PATH_IN = "./metadata/videos.json"

# Directory holding the per-video keyframe JSON files; the merged database is written inside it.
KEYFRAME_PATH = "./metadata/keyframe-500"
VIDEO_DB_PATH_OUT = path.join(KEYFRAME_PATH, "videos.json")

# Only directory names starting with a digit 0-3, another digit and a dash are read.
DIR_PATTERN = re.compile("^[0-3][0-9]-")

In [None]:
# open all keyframe files
# keyframe_data maps "<name>.mp4" -> the parsed contents of "<name>.json"
keyframe_data = {}

input_dirs = sorted([d for d in listdir(KEYFRAME_PATH) if DIR_PATTERN.search(d) is not None])

for io_dir in input_dirs:
  input_dir_path = path.join(KEYFRAME_PATH, io_dir)
  input_files = sorted([f for f in listdir(input_dir_path) if f.endswith("json")])

  for io_file in input_files:
    input_file_path = path.join(input_dir_path, io_file)
    # key by video file name; note that replace() swaps every "json" in the name, not only the extension
    video_key = io_file.replace("json", "mp4")
    with open(input_file_path, "r") as f:
      keyframe_data[video_key] = json.load(f)

In [None]:
# load the existing video database (iterated below as key -> dict of metadata)
with open(VIDEO_DB_PATH_IN, "r") as f:
  video_data = json.load(f)

In [None]:
# Merge keyframe info into each video entry; on key collisions the keyframe value wins.
# Only existing keys are reassigned, so the dict's size does not change while it is iterated.
for k, vdata in video_data.items():
  if k not in keyframe_data:
    print(k, "has no keyframe info")
  else:
    video_data[k] = vdata | keyframe_data[k]

In [None]:
# write the merged database using compact separators (no spaces after "," and ":")
with open(VIDEO_DB_PATH_OUT, "w") as f:
  json.dump(video_data, f, separators=(',',':'))

### Debug

In [None]:
# Uses fps, camera_sequences, action_sequences and static_hashes, none of which this
# cell defines; presumably the values left behind by the Run Keyframe cell (last video).
camera_movement_pairs = [(frameToTime(f0, fps), frameToTime(f1, fps)) for f0,f1 in camera_sequences]
action_sequence_pairs = [(frameToTime(f0, fps), frameToTime(f1, fps)) for f0,f1 in action_sequences]
static_frames = [frameToTime(f, fps) for h,f in static_hashes]
static_frame_hashes = [str(h) for h,f in static_hashes]

print(camera_movement_pairs)
print(action_sequence_pairs)
print(len(static_hashes), static_frames)
print(static_frame_hashes)

In [None]:
# Show every static frame with its timestamp and hash.
# Uses input_file_path, static_hashes and fps, none of which this cell defines.
# NOTE(review): the capture opened here is never released in this cell.
vid = cv2.VideoCapture(input_file_path)
for h,f in static_hashes:
  # seek to the stored frame index, then decode that frame
  vid.set(cv2.CAP_PROP_POS_FRAMES, f)
  _, frame = vid.read()
  print(frameToTime(f, fps), h)
  # the frame is converted from BGR to RGB before being handed to PIL
  display(PImage.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))

### Algo Experiments

In [None]:
# Run the camera-movement / hash / edge-difference steps on a single pair of
# frames (FRAME_NUM and FRAME_NUM + FRAME_DELTA) of one video, for inspection.
input_dirs = sorted([d for d in listdir(VIDEO_PATH) if DIR_PATTERN.search(d) is not None])
input_dir_path = path.join(VIDEO_PATH, input_dirs[0])
input_files = sorted([f for f in listdir(input_dir_path) if f.endswith("mp4")])
input_file_path = path.join(input_dir_path, input_files[5])

vid = cv2.VideoCapture(input_file_path)
vw = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
vh = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
feature_mask = getFeatureMask(vw, vh)
frame_count = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
fps = int(vid.get(cv2.CAP_PROP_FPS))

FRAME_NUM = 82*fps + 0
FRAME_DELTA = 5

vid.set(cv2.CAP_PROP_POS_FRAMES, FRAME_NUM)
_, prev_frame = vid.read()

vid.set(cv2.CAP_PROP_POS_FRAMES, FRAME_NUM + FRAME_DELTA)
_, frame = vid.read()

prev_frame_hash = phash(prev_frame, **hash_params)
prev_frame_grey = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
prev_edges = cv2.blur(prev_frame_grey, blur_size).astype(np.int16)
prev_corners = cv2.goodFeaturesToTrack(prev_frame_grey, mask=feature_mask, **feature_params)
corners = None

# thresholds are fractions of the number of bits in the hash
hash_size = prev_frame_hash.hash.size
frame_hash_threshold = hash_size // 2
static_hash_threshold = hash_size // 3

frame_grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

if prev_corners is not None:
  corners, _, _ = cv2.calcOpticalFlowPyrLK(prev_frame_grey, frame_grey, prev_corners, None, **lk_params)

valid_prev_corners = prev_corners is not None and len(prev_corners) > 0
valid_corners = corners is not None and len(corners) > 0

if valid_prev_corners and valid_corners:
  corner_avg = np.array([calculateDistance(p0, p1) for p0,p1 in zip(corners, prev_corners)]).mean()
else:
  # Label the output with the frame analysed here. `frameIdx` is not defined in
  # this cell and would be missing or stale depending on which cells ran before.
  corner_diffs = getCornerDiffs(prev_frame, frame, vw, vh)
  print(frameToTime(FRAME_NUM + FRAME_DELTA, fps), corner_diffs)
  corner_diffs_count = (corner_diffs > 128).sum()
  corner_avg = 2 * CORNER_DIST_THOLD if corner_diffs_count > 2 else 0

camera_moved = corner_avg > CORNER_DIST_THOLD

frame_hash = phash(frame, **hash_params)

frame_hash_diff = frame_hash - prev_frame_hash

edges = cv2.blur(frame_grey, blur_size).astype(np.int16)
edge_diff = np.abs(edges - prev_edges)
edge_diff_avg = ((edge_diff > 7) * 255).mean()

In [None]:
# colour frames come from OpenCV as BGR and are converted to RGB for PIL
display(PImage.fromarray(cv2.cvtColor(prev_frame, cv2.COLOR_BGR2RGB)))
display(PImage.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
# The edge images are single-channel, and cvtColor rejects BGR2RGB for
# 1-channel input, so they are expanded with GRAY2RGB instead.
display(PImage.fromarray(cv2.cvtColor(prev_edges.astype(np.uint8), cv2.COLOR_GRAY2RGB)))
display(PImage.fromarray(cv2.cvtColor(edges.astype(np.uint8), cv2.COLOR_GRAY2RGB)))
display(PImage.fromarray(cv2.cvtColor(edge_diff.astype(np.uint8), cv2.COLOR_GRAY2RGB)))
display(PImage.fromarray(cv2.cvtColor(((edge_diff > 8) * 255).astype(np.uint8), cv2.COLOR_GRAY2RGB)))

print("valid_prev_corners", valid_prev_corners, "valid_corners", valid_corners, "camera_moved", camera_moved)
print("frame hash diff", frame_hash_diff, "thresh: <", frame_hash_threshold, " > ", static_hash_threshold)
print("edge_diff_avg", edge_diff_avg)

### B&W Experiments

In [None]:
def is_bw(frame, thold=1.0):
  """Debug variant of the greyscale check: prints the intermediate sums.

  Sums the absolute differences between every pair of the first three channels,
  prints the three pair sums, their total and the per-pixel average, and
  returns whether that average is below `thold`.
  """
  # widen to int16 so the channel subtractions cannot wrap around
  c0 = frame[:,:,0].astype(np.int16)
  c1 = frame[:,:,1].astype(np.int16)
  c2 = frame[:,:,2].astype(np.int16)

  d01 = np.abs(c0 - c1).sum()
  d02 = np.abs(c0 - c2).sum()
  d12 = np.abs(c1 - c2).sum()
  total_diff = d01 + d02 + d12

  pixel_count = frame.shape[0] * frame.shape[1]
  per_pixel = total_diff / pixel_count
  print(d01, d02, d12, total_diff, per_pixel)
  return per_pixel < thold

In [None]:
# Pick the first video of the first matching directory and test is_bw on one frame.
input_dirs = sorted([d for d in listdir(VIDEO_PATH) if DIR_PATTERN.search(d) is not None])
input_dir_path = path.join(VIDEO_PATH, input_dirs[0])
input_files = sorted([f for f in listdir(input_dir_path) if f.endswith("mp4")])
input_file_path = path.join(input_dir_path, input_files[0])

vid = cv2.VideoCapture(input_file_path)
fps = int(vid.get(cv2.CAP_PROP_FPS))

# frame index 50 * fps, i.e. about 50 seconds in at the (truncated) frame rate
FRAME_NUM = 50*fps

vid.set(cv2.CAP_PROP_POS_FRAMES, FRAME_NUM)
_, frame = vid.read()

display(PImage.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))

print(is_bw(frame))

### Action Experiments

In [None]:
# Grab two frames FRAME_DELTA apart for the action experiments below.
# Uses fps and input_file_path, which this cell does not define.
FRAME_NUM = 15*fps
FRAME_DELTA = 5 # 10*60*fps

vid = cv2.VideoCapture(input_file_path)
vid.set(cv2.CAP_PROP_POS_FRAMES, FRAME_NUM)
_, prev_frame = vid.read()

vid.set(cv2.CAP_PROP_POS_FRAMES, FRAME_NUM + FRAME_DELTA)
_, frame = vid.read()

In [None]:
# blurred greyscale versions of both frames
prev_edges = cv2.blur(cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY), blur_size)
edges = cv2.blur(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), blur_size)

# subtract in int16 so the difference can go negative, then take the absolute value
edge_diff = np.abs(edges.astype(np.int16) - prev_edges.astype(np.int16)).astype(np.uint8)
# binary image: 255 where the difference exceeds 8, 0 elsewhere
edge_diff_erosion = ((edge_diff > 8) * 255).astype(np.uint8)

In [None]:
# (size, sum, mean) of the raw difference and of the thresholded difference
(
    (edge_diff.size, np.sum(edge_diff), np.mean(edge_diff)),
    (edge_diff_erosion.size, np.sum(edge_diff_erosion), np.mean(edge_diff_erosion))
)

In [None]:
# colour frames come from OpenCV as BGR and are converted to RGB for PIL
display(PImage.fromarray(cv2.cvtColor(prev_frame, cv2.COLOR_BGR2RGB)))
display(PImage.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
# The blurred and differenced images are single-channel, and cvtColor rejects
# BGR2RGB for 1-channel input, so they are expanded with GRAY2RGB instead.
display(PImage.fromarray(cv2.cvtColor(prev_edges, cv2.COLOR_GRAY2RGB)))
display(PImage.fromarray(cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB)))

display(PImage.fromarray(cv2.cvtColor(edge_diff, cv2.COLOR_GRAY2RGB)))
display(PImage.fromarray(cv2.cvtColor(edge_diff_erosion, cv2.COLOR_GRAY2RGB)))

### Time Tests

In [None]:
# 4x4 kernels of ones; not referenced by the timing cell that follows
dkernel = np.ones((4, 4), np.uint8)
ekernel = np.ones((4, 4), np.uint8)

# open the first video of the first matching directory for the timing test
input_dirs = sorted([d for d in listdir(VIDEO_PATH) if DIR_PATTERN.search(d) is not None])
input_dir_path = path.join(VIDEO_PATH, input_dirs[0])
input_files = sorted([f for f in listdir(input_dir_path) if f.endswith("mp4")])
input_file_path = path.join(input_dir_path, input_files[0])
vid = cv2.VideoCapture(input_file_path)

frame_count = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
vw = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
vh = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))

# raw frame rate as reported by the capture, without truncation to int
print(vid.get(cv2.CAP_PROP_FPS))

In [None]:
%%timeit

# Timed body: corner tracking over 600 consecutive frames starting at frame 1000.
# The initial corners are detected on frame 0, without the feature mask.
vid.set(cv2.CAP_PROP_POS_FRAMES, 0)
_, prev_frame = vid.read()
prev_frame_grey = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
prev_corners = cv2.goodFeaturesToTrack(prev_frame_grey, **feature_params)

vid.set(cv2.CAP_PROP_POS_FRAMES, 1000)
for frameIdx in range(0, 600):
  _, frame = vid.read()
  # edges = cv2.Canny(frame, 10, 100, 11)

  corner_avg = 0
  if prev_corners is not None:
    # NOTE(review): the colour frames are passed here, not their greyscale versions
    corners, _, _ = cv2.calcOpticalFlowPyrLK(prev_frame, frame, prev_corners, None, **lk_params)
    corner_avg = np.array([calculateDistance(p0, p1) for p0,p1 in zip(corners, prev_corners)]).mean()

  # re-detect corners after large movement, or when there was nothing to track
  if corner_avg > CORNER_DIST_THOLD or corner_avg == 0:
    frame_grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    prev_corners = cv2.goodFeaturesToTrack(frame_grey, **feature_params)

  prev_frame = frame.copy()

### SceneDetect
https://www.scenedetect.com/

In [None]:
from scenedetect import detect, AdaptiveDetector

# Try PySceneDetect's AdaptiveDetector on one video: the first file of the second matching directory.
input_dirs = sorted([d for d in listdir(VIDEO_PATH) if DIR_PATTERN.search(d) is not None])

for io_dir in input_dirs[1:2]:
  input_dir_path = path.join(VIDEO_PATH, io_dir)
  input_files = sorted([f for f in listdir(input_dir_path) if f.endswith("mp4")])
  print(io_dir, input_files)

  for io_file in input_files[:1]:
    input_file_path = path.join(input_dir_path, io_file)
    # computed but not written to in this cell
    output_file_path = path.join(OUT_PATH, io_file.replace("mp4", "json"))

    # the result is only assigned here; nothing in this cell prints or saves it
    scene_list = detect(input_file_path, AdaptiveDetector())