## Keyframe / Summarization

In [None]:
import cv2
import json
import numpy as np
import re
import scipy.fftpack as fftpack

from datetime import timedelta
from os import listdir, makedirs, path
from scenedetect import detect, AdaptiveDetector

from imagehash import ImageHash
from PIL import Image as PImage

# Where the source videos live and where keyframe metadata is written.
VIDEO_PATH = "../../vids/0801-500"
OUT_PATH = "./metadata/keyframe-500"

# Input sub-directories are selected by name: two digits (the first one 0-3)
# followed by a dash at the start of the name.
DIR_PATTERN = re.compile("^[0-3][0-9]-")

# Make sure the output directory exists; a no-op when it is already there.
makedirs(OUT_PATH, exist_ok=True)

In [None]:
%%script false --no-raise-error

# Disabled cell: the `%%script false` magic on the first line hands the cell
# body to the `false` command instead of executing it as Python.
# When enabled, it reads the JSON file at VIDEO_DB_PATH into `video_data`.
VIDEO_DB_PATH = "./metadata/videos.json"
with open(VIDEO_DB_PATH, "r") as f:
  video_data = json.load(f)

### OpenCV

In [None]:
# Camera-movement threshold: compared against the mean of calculateDistance()
# over tracked corners. calculateDistance() returns a *squared* distance, so
# this value is in squared pixels.
CORNER_DIST_THOLD = 500

def frameToTime(fi, fps):
  """Return the timestamp of frame index `fi` at `fps` frames per second.

  The elapsed time is floored to whole seconds and rendered with
  `str(timedelta)`, e.g. frame 300 at 30 fps gives "0:00:10".
  """
  whole_seconds = (fi / fps) // 1
  elapsed = timedelta(seconds=whole_seconds)
  return str(elapsed)

def calculateDistance(P0, P1):
  """Return the squared Euclidean distance between two 2-D points.

  Both arguments are flattened with `ravel()` and must hold exactly two
  values (x, y). No square root is taken, so the result is in squared units.
  """
  ax, ay = P0.ravel()
  bx, by = P1.ravel()
  dx = bx - ax
  dy = by - ay
  return (dx ** 2 + dy ** 2)

def getFeatureMask(vw, vh):
  """Build a uint8 mask of shape (vh, vw) with three regions set to 1.

  The regions are the top-left corner, the bottom-right corner and a band
  around the centre of the frame; everything else stays 0.
  """
  mask = np.zeros((vh, vw), dtype=np.uint8)

  # Top-left corner: one sixth of the height and width.
  mask[:vh//6, :vw//6] = 1

  # Bottom-right corner: roughly the last fifth of the rows and tenth of the columns.
  bottom_rows = slice(-vh//5, None)
  right_cols = slice(-vw//10, None)
  mask[bottom_rows, right_cols] = 1

  # Centre band: +/- vh//20 rows and +/- vw//10 columns around the middle.
  mid_row, mid_col = vh//2, vw//2
  half_rows, half_cols = vh//20, vw//10
  mask[mid_row - half_rows: mid_row + half_rows,
       mid_col - half_cols: mid_col + half_cols] = 1

  return mask

# Keyword arguments for cv2.Canny
canny_params = {
  "threshold1": 10,
  "threshold2": 100,
  "apertureSize": 3,
}

# Kernel size handed to cv2.blur
blur_size = (5, 5)

# Keyword arguments for cv2.goodFeaturesToTrack (corner detection)
feature_params = {
  "maxCorners": 100,
  "qualityLevel": 0.2,
  "minDistance": 3,
  "blockSize": 7,
}

# Keyword arguments for cv2.calcOpticalFlowPyrLK (Lucas-Kanade optical flow).
# The termination criteria stop after 10 iterations or once the update drops
# below 0.03, whichever happens first.
lk_params = {
  "winSize": (15, 15),
  "maxLevel": 2,
  "criteria": (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03),
}

### Perceptual hash function

In [None]:
def phash(im, hash_size=8, highfreq_factor=4):
    """Compute a DCT-based perceptual hash of an OpenCV image.

    Taken from vframe:
    https://github.com/vframeio/vframe/blob/master/src/vframe/utils/im_utils.py#L37-L48
    Perceptual hash rewritten from imagehash:
    https://github.com/JohannesBuchner/imagehash/blob/master/imagehash.py#L197

    The image is resized (nearest neighbour) to a square of
    `hash_size * highfreq_factor` pixels, converted to greyscale when it has
    more than one channel, and transformed with a 2-D DCT. The hash bits are
    the top-left `hash_size` x `hash_size` coefficients compared against
    their median.

    Returns an `ImageHash` wrapping that boolean matrix.
    """
    side = hash_size * highfreq_factor
    small = cv2.resize(im, (side, side), interpolation=cv2.INTER_NEAREST)

    has_colour_channels = len(small.shape) > 2 and small.shape[2] > 1
    if has_colour_channels:
        small = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)

    # 2-D DCT applied as two 1-D passes: along axis 0, then along axis 1.
    dct_axis0 = fftpack.dct(small, axis=0)
    dct_2d = fftpack.dct(dct_axis0, axis=1)

    low_freq = dct_2d[:hash_size, :hash_size]
    return ImageHash(low_freq > np.median(low_freq))

In [None]:
# Scan videos for camera movements and visually distinct static frames.
#
# Results left in the notebook namespace for the following cells:
#   camera_movements - list of [first_frame, last_frame] spans with camera motion
#   static_hashes    - list of [ImageHash, frame_index] for selected static frames
#   action_sequences - not populated yet (see TODOs below)
#   fps, input_file_path, vid_width, vid_height
input_dirs = sorted([d for d in listdir(VIDEO_PATH) if DIR_PATTERN.search(d) is not None])

# Only the first matching directory / first file is processed for now.
for io_dir in input_dirs[0:1]:
  input_dir_path = path.join(VIDEO_PATH, io_dir)
  input_files = sorted([f for f in listdir(input_dir_path) if f.endswith("mp4")])
  if not input_files:
    # Nothing to probe or process in this directory.
    print(io_dir, "contains no mp4 files, skipping")
    continue

  # Probe the first video for its dimensions; the feature mask built from
  # them is reused for every file of this directory.
  vid = cv2.VideoCapture(path.join(input_dir_path, input_files[0]))
  vid_width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
  vid_height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
  vid_mask = getFeatureMask(vid_width, vid_height)
  vid.release()

  for io_file in input_files[0:1]:
    print(io_dir, io_file)
    input_file_path = path.join(input_dir_path, io_file)
    output_file_path = path.join(OUT_PATH, io_file.replace("mp4", "json"))

    camera_movements = []
    static_hashes = []
    action_sequences = []

    vid = cv2.VideoCapture(input_file_path)
    try:
      frame_count = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
      fps = vid.get(cv2.CAP_PROP_FPS)

      vid.set(cv2.CAP_PROP_POS_FRAMES, 0)
      ok, prev_frame = vid.read()
      if not ok:
        # Unreadable/empty video: there is no reference frame to compare against.
        print("could not read first frame, skipping", io_file)
        continue

      prev_frame_hash = phash(prev_frame)
      prev_frame_grey = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
      prev_edges = cv2.blur(prev_frame_grey, blur_size).astype(np.int16)
      prev_corners = cv2.goodFeaturesToTrack(prev_frame_grey, mask=vid_mask, **feature_params)

      # Thresholds are fractions of the number of bits in the hash.
      hash_size = prev_frame_hash.hash.size
      frame_hash_threshold = hash_size // 2
      static_hash_threshold = hash_size // 3

      # Look at (at most) the first 120 seconds, analysing every 5th frame.
      for frameIdx in range(1, int(120*fps)):
        ok, frame = vid.read()
        if not ok:
          # Video is shorter than the window, or a frame failed to decode.
          break
        if frameIdx % 5 != 0:
          continue

        frame_grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Track the previously detected corners into the current frame.
        # `corners` is reset every iteration so a stale result is never reused.
        corners = None
        valid_prev_corners = prev_corners is not None and len(prev_corners) > 0
        if valid_prev_corners:
          corners, _, _ = cv2.calcOpticalFlowPyrLK(prev_frame_grey, frame_grey, prev_corners, None, **lk_params)
        valid_corners = corners is not None and len(corners) > 0

        # Without trackable corners the frame counts as "moved", which makes
        # the branch below re-detect corners.
        corner_avg = 2 * CORNER_DIST_THOLD
        if valid_prev_corners and valid_corners:
          # NOTE(review): the status output of calcOpticalFlowPyrLK is ignored,
          # so points whose flow was not found still enter the mean - confirm
          # whether they should be filtered out.
          corner_avg = np.array([calculateDistance(p0, p1) for p0,p1 in zip(corners, prev_corners)]).mean()

        camera_moved = corner_avg > CORNER_DIST_THOLD

        if camera_moved:
          # Start a new movement span unless the previous one ended less than
          # `fps` frames (about one second) ago; in that case extend it.
          if len(camera_movements) == 0 or (frameIdx - camera_movements[-1][1]) > fps:
            camera_movements.append([frameIdx, frameIdx])
          else:
            camera_movements[-1][1] = frameIdx

          prev_corners = cv2.goodFeaturesToTrack(frame_grey, mask=vid_mask, **feature_params)
        else:
          # grab static frames by hash independent of action
          # TODO: clean these up in post-processing with O(n^2)
          frame_hash = phash(frame)
          if len(static_hashes) < 1:
            static_hashes.append([frame_hash, frameIdx])
          else:
            frame_hash_diff = frame_hash - prev_frame_hash
            static_hash_diff = frame_hash - static_hashes[-1][0]

            # Keep frames that are close to the previous analysed static frame
            # but differ enough from the last stored keyframe.
            if (frame_hash_diff < frame_hash_threshold and
                static_hash_diff > static_hash_threshold):
              static_hashes.append([frame_hash, frameIdx])

          # Percentage-like score (0-255) of pixels whose blurred intensity
          # changed by more than 8 since the last static frame.
          # NOTE(review): the row axis is sliced with vid_width//8; rows
          # correspond to height, so vid_height//8 may have been intended - confirm.
          edges = cv2.blur(frame_grey, blur_size).astype(np.int16)
          edge_diff = np.abs(edges - prev_edges)
          edge_diff_avg = ((edge_diff > 8) * 255)[vid_width//8:, :].mean()

          if edge_diff_avg > 1.0:
            # TODO: update action sequence
            pass

          prev_edges = edges.copy()
          prev_frame_hash = ImageHash(frame_hash.hash)

        prev_frame = frame.copy()
        prev_frame_grey = frame_grey.copy()

      # possible_action = len(action_sequences) < 1 or action_start != action_sequences[-1][0]
      # if possible_action and (action_end - action_start) > 15:
      # action_sequences.append([action_start, action_end])
    finally:
      # Release the capture even when reading or processing fails.
      vid.release()

In [None]:
# Turn the collected frame indices into timestamps for inspection.
camera_movement_pairs = [
  (frameToTime(start_frame, fps), frameToTime(end_frame, fps))
  for start_frame, end_frame in camera_movements
]
static_frames = [frameToTime(entry[1], fps) for entry in static_hashes]
static_frame_hashes = [str(entry[0]) for entry in static_hashes]

print(camera_movement_pairs)
print(len(static_hashes), static_frames)
print(static_frame_hashes)

In [None]:
# Show every selected static frame together with its timestamp and hash.
vid = cv2.VideoCapture(input_file_path)
try:
  for h,f in static_hashes:
    vid.set(cv2.CAP_PROP_POS_FRAMES, f)
    ok, frame = vid.read()
    if not ok:
      # Seeking/decoding failed: report it instead of crashing in cvtColor.
      print(frameToTime(f, fps), h, "(frame could not be read)")
      continue
    print(frameToTime(f, fps), h)
    display(PImage.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
finally:
  # The capture is only needed inside this cell; do not leak the handle.
  vid.release()

In [None]:
# - reduce size of frames to around 160px (?)

# - threshold difference between consecutive frames

# - check accuracy vs size of image

### Action Experiments

In [None]:
# Canny parameters
canny_params = dict(
  threshold1=10,
  threshold2=100,
  apertureSize=3)
# Blur kernel size and structuring elements for dilate/erode
bkernel = (5,5)
dkernel = np.ones((4, 4), np.uint8)
ekernel = np.ones((4, 4), np.uint8)

# Frame pair under test: the frame at 117 s and the one FRAME_DELTA frames later.
FRAME_NUM = 117*fps
FRAME_DELTA = 5 # 10*60*fps


vid = cv2.VideoCapture(input_file_path)
try:
  vid.set(cv2.CAP_PROP_POS_FRAMES, FRAME_NUM)
  prev_ok, prev_frame = vid.read()

  vid.set(cv2.CAP_PROP_POS_FRAMES, FRAME_NUM + FRAME_DELTA)
  cur_ok, frame = vid.read()
finally:
  # Only these two frames are needed; release the capture right away.
  vid.release()

if not (prev_ok and cur_ok):
  # Fail here with a clear message rather than later inside an OpenCV call
  # that receives None.
  raise RuntimeError(
    f"could not read frames {FRAME_NUM} and {FRAME_NUM + FRAME_DELTA} from {input_file_path}")

In [None]:
%%script false --no-raise-error
%%timeit

# Disabled experiment (the `%%script false` magic above skips the cell):
# frame difference based on Canny edges.

# Edge maps of the blurred frames.
prev_edges = cv2.Canny(cv2.blur(prev_frame, bkernel), **canny_params)
edges = cv2.Canny(cv2.blur(frame, bkernel), **canny_params)

# Dilate then erode each edge map.
prev_edges = cv2.erode(cv2.dilate(prev_edges, dkernel), ekernel)
edges = cv2.erode(cv2.dilate(edges, dkernel), ekernel)

# Absolute difference, computed in int16 so the subtraction cannot wrap around.
edge_diff = np.abs(edges.astype(np.int16) - prev_edges.astype(np.int16)).astype(np.uint8)

# Same dilate/erode pass applied to the difference image.
edge_diff_erosion = cv2.erode(cv2.dilate(edge_diff, dkernel), ekernel)

In [None]:
%%script false --no-raise-error

# Disabled cell: shows both frames, their edge maps and the two difference images.
# NOTE(review): the edge/difference images look single-channel here, and
# COLOR_BGR2RGB presumably expects 3 or 4 channels - confirm before re-enabling.
display(PImage.fromarray(cv2.cvtColor(prev_frame, cv2.COLOR_BGR2RGB)))
display(PImage.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
display(PImage.fromarray(cv2.cvtColor(prev_edges, cv2.COLOR_BGR2RGB)))
display(PImage.fromarray(cv2.cvtColor(edges, cv2.COLOR_BGR2RGB)))

display(PImage.fromarray(cv2.cvtColor(edge_diff, cv2.COLOR_BGR2RGB)))
display(PImage.fromarray(cv2.cvtColor(edge_diff_erosion, cv2.COLOR_BGR2RGB)))

In [None]:
%%script false --no-raise-error

# Disabled cell: mean pixel value of the raw and the dilated/eroded difference images.
#edge_diff = np.abs(edges.astype(np.int16) - prev_edges.astype(np.int16)).astype(np.uint8)
np.mean(edge_diff), np.mean(edge_diff_erosion)

In [None]:
#%%timeit

# Blurred greyscale versions of both frames (kept under the "edges" names
# used by the following cells).
prev_edges, edges = (
  cv2.blur(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), bkernel)
  for img in (prev_frame, frame)
)

# Absolute per-pixel difference, subtracted in int16 so uint8 values cannot wrap.
edge_diff = np.abs(np.subtract(edges, prev_edges, dtype=np.int16)).astype(np.uint8)

# Binary (0/255) image of pixels that changed by more than 8, minus the top rows.
# NOTE(review): rows are sliced with vid_width//8; rows correspond to height,
# so vid_height//8 may have been intended - confirm.
edge_diff_erosion = np.where(edge_diff > 8, 255, 0).astype(np.uint8)[vid_width//8:, :]

In [None]:
# (element count, sum, mean) for the raw difference and the thresholded one.
tuple(
    (arr.size, np.sum(arr), np.mean(arr))
    for arr in (edge_diff, edge_diff_erosion)
)

In [None]:
# Show both frames, their blurred greyscale versions and the two difference images.
# The colour frames are BGR and need converting for PIL. The other four are
# 2-D greyscale arrays, which cv2.COLOR_BGR2RGB rejects (it needs 3 or 4
# channels), so those are handed to PIL unchanged.
for img in (prev_frame, frame, prev_edges, edges, edge_diff, edge_diff_erosion):
  if img.ndim == 2:
    display(PImage.fromarray(img))
  else:
    display(PImage.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)))

### Time Tests

In [None]:
# Open the first mp4 of the first matching directory; the capture stays open
# because the timing cell reads from `vid`.
input_dirs = sorted(d for d in listdir(VIDEO_PATH) if DIR_PATTERN.search(d))
input_dir_path = path.join(VIDEO_PATH, input_dirs[0])

input_files = sorted(f for f in listdir(input_dir_path) if f.endswith("mp4"))
input_file_path = path.join(input_dir_path, input_files[0])

vid = cv2.VideoCapture(input_file_path)
frame_count = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))

print(vid.get(cv2.CAP_PROP_FPS))

In [None]:
%%timeit

# Benchmark: corner tracking over 600 consecutive frames starting at frame 1000.
# Unlike the main loop, corners are detected without a mask and every frame is used.
vid.set(cv2.CAP_PROP_POS_FRAMES, 0)
_, prev_frame = vid.read()
prev_frame_grey = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
prev_corners = cv2.goodFeaturesToTrack(prev_frame_grey, **feature_params)

# Note: the first iteration compares frame 0 against frame 1000.
vid.set(cv2.CAP_PROP_POS_FRAMES, 1000)
for frameIdx in range(0, 600):
  _, frame = vid.read()
  # edges = cv2.Canny(frame, 10, 100, 11)

  corner_avg = 0
  if prev_corners is not None:
    # Flow is computed on the colour frames here, not on greyscale ones.
    corners, _, _ = cv2.calcOpticalFlowPyrLK(prev_frame, frame, prev_corners, None, **lk_params)
    corner_avg = np.array([calculateDistance(p0, p1) for p0,p1 in zip(corners, prev_corners)]).mean()

  # Re-detect corners after a large move, or when nothing was tracked / nothing moved.
  if corner_avg > CORNER_DIST_THOLD or corner_avg == 0:
    frame_grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    prev_corners = cv2.goodFeaturesToTrack(frame_grey, **feature_params)

  prev_frame = frame.copy()

### SceneDetect
https://www.scenedetect.com/

In [None]:
# Run PySceneDetect's adaptive detector on the first mp4 of the second matching directory.
input_dirs = sorted(d for d in listdir(VIDEO_PATH) if DIR_PATTERN.search(d))

for io_dir in input_dirs[1:2]:
  input_dir_path = path.join(VIDEO_PATH, io_dir)
  input_files = sorted(f for f in listdir(input_dir_path) if f.endswith("mp4"))
  print(io_dir, input_files)

  for io_file in input_files[:1]:
    input_file_path = path.join(input_dir_path, io_file)
    output_file_path = path.join(OUT_PATH, io_file.replace("mp4", "json"))

    # Detected scenes of the (last) processed file stay available in `scene_list`.
    scene_list = detect(input_file_path, AdaptiveDetector())