# Pose and face estimation

This colab is from the Algorithmic Filmmaking course: https://www.youtube.com/watch?v=ohrf6MIVvyI&list=PLWuCzxqIpJs_8IpPl1bkKNFAe98ejDHsQ&index=32

Let’s look at how to track bodies and faces in our video clips.

For this notebook, we do not need a GPU.

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Person Detection
Let’s detect how many people are in each clip. This will help the pose tracking system find the right number of people per clip.

In [None]:
!pip install -q ultralytics

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m779.6/779.6 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m91.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import torch
from ultralytics import YOLO

# Select the first CUDA device when one is available; otherwise this step is skipped.
if(torch.cuda.is_available()):
    torch.cuda.set_device(0)

# Load the `yolov8x.pt` weights; `model` is the global that getPersonCount reads.
model = YOLO('yolov8x.pt')

Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x.pt to 'yolov8x.pt'...


100%|██████████| 131M/131M [00:01<00:00, 95.6MB/s]


In [None]:
import cv2
def getPersonCountVideo(video_path, sample_every=24):
    """Return the largest number of people detected in any sampled frame of a clip.

    Args:
        video_path: Path to a video file readable by OpenCV.
        sample_every: Run the detector on every N-th frame. Defaults to 24,
            the interval this notebook originally hard-coded.

    Returns:
        The maximum per-frame person count over the sampled frames, or 0 when
        the video cannot be opened or yields no frames.

    Raises:
        ValueError: If `sample_every` is smaller than 1.
    """
    if sample_every < 1:
        raise ValueError("sample_every must be >= 1")

    cap = cv2.VideoCapture(video_path)
    maxcount = 0
    framecount = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # Only every N-th frame goes through the (slow) detector.
            if framecount % sample_every == 0:
                maxcount = max(maxcount, getPersonCount(frame))
            framecount += 1
    finally:
        # Release the decoder even if the detector raises part-way through.
        cap.release()

    return maxcount

def getPersonCount(image_path):
    """Count the 'person' detections the global YOLO `model` reports for one image.

    Args:
        image_path: Source handed straight to `model.predict`; this notebook
            passes both an image file path and a decoded OpenCV frame.

    Returns:
        Number of detected boxes whose class name is 'person'.
    """
    detections = model.predict(image_path, classes=0, imgsz=224, conf=0.5, verbose=False)
    # Translate every box's class id into its label, across all results.
    labels = [
        model.names[int(box.cls)]
        for result in detections
        for box in result.boxes
    ]
    return labels.count('person')

### Test on a single image

In [None]:
img_path = "/content/drive/MyDrive/algo-film/frames/punch-drunk-love-feature/Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-077/000000026.png" #@param {type:"string"}
print(getPersonCount(img_path))

2


### Test on a video

In [None]:
vid_path = "/content/drive/MyDrive/algo-film/clips/punch-drunk-love-feature/Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-077.mp4" #@param {type:"string"}
pc = getPersonCountVideo(vid_path)
print(pc)

2


In [None]:
import json
import os
import glob
import cv2

json_path = "/content/drive/MyDrive/future-of-tv/_clips.json" #@param {type:"string"}
# Named file_filter (as in the later cells) so the builtin `filter` is not shadowed.
file_filter = ""  #@param {type:"string"}
# Checkpoint the index after this many newly counted clips.
SAVE_EVERY = 250

keep_videos = []

# Stop with a clear message instead of failing later on an undefined json_data.
if not os.path.isfile(json_path):
    raise FileNotFoundError(f'clip index not found: {json_path}')

print('loading json...')
with open(json_path) as f:
    json_data = json.load(f)

unsaved = 0
for name, clip in json_data.items():
    if "person_count" in clip:
        print(f'{name} previously processed, skipping classification')
        continue
    if file_filter not in name:
        continue

    clip["person_count"] = getPersonCountVideo(clip["path"])
    unsaved += 1
    print(f'{name}: {clip["person_count"]}')

    # Checkpoint by amount of new work, not by position in the index, so a
    # long run is saved regularly no matter which clips were already done.
    if unsaved >= SAVE_EVERY:
        print("Saving files...")
        with open(json_path, 'w') as f:
            json.dump(json_data, f)
        unsaved = 0

with open(json_path, 'w') as f:
  json.dump(json_data, f)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
hannibal.s03e03.bdrip.x264-reward-Scene-315.mp4: 0
hannibal.s03e03.bdrip.x264-reward-Scene-316.mp4: 0
hannibal.s03e03.bdrip.x264-reward-Scene-317.mp4: 0
hannibal.s03e03.bdrip.x264-reward-Scene-318.mp4: 0
hannibal.s03e03.bdrip.x264-reward-Scene-319.mp4: 0
hannibal.s03e03.bdrip.x264-reward-Scene-320.mp4: 0
hannibal.s03e03.bdrip.x264-reward-Scene-321.mp4: 0
hannibal.s03e03.bdrip.x264-reward-Scene-322.mp4: 1
hannibal.s03e03.bdrip.x264-reward-Scene-323.mp4: 1
hannibal.s03e03.bdrip.x264-reward-Scene-324.mp4: 0
hannibal.s03e03.bdrip.x264-reward-Scene-325.mp4: 1
hannibal.s03e03.bdrip.x264-reward-Scene-326.mp4: 2
hannibal.s03e03.bdrip.x264-reward-Scene-327.mp4: 0
hannibal.s03e03.bdrip.x264-reward-Scene-328.mp4: 1
hannibal.s03e03.bdrip.x264-reward-Scene-329.mp4: 0
hannibal.s03e03.bdrip.x264-reward-Scene-330.mp4: 2
hannibal.s03e03.bdrip.x264-reward-Scene-331.mp4: 2
hannibal.s03e03.bdrip.x264-reward-Scene-332.mp4: 0
hannibal.s03e03.b

In [None]:
# Write the in-memory clip index back to json_path, overwriting the file.
with open(json_path, 'w') as f:
  json.dump(json_data, f)

## Pose Estimation

Now we can begin to create our pose skeletons for each clip.


### Preparation

Let's start with installing MediaPipe.


In [None]:
!pip install -q mediapipe==0.10.0

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.9/33.9 MB[0m [31m28.1 MB/s[0m eta [36m0:00:00[0m
[?25h

Then download an off-the-shelf model bundle. Check out the [MediaPipe documentation](https://developers.google.com/mediapipe/solutions/vision/pose_landmarker#models) for more information about this model bundle.

In [None]:
!wget -O pose_landmarker.task -q https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_heavy/float16/1/pose_landmarker_heavy.task

### Utility Functions

In [None]:
import math
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import numpy as np
import cv2
import os
from datetime import datetime

def create_tmp_folder(root='/content'):
    """Create (if needed) and return a timestamped scratch folder.

    Args:
        root: Directory the folder is created in. Defaults to '/content',
            the location this notebook originally hard-coded.

    Returns:
        Path of the folder, named 'tmp-' followed by the current local time
        formatted as MMDDYYYYHHMMSS.
    """
    stamp = datetime.now().strftime("%m%d%Y%H%M%S")
    tmp_folder = os.path.join(root, "tmp-" + stamp)

    # exist_ok avoids the check-then-create race; note that two calls within
    # the same second return the same folder.
    os.makedirs(tmp_folder, exist_ok=True)
    return tmp_folder

def get_landmarks(video_path, generate_video_path=False):
    """Run the pose landmarker over every frame of a clip and collect the keypoints.

    Reads two notebook globals that must exist before the call: `mp` (the
    mediapipe module) and `landmarker` (an open PoseLandmarker; the calling
    cells create it in VIDEO running mode).

    Args:
        video_path: Path to a video file readable by OpenCV.
        generate_video_path: When true, every frame is also saved with the
            skeleton drawn on it into a new tmp folder, and the frames are
            encoded to an mp4 in that folder; the mp4 path is printed.

    Returns:
        A `(json_landmarks, landmark_keypoints)` tuple.
        `json_landmarks[frame][pose][landmark_id]` is a dict holding the
        landmark name and its x/y; frames without a pose map to `{}`.
        `landmark_keypoints` holds one entry per frame: a list of `[x, y]`
        pairs, or `[]` when nothing was detected.
    """
    import subprocess  # local import: the surrounding cell does not import it

    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    # A missing frame rate is reported as 0; fall back to 24 (the rate used for
    # the rendered output) rather than dividing by zero below.
    ticks_per_second = math.ceil(fps) if fps > 0 else 24
    framecount = 0
    json_landmarks = {}
    landmark_keypoints = []

    if generate_video_path:
        tmp = create_tmp_folder()

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_timestamp_ms = int((framecount / ticks_per_second) * 1000)
            # OpenCV decodes frames as BGR, while ImageFormat.SRGB declares RGB
            # channel order, so convert before handing the frame to MediaPipe.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            pose_landmarker_result = landmarker.detect_for_video(mp_image, frame_timestamp_ms)

            if generate_video_path:
                # Annotate the untouched BGR frame: cv2.imwrite expects BGR.
                annotated_image = draw_landmarks_on_image(frame, pose_landmarker_result)
                cv2.imwrite(os.path.join(tmp, str(framecount).zfill(9) + ".png"), annotated_image)

            json_landmarks[framecount] = {}
            keypoints = []
            for idx, pose in enumerate(pose_landmarker_result.pose_landmarks):
                json_landmarks[framecount][idx] = {}
                keypoints = []
                for mark, data_point in zip(solutions.pose.PoseLandmark, pose):
                    json_landmarks[framecount][idx][mark.value] = dict(
                        landmark = mark.name,
                        x = data_point.x,
                        y = data_point.y,
                    )
                    keypoints.append([data_point.x, data_point.y])

            # Exactly one entry per frame. When several poses are found only the
            # last pose's keypoints are kept, as before.
            # NOTE(review): confirm whether multi-person clips should keep all poses.
            landmark_keypoints.append(keypoints)
            framecount += 1
    finally:
        # Release the decoder even if detection or drawing raises.
        cap.release()

    if generate_video_path:
        output_path = os.path.join(tmp, os.path.basename(video_path))
        print(output_path)
        # An argument list (no shell) keeps spaces and brackets in file names intact.
        command = [
            "ffmpeg", "-y",
            "-r", "24", "-i", os.path.join(tmp, "%09d.png"),
            "-r", "24", "-vcodec", "libx264", "-pix_fmt", "yuv420p",
            output_path,
        ]
        encode = subprocess.run(command, capture_output=True, text=True, errors="replace")
        if encode.returncode != 0:
            # Best effort: the landmarks are still returned when encoding fails.
            print(f'ffmpeg failed with exit code {encode.returncode}:\n{encode.stderr}')

    return json_landmarks, landmark_keypoints

def draw_landmarks_on_image(rgb_image, detection_result):
  """Return a copy of the image with every detected pose skeleton drawn on it.

  Args:
    rgb_image: Image array to annotate; it is copied, not modified.
    detection_result: Result object exposing a `pose_landmarks` list, one
      landmark sequence per detected pose.

  Returns:
    The annotated copy of `rgb_image`.
  """
  canvas = np.copy(rgb_image)

  for pose in detection_result.pose_landmarks:
    # The drawing helper takes a NormalizedLandmarkList proto, so repack the
    # landmarks of this pose into one.
    points = [
      landmark_pb2.NormalizedLandmark(x=point.x, y=point.y, z=point.z)
      for point in pose
    ]
    proto = landmark_pb2.NormalizedLandmarkList()
    proto.landmark.extend(points)

    solutions.drawing_utils.draw_landmarks(
      canvas,
      proto,
      solutions.pose.POSE_CONNECTIONS,
      solutions.drawing_styles.get_default_pose_landmarks_style())

  return canvas

ModuleNotFoundError: No module named 'mediapipe'

### Process test video


In [None]:
import mediapipe as mp
import json

video_path = "/content/drive/MyDrive/algo-film/clips/night-hunter/night-hunter-Scene-066.mp4" #@param {type:"string"}
nposes = 1 #@param {type:"integer"}

BaseOptions = mp.tasks.BaseOptions
PoseLandmarker = mp.tasks.vision.PoseLandmarker
PoseLandmarkerOptions = mp.tasks.vision.PoseLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# Create a pose landmarker instance with the video mode:
options = PoseLandmarkerOptions(
    base_options=BaseOptions(model_asset_path='pose_landmarker.task'),
    running_mode=VisionRunningMode.VIDEO,
    num_poses = nposes)

# `landmarker` is read as a global by get_landmarks, so the name must not change.
with PoseLandmarker.create_from_options(options) as landmarker:
    # Do not unpack into a variable called `json`: that would replace the
    # imported json module for every cell that runs afterwards.
    landmarks_json, xy = get_landmarks(video_path, True)
    print(landmarks_json)
    print(xy)

/content/tmp-04022024181729/night-hunter-Scene-066.mp4
ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enab

Find the output folder (in your `/content` folder, beginning with a `tmp` in the folder name). Open it and grab the video path from inside and paste it below to see your output video.

In [None]:
from ipywidgets import Video

pose_video_path = "/content/tmp-04022024181729/night-hunter-Scene-066.mp4" #@param {type:"string"}

Video.from_file(pose_video_path,width=500,play=True)

Video(value=b'\x00\x00\x00 ftypisom\x00\x00\x02\x00isomiso2avc1mp41\x00\x00\x00\x08free\x00\x17,\xdbmdat\x00\x…

### Save Pose Detection to JSON

First we need to collect our video files. I recommend starting by sorting our videos by the number of people detected in each scene. This helps ensure our pose estimations are more accurate.

In [None]:
import os
import json

json_path = "/content/drive/MyDrive/future-of-tv/_clips.json" #@param {type:"string"}
file_filter = ""  #@param {type:"string"}
nperson_filter = 0 #@param {type:"integer"}

# Stop with a clear message rather than silently reusing a json_data left over
# from an earlier cell (or failing with a NameError when there is none).
if not os.path.isfile(json_path):
    raise FileNotFoundError(f'clip index not found: {json_path}')

print('loading json...')
with open(json_path) as f:
    json_data = json.load(f)
print('loaded.')

# Keep the clips whose name contains file_filter and whose detected person
# count equals nperson_filter. Values are the same dict objects as in json_data.
selection = {
    name: clip
    for name, clip in json_data.items()
    if "person_count" in clip
    and file_filter in str(name)
    and clip["person_count"] == nperson_filter
}

print(f'Number of clips selected: {len(selection)}')


loading json...
loaded.
Number of clips selected: 3124


In [None]:
selection.keys()

dict_keys(['Hannibal.S02E08.BDRip.x264-DEMAND-Scene-091.mp4', 'Hannibal.S02E08.BDRip.x264-DEMAND-Scene-101.mp4', 'Hannibal.S02E08.BDRip.x264-DEMAND-Scene-109.mp4', 'Hannibal.S02E08.BDRip.x264-DEMAND-Scene-112.mp4', 'Hannibal.S02E08.BDRip.x264-DEMAND-Scene-113.mp4', 'Hannibal.S02E08.BDRip.x264-DEMAND-Scene-114.mp4', 'Hannibal.S02E08.BDRip.x264-DEMAND-Scene-118.mp4', 'Hannibal.S02E08.BDRip.x264-DEMAND-Scene-119.mp4', 'Hannibal.S02E08.BDRip.x264-DEMAND-Scene-120.mp4', 'Hannibal.S02E08.BDRip.x264-DEMAND-Scene-129.mp4', 'Hannibal.S02E08.BDRip.x264-DEMAND-Scene-133.mp4', 'Hannibal.S02E08.BDRip.x264-DEMAND-Scene-145.mp4', 'Hannibal.S02E08.BDRip.x264-DEMAND-Scene-146.mp4', 'Hannibal.S02E08.BDRip.x264-DEMAND-Scene-147.mp4', 'Hannibal.S02E08.BDRip.x264-DEMAND-Scene-149.mp4', 'Hannibal.S02E08.BDRip.x264-DEMAND-Scene-176.mp4', 'Hannibal.S02E08.BDRip.x264-DEMAND-Scene-178.mp4', 'Hannibal.S02E08.BDRip.x264-DEMAND-Scene-197.mp4', 'Hannibal.S02E08.BDRip.x264-DEMAND-Scene-198.mp4', 'Hannibal.S02E08.BDR

In [None]:
import mediapipe as mp
import json
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2

BaseOptions = mp.tasks.BaseOptions
PoseLandmarker = mp.tasks.vision.PoseLandmarker
PoseLandmarkerOptions = mp.tasks.vision.PoseLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# Create a pose landmarker instance with the video mode:
options = PoseLandmarkerOptions(
    base_options=BaseOptions(model_asset_path='pose_landmarker.task'),
    running_mode=VisionRunningMode.VIDEO,
    min_pose_detection_confidence = .25,
    num_poses = nperson_filter)

# Checkpoint the index after this many newly processed clips.
SAVE_EVERY = 250
unsaved = 0

for name, clip in selection.items():
    # Check first, so no landmarker is built for clips that are skipped anyway.
    if "landmarks_xy" in json_data[name]:
        print(f'{name} previously processed, skipping classification')
        continue

    # One landmarker per clip: get_landmarks restarts its frame timestamps at 0
    # for every video. `landmarker` is the global that get_landmarks reads.
    with PoseLandmarker.create_from_options(options) as landmarker:
        json_xy, xy = get_landmarks(clip["path"])
    # json_data[name]["landmarks"] = json_xy
    json_data[name]["landmarks_xy"] = xy
    print(f'{name}: processed')

    # Checkpoint by amount of new work rather than by position in the selection.
    unsaved += 1
    if unsaved >= SAVE_EVERY:
        print("Saving files...")
        with open(json_path, 'w') as f:
            json.dump(json_data, f)
        unsaved = 0

with open(json_path, 'w') as f:
  json.dump(json_data, f)

night-hunter-Scene-002.mp4 previously processed, skipping classification
night-hunter-Scene-006.mp4 previously processed, skipping classification
night-hunter-Scene-018.mp4 previously processed, skipping classification
night-hunter-Scene-020.mp4 previously processed, skipping classification
night-hunter-Scene-022.mp4 previously processed, skipping classification
night-hunter-Scene-024.mp4 previously processed, skipping classification
night-hunter-Scene-025.mp4 previously processed, skipping classification
night-hunter-Scene-033.mp4 previously processed, skipping classification
night-hunter-Scene-037.mp4 previously processed, skipping classification
night-hunter-Scene-041.mp4 previously processed, skipping classification
night-hunter-Scene-056.mp4 previously processed, skipping classification
night-hunter-Scene-058.mp4 previously processed, skipping classification
night-hunter-Scene-077.mp4 previously processed, skipping classification
night-hunter-Scene-079.mp4 previously processed, sk

In [None]:
print(selection['night-hunter-Scene-002.mp4'])

{'path': '/content/drive/MyDrive/algo-film/clips/night-hunter/night-hunter-Scene-002.mp4', 'dimensions': [958, 576], 'duration': '124.1660', 'fps': '23.98', 'shottype_confidence': 0.5485696196556091, 'shottype': 'LS', 'person_count': 1, 'landmarks_xy': [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [[0.6595587134361267, 0.02911660075187683], [0.6629541516304016, 0.02017238736152649], [0.6644803881645203, 0.02037709951400757], [0.6661616563796997, 0.020388513803482056], [0.6563248634338379, 0.018235385417938232], [0.6526743769645691, 0.017003655433654785], [0.6500157117843628, 0.01581031084060669], [0.6646510362625122, 0.022447258234024048], [0.6426970362663269, 0.015812397003173828], [0.6581703424453735, 0.0405864417552948], [0.65193659

In [None]:
import json

# Write the in-memory clip index (now including landmarks) back to json_path.
with open(json_path, 'w') as f:
  json.dump(json_data, f)

### Select based on people count
Now let's grab all of the clips based on the shots we want. It might be good to grab full shots or wide shots with the full body pose estimation.

In [None]:
import os
import json

json_path = "/content/drive/MyDrive/future-of-tv/_clips.json" #@param {type:"string"}
file_filter = ""  #@param {type:"string"}
nperson_filter = 0 #@param {type:"integer"}

selection = {}

# Stop with a clear message rather than silently reusing a json_data left over
# from an earlier cell (or failing with a NameError when there is none).
if not os.path.isfile(json_path):
    raise FileNotFoundError(f'clip index not found: {json_path}')

print('loading json...')
with open(json_path) as f:
    json_data = json.load(f)
print('loaded.')

for name, clip in json_data.items():
    # Clips that have not been through person detection cannot be selected.
    if "person_count" not in clip or file_filter not in str(name):
        continue
    print(f'{name} matched in filter: {file_filter}')
    if clip["person_count"] == nperson_filter:
        selection[name] = clip

print(f'Number of clips selected: {len(selection)}')


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
hannibal.s03e03.bdrip.x264-reward-Scene-296.mp4 matched in filter: 
hannibal.s03e03.bdrip.x264-reward-Scene-297.mp4 matched in filter: 
hannibal.s03e03.bdrip.x264-reward-Scene-298.mp4 matched in filter: 
hannibal.s03e03.bdrip.x264-reward-Scene-299.mp4 matched in filter: 
hannibal.s03e03.bdrip.x264-reward-Scene-300.mp4 matched in filter: 
hannibal.s03e03.bdrip.x264-reward-Scene-301.mp4 matched in filter: 
hannibal.s03e03.bdrip.x264-reward-Scene-302.mp4 matched in filter: 
hannibal.s03e03.bdrip.x264-reward-Scene-303.mp4 matched in filter: 
hannibal.s03e03.bdrip.x264-reward-Scene-304.mp4 matched in filter: 
hannibal.s03e03.bdrip.x264-reward-Scene-305.mp4 matched in filter: 
hannibal.s03e03.bdrip.x264-reward-Scene-306.mp4 matched in filter: 
hannibal.s03e03.bdrip.x264-reward-Scene-307.mp4 matched in filter: 
hannibal.s03e03.bdrip.x264-reward-Scene-308.mp4 matched in filter: 
hannibal.s03e03.bdrip.x264-reward-Scene-309.mp4 mat

### Save body directions

Let’s calculate the direction of our body in the clip.
`threshold` sets the minimum amount of movement we want to test for. `0.1` equals 10% of the clip’s height or width.

In [None]:
threshold = 0.1 #@param {type:"number"}

def _direction(dist, negative_label, positive_label):
    """Classify a signed displacement against the global `threshold`.

    Returns "neutral" when `dist` lies within [-threshold, threshold]
    (boundaries included), `positive_label` above it and `negative_label`
    below it. A NaN matches none of the comparisons and yields "".
    """
    if -threshold <= dist <= threshold:
        return "neutral"
    if dist > threshold:
        return positive_label
    if dist < -threshold:
        return negative_label
    return ""

def h_direction(dist):
    """Name a horizontal displacement: "left", "right" or "neutral".

    Args:
        dist: Signed horizontal movement; positive values map to "right".

    Returns:
        The direction label. Movement of exactly +/-threshold counts as
        "neutral" (it used to fall through and return an empty string).
    """
    return _direction(dist, "left", "right")

def v_direction(dist):
    """Name a vertical displacement: "up", "down" or "neutral".

    Args:
        dist: Signed vertical movement; positive values map to "down".

    Returns:
        The direction label. Movement of exactly +/-threshold counts as
        "neutral" (it used to fall through and return an empty string).
    """
    return _direction(dist, "up", "down")

In [None]:
for name, clip in selection.items():
    # Keep only the frames where a pose was found (get_landmarks stores [] for
    # the others). Clips without any landmark data are treated as empty
    # instead of raising a KeyError.
    detected = [frame for frame in clip.get("landmarks_xy", []) if len(frame) > 0]

    if len(detected) > 2:
        # Per-landmark movement between the first and the last detected pose,
        # averaged over the landmarks into a single [dx, dy].
        dist = np.subtract(detected[-1], detected[0])
        average = np.mean(dist, axis=0)
        json_data[name]["body_direction_horizontal"] = h_direction(average[0])
        json_data[name]["body_direction_vertical"] = v_direction(average[1])
        print(f'{name}: {json_data[name]["body_direction_horizontal"]}, {json_data[name]["body_direction_vertical"]}')
    else:
        print(f'{name}: not enough keypoint data, skipping')

with open(json_path, 'w') as f:
  json.dump(json_data, f)

### Find pose endpoints and matches

In [None]:
import os
import json

json_path = "/content/drive/MyDrive/algo-film/_video_clips.json" #@param {type:"string"}
file_filter = ""  #@param {type:"string"}
nperson_filter = 1 #@param {type:"integer"}

shottype = "LS" #@param ["LS", "FS", "MS", "CS", "ECS"]
min_confidence = 3.0 #@param {type:"number"}

# Fields a clip needs before it can be considered. "shottype_confidence" is
# listed too, so a clip missing it is skipped instead of raising a KeyError.
required_keys = ("person_count", "shottype", "shottype_confidence", "landmarks_xy")

# Stop with a clear message rather than silently reusing a json_data left over
# from an earlier cell (or failing with a NameError when there is none).
if not os.path.isfile(json_path):
    raise FileNotFoundError(f'clip index not found: {json_path}')

print('loading json...')
with open(json_path) as f:
    json_data = json.load(f)
print('loaded.')

# Values are the same dict objects as in json_data.
selection = {
    name: clip
    for name, clip in json_data.items()
    if all(key in clip for key in required_keys)
    and file_filter in str(name)
    and clip["shottype"] == shottype
    and float(clip["shottype_confidence"]) >= float(min_confidence)
    and clip["person_count"] == nperson_filter
}

print(f'Number of clips selected: {len(selection)}')


loading json...
loaded.
Number of clips selected: 69


In [None]:
selection.keys()

In [None]:
from ipywidgets import Video

filename = "Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-012.mp4" #@param {type:"string"}

video_path = selection[filename]["path"]
Video.from_file(video_path,width=320,play=True)

Video(value=b'\x00\x00\x00 ftypisom\x00\x00\x02\x00isomiso2avc1mp41\x00\x00\x00\x08free\x00@\xe1\xc2mdat\x00\x…

In [None]:
def remove_from_lists(index,sk):
    """Drop the clip at `index` from the key list and from both pose arrays.

    Args:
        index: Position of the clip to remove.
        sk: List of clip names; the entry is removed from it in place.

    Returns:
        `(sk, firsts_without, lasts_without)`: the same list object plus new
        arrays built from the globals `firsts` and `lasts` with row `index`
        removed. The globals themselves are not reassigned here.
    """
    del sk[index]
    remaining_firsts = np.delete(firsts, index, axis=0)
    remaining_lasts = np.delete(lasts, index, axis=0)
    return sk, remaining_firsts, remaining_lasts

def get_closest(vid):
    """Return the index of the opening pose in `firsts` that is nearest to `vid`.

    Args:
        vid: Keypoint array for one pose, broadcast against every entry of
            the global `firsts`.

    Returns:
        Index into `firsts` with the smallest summed per-keypoint Euclidean
        distance to `vid`.
    """
    offsets = np.subtract(firsts[:], vid)
    # Euclidean distance of each keypoint pair (last axis holds the coordinates).
    per_keypoint = np.sqrt(np.sum(offsets ** 2, axis=-1))
    per_clip = np.sum(per_keypoint, axis=1)
    return np.argmin(per_clip)


Set `max_videos` to `-1` if you want to use all of the clips.

In [None]:
import numpy as np

start_video = "Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-012.mp4" #@param {type:"string"}
max_videos = -1 #@param {type:"integer"}

firsts = []
lasts = []
sorted_keys = []
for name in sorted(selection.keys()):
    frames = selection[name]["landmarks_xy"]
    # A clip is usable only when both its first and its last frame carry a
    # complete pose (33 keypoints), because those two poses drive the matching.
    if len(frames) > 2 and len(frames[0]) == 33 and len(frames[-1]) == 33:
        sorted_keys.append(name)
        firsts.append(np.asarray(frames[0]))
        lasts.append(np.asarray(frames[-1]))

if start_video not in sorted_keys:
    raise ValueError(
        f'start_video {start_video!r} is not among the {len(sorted_keys)} clips '
        'that open and close on a complete pose')

if max_videos == -1:
    max_v = len(sorted_keys)
else:
    # Never ask for more clips than exist; otherwise the loop below would
    # search an empty array once every clip has been used.
    max_v = min(int(max_videos), len(sorted_keys))

# remove_from_lists and get_closest read these two arrays as globals.
lasts = np.asarray(lasts)
firsts = np.asarray(firsts)

video_order = [start_video]
start_index = sorted_keys.index(start_video)

# Greedy chain: take the closing pose of the current clip, retire the clip,
# then continue with the remaining clip whose opening pose is nearest.
for _ in range(max_v - 1):
    kps = lasts[start_index]
    sorted_keys, firsts, lasts = remove_from_lists(start_index, sorted_keys)
    next_video = get_closest(kps)
    video_order.append(sorted_keys[next_video])
    start_index = next_video
    print(f'videos remaining: {len(sorted_keys)}')


videos remaining: 14
videos remaining: 13
videos remaining: 12
videos remaining: 11
videos remaining: 10
videos remaining: 9
videos remaining: 8
videos remaining: 7
videos remaining: 6
videos remaining: 5
videos remaining: 4
videos remaining: 3
videos remaining: 2
videos remaining: 1


In [None]:
video_order

['Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-012.mp4',
 'Pitch-Black-2000-Scene-0521.mp4',
 'Pitch-Black-2000-Scene-0244.mp4',
 'Pitch-Black-2000-Scene-0766.mp4',
 'Pitch-Black-2000-Scene-0789.mp4',
 'Pitch-Black-2000-Scene-1368.mp4',
 'night-hunter-Scene-033.mp4',
 'night-hunter-Scene-056.mp4',
 'Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-002.mp4',
 'Pitch-Black-2000-Scene-0515.mp4',
 'Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-033.mp4',
 'Pitch-Black-2000-Scene-0256.mp4',
 'Pitch-Black-2000-Scene-0508.mp4',
 'Pitch-Black-2000-Scene-1367.mp4',
 'Pitch-Black-2000-Scene-1399.mp4']

In [None]:
import os
from datetime import datetime

def create_tmp_folder(root='/content'):
    """Create (if needed) and return a timestamped scratch folder.

    Args:
        root: Directory the folder is created in. Defaults to '/content',
            the location this notebook originally hard-coded.

    Returns:
        Path of the folder, named 'tmp-' followed by the current local time
        formatted as MMDDYYYYHHMMSS.
    """
    stamp = datetime.now().strftime("%m%d%Y%H%M%S")
    tmp_folder = os.path.join(root, "tmp-" + stamp)

    # exist_ok avoids the check-then-create race; note that two calls within
    # the same second return the same folder.
    os.makedirs(tmp_folder, exist_ok=True)
    return tmp_folder

In [None]:
tmp_folder = create_tmp_folder()
output_path = "/content/drive/MyDrive/pose-video-match-cut.mp4" #@param {type:"string"}
concat_path = os.path.join(tmp_folder,"concat.txt")

# Size the output from the clips that actually appear in the cut, so a small
# clip that was selected but never used cannot shrink the result.
dimensions = [selection[v]["dimensions"] for v in video_order]
# Round the target down to even numbers: libx264 rejects odd frame sizes for
# the usual 4:2:0 pixel formats.
min_width = min(d[0] for d in dimensions) // 2 * 2
min_height = min(d[1] for d in dimensions) // 2 * 2
min_ratio = min_width/min_height

vids = []

for v in video_order:
    base_path = selection[v]["path"]
    new_path = os.path.join(tmp_folder, v)

    w = selection[v]["dimensions"][0]
    h = selection[v]["dimensions"][1]

    # Scale along the side that leaves the other one at least as large as the
    # target, then crop the overflow.
    if ((w/h) < min_ratio):
        !ffmpeg -hide_banner -loglevel error -i "$base_path" -filter_complex "fps=24,scale={min_width}:-1,crop={min_width}:{min_height}" -vcodec libx264 -c:a aac -crf 13 "$new_path" -y
    else:
        !ffmpeg -hide_banner -loglevel error -i "$base_path" -filter_complex "fps=24,scale=-1:{min_height},crop={min_width}:{min_height}" -vcodec libx264 -c:a aac -crf 13 "$new_path" -y

    # In a concat list a single quote inside a quoted path is written as '\''.
    vids.append("file '" + new_path.replace("'", "'\\''") + "'")

#write to text file for concatenation
with open(concat_path, 'w') as f:
    f.write('\n'.join(vids))

# join the re-encoded clips in pose-matched order
!ffmpeg -f concat -safe 0 -i "{concat_path}" -c copy "{output_path}" -y

# cleanup, remove temp folder
!rm -r "{tmp_folder}"

## Face Keypoint Estimation

### Download model and utilities

In [None]:
!wget -q -O detector.tflite -q https://storage.googleapis.com/mediapipe-models/face_detector/blaze_face_short_range/float16/1/blaze_face_short_range.tflite

In [None]:
from typing import Tuple, Union
import math
import cv2
import numpy as np

def create_tmp_folder(root='/content'):
    """Create (if needed) and return a timestamped scratch folder.

    Args:
        root: Directory the folder is created in. Defaults to '/content',
            the location this notebook originally hard-coded.

    Returns:
        Path of the folder, named 'tmp-' followed by the current local time
        formatted as MMDDYYYYHHMMSS.
    """
    # Imported here because this cell imports neither os nor datetime itself;
    # the function no longer depends on an earlier cell having run.
    import os
    from datetime import datetime

    stamp = datetime.now().strftime("%m%d%Y%H%M%S")
    tmp_folder = os.path.join(root, "tmp-" + stamp)

    # exist_ok avoids the check-then-create race; note that two calls within
    # the same second return the same folder.
    os.makedirs(tmp_folder, exist_ok=True)
    return tmp_folder

def get_face_landmarks(video_path, generate_video_path=False):
    """Run the face detector over every frame of a clip and collect the keypoints.

    Reads notebook globals that must exist before the call: `mp` (the
    mediapipe module), `solutions` (mediapipe.solutions) and `detector` (an
    open FaceDetector; the calling cells create it in VIDEO running mode).

    Args:
        video_path: Path to a video file readable by OpenCV.
        generate_video_path: When true, every frame is also saved with the
            detections drawn on it into a new tmp folder, and the frames are
            encoded to an mp4 in that folder; the mp4 path is printed.

    Returns:
        A `(json_landmarks, landmark_keypoints)` tuple.
        `json_landmarks[frame][face][keypoint_id]` is a dict holding the
        keypoint name and its x/y; every frame has an entry, `{}` when no
        face was found.
        `landmark_keypoints` holds one list of `[x, y]` pairs per detected
        face, in frame order. Frames without a face add nothing, so its
        indices are not frame numbers.
    """
    import os
    import subprocess  # local imports: the surrounding cell does not import them

    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    # A missing frame rate is reported as 0; fall back to 24 (the rate used for
    # the rendered output) rather than dividing by zero below.
    ticks_per_second = math.ceil(fps) if fps > 0 else 24
    framecount = 0
    json_landmarks = {}
    landmark_keypoints = []

    if generate_video_path:
        tmp = create_tmp_folder()

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            json_landmarks[framecount] = {}
            frame_timestamp_ms = int((framecount / ticks_per_second) * 1000)
            # OpenCV decodes frames as BGR, while ImageFormat.SRGB declares RGB
            # channel order, so convert before handing the frame to MediaPipe.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            face_detector_result = detector.detect_for_video(mp_image, frame_timestamp_ms)

            if generate_video_path:
                # Annotate the untouched BGR frame: cv2.imwrite expects BGR.
                annotated_image = visualize(frame, face_detector_result)
                cv2.imwrite(os.path.join(tmp, str(framecount).zfill(9) + ".png"), annotated_image)

            for idx, detection in enumerate(face_detector_result.detections):
                json_landmarks[framecount][idx] = {}
                keypoints = []
                for mark, data_point in zip(solutions.face_detection.FaceKeyPoint, detection.keypoints):
                    json_landmarks[framecount][idx][mark.value] = dict(
                        landmark = mark.name,
                        x = data_point.x,
                        y = data_point.y,
                    )
                    keypoints.append([data_point.x, data_point.y])

                landmark_keypoints.append(keypoints)

            framecount += 1
    finally:
        # Release the decoder even if detection or drawing raises.
        cap.release()

    if generate_video_path:
        output_path = os.path.join(tmp, os.path.basename(video_path))
        print(output_path)
        # An argument list (no shell) keeps spaces and brackets in file names intact.
        command = [
            "ffmpeg", "-y",
            "-r", "24", "-i", os.path.join(tmp, "%09d.png"),
            "-r", "24", "-vcodec", "libx264", "-pix_fmt", "yuv420p",
            output_path,
        ]
        encode = subprocess.run(command, capture_output=True, text=True, errors="replace")
        if encode.returncode != 0:
            # Best effort: the keypoints are still returned when encoding fails.
            print(f'ffmpeg failed with exit code {encode.returncode}:\n{encode.stderr}')

    return json_landmarks, landmark_keypoints

MARGIN = 10  # pixels
ROW_SIZE = 10  # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
TEXT_COLOR = (255, 0, 0)  # red

def _normalized_to_pixel_coordinates(
    normalized_x: float, normalized_y: float, image_width: int,
    image_height: int) -> Union[None, Tuple[int, int]]:
  """Converts normalized value pair to pixel coordinates."""

  # Checks if the float value is between 0 and 1.
  def is_valid_normalized_value(value: float) -> bool:
    return (value > 0 or math.isclose(0, value)) and (value < 1 or
                                                      math.isclose(1, value))

  if not (is_valid_normalized_value(normalized_x) and
          is_valid_normalized_value(normalized_y)):
    # TODO: Draw coordinates even if it's outside of the image bounds.
    return None
  x_px = min(math.floor(normalized_x * image_width), image_width - 1)
  y_px = min(math.floor(normalized_y * image_height), image_height - 1)
  return x_px, y_px


def visualize(
    image,
    detection_result
) -> np.ndarray:
  """Draws bounding boxes, keypoints and scores on a copy of the input image.

  Args:
    image: Input image array with three dimensions (height, width, channels).
    detection_result: Result object exposing a `detections` list.
  Returns:
    A copy of the image with one box, its keypoints and a
    "name (score)" label drawn for every detection.
  """
  annotated_image = image.copy()
  height, width, _ = image.shape
  keypoint_color, keypoint_radius, keypoint_thickness = (0, 255, 0), 2, 2

  for detection in detection_result.detections:
    # Draw bounding_box
    bbox = detection.bounding_box
    start_point = bbox.origin_x, bbox.origin_y
    end_point = bbox.origin_x + bbox.width, bbox.origin_y + bbox.height
    cv2.rectangle(annotated_image, start_point, end_point, TEXT_COLOR, 3)

    # Draw keypoints
    for keypoint in detection.keypoints:
      keypoint_px = _normalized_to_pixel_coordinates(keypoint.x, keypoint.y,
                                                     width, height)
      # The helper returns None for keypoints outside the frame (a face cut
      # off by the image edge); skip them, since cv2.circle needs a center.
      if keypoint_px is None:
        continue
      cv2.circle(annotated_image, keypoint_px, keypoint_radius,
                 keypoint_color, keypoint_thickness)

    # Draw label and score
    category = detection.categories[0]
    category_name = category.category_name
    category_name = '' if category_name is None else category_name
    probability = round(category.score, 2)
    result_text = category_name + ' (' + str(probability) + ')'
    text_location = (MARGIN + bbox.origin_x,
                     MARGIN + ROW_SIZE + bbox.origin_y)
    cv2.putText(annotated_image, result_text, text_location, cv2.FONT_HERSHEY_PLAIN,
                FONT_SIZE, TEXT_COLOR, FONT_THICKNESS)

  return annotated_image

In [None]:
import os
import json

json_path = "/content/drive/MyDrive/algo-film/_video_clips.json" #@param {type:"string"}
file_filter = ""  #@param {type:"string"}
nperson_filter = 1 #@param {type:"integer"}

shottype = "ECS" #@param ["LS", "FS", "MS", "CS", "ECS"]
min_confidence = 8.0 #@param {type:"number"}

# Fields a clip needs before it can be considered. "shottype_confidence" is
# listed too, so a clip missing it is skipped instead of raising a KeyError.
required_keys = ("person_count", "shottype", "shottype_confidence")

# Stop with a clear message rather than silently reusing a json_data left over
# from an earlier cell (or failing with a NameError when there is none).
if not os.path.isfile(json_path):
    raise FileNotFoundError(f'clip index not found: {json_path}')

print('loading json...')
with open(json_path) as f:
    json_data = json.load(f)
print('loaded.')

# Values are the same dict objects as in json_data.
selection = {
    name: clip
    for name, clip in json_data.items()
    if all(key in clip for key in required_keys)
    and file_filter in str(name)
    and clip["shottype"] == shottype
    and float(clip["shottype_confidence"]) >= float(min_confidence)
    and clip["person_count"] == nperson_filter
}

print(f'Number of clips selected: {len(selection)}')


loading json...
loaded.
Number of clips selected: 329


In [None]:
selection.keys()

### Get face keypoints for single video

In [None]:
from ipywidgets import Video

video_key = "Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-106.mp4" #@param {type:"string"}

# Resolve the clip's file path from the selection built by the filter cell.
video_path = selection[video_key]["path"]
# `autoplay` is the ipywidgets Video trait that starts playback; `play` is
# not a trait of the widget.
Video.from_file(video_path, width=500, autoplay=True)

Video(value=b'\x00\x00\x00 ftypisom\x00\x00\x02\x00isomiso2avc1mp41\x00\x00\x00\x08free\x00v\xbb\xb6mdat\x00\x…

In [None]:
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe import solutions
import json
import math
import cv2
import os
from datetime import datetime

import mediapipe as mp

BaseOptions = mp.tasks.BaseOptions
FaceDetector = mp.tasks.vision.FaceDetector
FaceDetectorOptions = mp.tasks.vision.FaceDetectorOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# Create a face detector instance with the video mode:
options = FaceDetectorOptions(
    base_options=BaseOptions(model_asset_path='/content/detector.tflite'),
    running_mode=VisionRunningMode.VIDEO)

with FaceDetector.create_from_options(options) as detector:
    # NOTE(review): get_face_landmarks is defined in another cell and is not
    # passed the detector, so it presumably reads the module-level `detector`
    # bound here - confirm before renaming it.
    # The first result must not be called `json`: that would rebind the
    # imported json module for every later statement in the kernel.
    landmarks_json, xy = get_face_landmarks(video_path, True)
    print(landmarks_json)
    print(xy)

In [None]:
from ipywidgets import Video

pose_video_path = "/content/tmp-08292023033233/Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-106.mp4" #@param {type:"string"}

# `autoplay` is the ipywidgets Video trait that starts playback; `play` is
# not a trait of the widget.
Video.from_file(pose_video_path, width=500, autoplay=True)

Video(value=b'\x00\x00\x00 ftypisom\x00\x00\x02\x00isomiso2avc1mp41\x00\x00\x00\x08free\x00^H\xcbmdat\x00\x00\…

### Save face keypoints to JSON

In [None]:
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe import solutions
import json
import math
import cv2

import mediapipe as mp

BaseOptions = mp.tasks.BaseOptions
FaceDetector = mp.tasks.vision.FaceDetector
FaceDetectorOptions = mp.tasks.vision.FaceDetectorOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# Create a face detector instance with the video mode:
options = FaceDetectorOptions(
    base_options=BaseOptions(model_asset_path='/content/detector.tflite'),
    running_mode=VisionRunningMode.VIDEO)

# Write the JSON back to disk after this many newly processed clips, so an
# interrupted run loses at most this much work.
CHECKPOINT_EVERY = 250

processed = 0
for clip_name, clip in selection.items():
    # Check for earlier results first so no detector is built for clips that
    # are going to be skipped anyway.
    if "face_landmarks_xy" in json_data[clip_name]:
        print(f'{clip_name} previously processed, skipping classification')
        continue

    # One detector per clip, as in the original loop.
    # NOTE(review): presumably required because VIDEO running mode expects
    # increasing timestamps within one detector instance, and because
    # get_face_landmarks reads the module-level `detector` - confirm.
    with FaceDetector.create_from_options(options) as detector:
        json_xy, xy = get_face_landmarks(clip["path"])

    # Only the xy keypoints are stored; json_xy is deliberately not saved.
    json_data[clip_name]["face_landmarks_xy"] = xy
    print(f'{clip_name}: {json_data[clip_name]["face_landmarks_xy"]}')

    processed += 1
    if processed % CHECKPOINT_EVERY == 0:
        print("Saving files...")
        with open(json_path, 'w') as f:
            json.dump(json_data, f)

# Final save so the clips processed since the last checkpoint are persisted.
with open(json_path, 'w') as f:
    json.dump(json_data, f)

Saving files...
Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-001.mp4: [[[0.5181922912597656, 0.5528110861778259], [0.5724982619285583, 0.490211546421051], [0.5724231004714966, 0.572509229183197], [0.5763235688209534, 0.6458221077919006], [0.46750178933143616, 0.6393066048622131], [0.5873143672943115, 0.5024562478065491]], [[0.46009325981140137, 0.2009832262992859], [0.5397093892097473, 0.30052071809768677], [0.4940858781337738, 0.35595375299453735], [0.45938679575920105, 0.44973224401474], [0.3701145052909851, 0.1561795473098755], [0.5460216999053955, 0.372466504573822]]]
Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-013.mp4: []
Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-047.mp4: [[[0.48361194133758545, 0.3315630555152893], [0.5545117855072021, 0.3982083201408386], [0.49763330817222595, 0.46788161993026733], [0.4879087209701538, 0.5566942095756531], [0.44688552618026733, 0.3193516135215759], [0.6067560315132141, 0.4565737843513489]], 

### Find face endpoints and matches

In [None]:
import os
import json

json_path = "/content/drive/MyDrive/algo-film/_video_clips.json" #@param {type:"string"}
file_filter = ""  #@param {type:"string"}
nperson_filter = 1 #@param {type:"integer"}

shottype = "CS" #@param ["LS", "FS", "MS", "CS", "ECS"]
min_confidence = 3.0 #@param {type:"number"}

# Fail loudly when the metadata file is missing. Otherwise the loop below
# either raises a NameError or silently reuses a `json_data` left in the
# kernel by an earlier run.
if not os.path.isfile(json_path):
    raise FileNotFoundError(f'clip metadata file not found: {json_path}')

print('loading json...')
# The context manager closes the handle as soon as the JSON has been parsed.
with open(json_path) as f:
    json_data = json.load(f)
print('loaded.')

# Keep the clips that carry every field this filter reads (including saved
# face keypoints) and that match the form values above. Clips missing one of
# the fields are skipped.
required_fields = ("person_count", "shottype", "shottype_confidence", "face_landmarks_xy")
selection = {}
for clip_name, clip in json_data.items():
    if any(field not in clip for field in required_fields):
        continue
    if file_filter not in str(clip_name):
        continue
    if (clip["shottype"] == shottype
            and float(clip["shottype_confidence"]) >= float(min_confidence)
            and clip["person_count"] == nperson_filter):
        selection[clip_name] = clip

print(f'Number of clips selected: {len(selection)}')


loading json...
loaded.
Number of clips selected: 186


In [None]:
# Display the keys (clip names) of the current selection.
selection.keys()

dict_keys(['Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-001.mp4', 'Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-013.mp4', 'Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-047.mp4', 'Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-050.mp4', 'Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-062.mp4', 'Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-106.mp4', 'Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-131.mp4', 'Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-132.mp4', 'Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-133.mp4', 'Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-134.mp4', 'Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-135.mp4', 'Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-136.mp4', 'Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-138.mp4', 'Punch-Drunk.Love.2002.1080p.BluRay.x264

In [None]:
from ipywidgets import Video

pose_video_path = "Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-132.mp4" #@param {type:"string"}

# Despite its name, `pose_video_path` holds a selection key here; the actual
# file path is looked up in the selection.
# `autoplay` is the ipywidgets Video trait that starts playback; `play` is
# not a trait of the widget.
Video.from_file(selection[pose_video_path]["path"], width=500, autoplay=True)

Video(value=b'\x00\x00\x00 ftypisom\x00\x00\x02\x00isomiso2avc1mp41\x00\x00\x00\x08free\x00 \xbf\xf5mdat\x00\x…

In [None]:
def remove_from_lists(index, sk, first_kps=None, last_kps=None):
    """Drop the clip at ``index`` from the key list and both keypoint arrays.

    Args:
        index: position of the clip to remove.
        sk: list of clip keys; the entry is popped from this list in place.
        first_kps: array of first-frame keypoints, one row per clip. Defaults
            to the module-level ``firsts`` so existing two-argument calls
            keep working.
        last_kps: array of last-frame keypoints, one row per clip. Defaults
            to the module-level ``lasts``.

    Returns:
        ``(sk, firsts_without_index, lasts_without_index)``. The arrays are
        new objects produced by ``np.delete``; the inputs are not modified.
    """
    if first_kps is None:
        first_kps = firsts
    if last_kps is None:
        last_kps = lasts

    sk.pop(index)
    remaining_firsts = np.delete(first_kps, index, axis=0)
    remaining_lasts = np.delete(last_kps, index, axis=0)

    return sk, remaining_firsts, remaining_lasts

def get_closest_face(vid, candidates=None):
    """Return the index of the candidate whose keypoints are nearest to ``vid``.

    For each candidate, the Euclidean distance between corresponding
    keypoints is computed and summed over the keypoints; the candidate with
    the smallest sum wins.

    Args:
        vid: keypoints to match against, one coordinate row per keypoint.
        candidates: array of candidate keypoint sets, one set per clip.
            Defaults to the module-level ``firsts`` so existing one-argument
            calls keep working.

    Returns:
        Index (along the first axis of ``candidates``) of the closest set.

    Raises:
        ValueError: if there are no candidates to compare against.
    """
    if candidates is None:
        candidates = firsts
    candidates = np.asarray(candidates)
    if candidates.shape[0] == 0:
        raise ValueError("no candidate clips left to compare against")

    # Per-keypoint Euclidean distance (last axis holds the coordinates)...
    distances = np.sqrt(np.sum((np.subtract(candidates, vid))**2, axis=-1))
    # ...then one total per candidate.
    totals = np.sum(distances, axis=1)
    return np.argmin(totals)


In [None]:
# Display the last entry of this clip's stored face_landmarks_xy list.
selection["Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-131.mp4"]["face_landmarks_xy"][-1]

In [None]:
import numpy as np

start_video = "Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-132.mp4" #@param {type:"string"}
max_videos = -1 #@param {type:"integer"}

# Collect the first and last keypoint sets of every usable clip. A clip is
# usable when it has more than two stored entries and both its first and last
# entry hold exactly 6 keypoints.
sorted_keys = []
first_kps = []
last_kps = []
for key in sorted(selection.keys()):
    landmarks = selection[key]["face_landmarks_xy"]
    if len(landmarks) > 2 and len(landmarks[0]) == 6 and len(landmarks[-1]) == 6:
        sorted_keys.append(key)
        first_kps.append(np.asarray(landmarks[0]))
        last_kps.append(np.asarray(landmarks[-1]))
# These two names are read as globals by remove_from_lists / get_closest_face.
lasts = np.asarray(last_kps)
firsts = np.asarray(first_kps)

# The start clip may have been dropped by the usability check above; say so
# explicitly instead of failing inside list.index().
if start_video not in sorted_keys:
    raise ValueError(f'start_video has no usable face keypoints: {start_video}')

# -1 means "use every usable clip". Any other value is clamped to the number
# of usable clips, otherwise the loop would run out of candidates and crash.
if max_videos == -1:
    max_v = len(sorted_keys)
else:
    max_v = min(int(max_videos), len(sorted_keys))

video_order = [start_video]
start_index = sorted_keys.index(start_video)

print(max_v)
counter = 0
# Greedy chain: take the last keypoints of the current clip, remove that clip
# from the pool, then pick the remaining clip whose first keypoints are
# closest to them.
while (counter + 1) < max_v:
    kps = lasts[start_index]
    sorted_keys, firsts, lasts = remove_from_lists(start_index, sorted_keys)
    next_video = get_closest_face(kps)
    video_order.append(sorted_keys[next_video])
    start_index = next_video
    print(f'videos remaining: {len(sorted_keys)}')
    counter += 1


239
videos remaining: 238
videos remaining: 237
videos remaining: 236
videos remaining: 235
videos remaining: 234
videos remaining: 233
videos remaining: 232
videos remaining: 231
videos remaining: 230
videos remaining: 229
videos remaining: 228
videos remaining: 227
videos remaining: 226
videos remaining: 225
videos remaining: 224
videos remaining: 223
videos remaining: 222
videos remaining: 221
videos remaining: 220
videos remaining: 219
videos remaining: 218
videos remaining: 217
videos remaining: 216
videos remaining: 215
videos remaining: 214
videos remaining: 213
videos remaining: 212
videos remaining: 211
videos remaining: 210
videos remaining: 209
videos remaining: 208
videos remaining: 207
videos remaining: 206
videos remaining: 205
videos remaining: 204
videos remaining: 203
videos remaining: 202
videos remaining: 201
videos remaining: 200
videos remaining: 199
videos remaining: 198
videos remaining: 197
videos remaining: 196
videos remaining: 195
videos remaining: 194
videos

In [None]:
video_order

['Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-132.mp4',
 'Punch-Drunk.Love.2002.1080p.BluRay.x264.AAC5.1-[YTS.MX]-Scene-140.mp4',
 'Pitch-Black-2000-Scene-1033.mp4',
 'Pitch-Black-2000-Scene-0772.mp4',
 'Pitch-Black-2000-Scene-0098.mp4',
 'Pitch-Black-2000-Scene-0895.mp4',
 'Pitch-Black-2000-Scene-1456.mp4',
 'Pitch-Black-2000-Scene-1448.mp4',
 'Pitch-Black-2000-Scene-1449.mp4',
 'Pitch-Black-2000-Scene-1450.mp4',
 'Pitch-Black-2000-Scene-1005.mp4',
 'Pitch-Black-2000-Scene-0102.mp4',
 'Pitch-Black-2000-Scene-1460.mp4',
 'Pitch-Black-2000-Scene-1100.mp4',
 'Pitch-Black-2000-Scene-0154.mp4',
 'Pitch-Black-2000-Scene-0464.mp4',
 'Pitch-Black-2000-Scene-0462.mp4',
 'Pitch-Black-2000-Scene-0986.mp4',
 'Pitch-Black-2000-Scene-1458.mp4',
 'Pitch-Black-2000-Scene-1193.mp4',
 'Pitch-Black-2000-Scene-0284.mp4',
 'Pitch-Black-2000-Scene-0279.mp4',
 'Pitch-Black-2000-Scene-1192.mp4',
 'Pitch-Black-2000-Scene-0238.mp4',
 'Pitch-Black-2000-Scene-0373.mp4',
 'Pitch-Black-2000-Scene

In [None]:
import os
from datetime import datetime

def create_tmp_folder(root='/content'):
    """Create a timestamped scratch folder and return its path.

    Args:
        root: directory in which the folder is created. Defaults to
            ``/content``, the location the original code hard-coded.

    Returns:
        Path of the folder, named ``tmp-<MMDDYYYYHHMMSS>`` from the current
        local time. Two calls within the same second return the same folder.
    """
    date_time = datetime.now().strftime("%m%d%Y%H%M%S")
    tmp_folder = os.path.join(root, "tmp-" + date_time)

    # exist_ok avoids the check-then-create race and also raises if the path
    # exists but is not a directory.
    os.makedirs(tmp_folder, exist_ok=True)

    return tmp_folder

In [None]:
# Re-encode every clip in video_order to a common frame size and frame rate,
# then concatenate them into a single output file.
tmp_folder = create_tmp_folder()
output_path = "/content/drive/MyDrive/empty.mp4" #@param {type:"string"}
concat_path = os.path.join(tmp_folder,"concat.txt")

# dimensions[0] is used as width and dimensions[1] as height.
# NOTE(review): the min/max values are taken over the whole selection, not
# only the clips in video_order, and min_width / min_height may come from
# different clips.
dimensions = [v[1]["dimensions"] for v in selection.items()]
widths = [w[0] for w in dimensions]
heights = [h[1] for h in dimensions]
max_width = max(widths)
max_height = max(heights)
min_width = min(widths)
min_height = min(heights)
# Target aspect ratio of the output frame (min_width x min_height).
min_ratio = min_width/min_height
# NOTE(review): max_width, max_height, max_ratio and min_w_ratio are not read
# anywhere else in this cell.
max_ratio = max_width/max_height

# Lines of the ffmpeg concat list, one "file '<path>'" entry per clip.
vids = []

min_w_ratio = int(min_height * min_ratio)

for v in video_order:
    basename = v
    base_path = selection[v]["path"]
    new_path = os.path.join(tmp_folder,basename)

    w = selection[v]["dimensions"][0]
    h = selection[v]["dimensions"][1]
    # print(basename)
    # print(f'ratio: {w/h}')
    # print(f'width: {w}')
    # print(f'height: {h}')

    # Scale so the clip covers the target frame, then crop to
    # min_width x min_height: clips narrower than the target ratio are scaled
    # to the target width, all others to the target height. Every clip is
    # also forced to 24 fps and re-encoded with libx264 / aac.
    if ((w/h) < min_ratio):
        !ffmpeg -hide_banner -loglevel error -i "$base_path" -filter_complex "fps=24,scale={min_width}:-1,crop={min_width}:{min_height}" -vcodec libx264 -c:a aac -crf 13 "$new_path" -y
    else:
        !ffmpeg -hide_banner -loglevel error -i "$base_path" -filter_complex "fps=24,scale=-1:{min_height},crop={min_width}:{min_height}" -vcodec libx264 -c:a aac -crf 13 "$new_path" -y

    vids.append("file '" + new_path +"'")

# Write the concat list consumed by ffmpeg's concat demuxer.
with open(concat_path, 'w') as f:
    f.write('\n'.join(vids))

# Join the re-encoded clips, in video_order, into the output file using
# stream copy (no second re-encode).
!ffmpeg -f concat -safe 0 -i "{concat_path}" -c copy "{output_path}" -y

# Clean up: remove the temp folder and the intermediate clips in it.
!rm -r "{tmp_folder}"

ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

In [None]:
# Number of clips in the current selection.
len(selection)

11823

In [None]:
# Sum the "duration" field over the selection. Clips without the field, or
# with an empty or zero value, are skipped instead of raising a KeyError.
total = 0.0
for clip in selection.values():
    duration = clip.get("duration")
    if duration:
        total += float(duration)

print(total)

KeyError: ignored

## Write to EDL file format

In [None]:
import subprocess
import shutil

def frames_to_timecode(frames, fps=24):
    """Format a frame count as an ``HH:MM:SS:FF`` timecode string.

    Args:
        frames: number of frames counted from zero.
        fps: frames per second used to split the count.

    Returns:
        The timecode with each field zero-padded to at least two digits.
    """
    frames_per_minute = fps * 60
    frames_per_hour = fps * 60 * 60
    # Fields in output order: hours, minutes, seconds, leftover frames.
    fields = (
        frames // frames_per_hour,
        (frames // frames_per_minute) % 60,
        (frames // fps) % 60,
        frames % fps,
    )
    return ":".join(f"{int(field):02d}" for field in fields)

def _probe_duration_seconds(video_path):
    """Return the duration ffprobe reports for the first video stream, as a float."""
    cmd = ["ffprobe", "-v", "error", "-select_streams", "v:0", "-show_entries", "stream=duration", "-of", "default=noprint_wrappers=1:nokey=1", video_path]
    return float(subprocess.check_output(cmd))

def get_video_duration(video_path, fps=24):
    """Return the clip's duration as an ``HH:MM:SS:FF`` timecode string.

    Args:
        video_path: path of the video file to probe with ffprobe.
        fps: frames per second used for the frame field. Defaults to 24,
            the rate the original code hard-coded.

    Returns:
        The duration formatted as ``HH:MM:SS:FF``.

    Raises:
        subprocess.CalledProcessError: if ffprobe exits with an error.
        ValueError: if ffprobe's output is not a number.
    """
    # Round to the nearest whole frame first. ffprobe prints a limited number
    # of decimals, so truncating the fractional second could drop a frame.
    total_frames = int(round(_probe_duration_seconds(video_path) * fps))
    whole_seconds, frames = divmod(total_frames, fps)
    total_minutes, seconds = divmod(int(whole_seconds), 60)
    hours, minutes = divmod(total_minutes, 60)
    return "{:02}:{:02}:{:02}:{:02}".format(int(hours), int(minutes), int(seconds), int(frames))

def get_video_frames_duration(video_path, fps=24):
    """Return the clip's duration as a whole number of frames at ``fps``.

    Args:
        video_path: path of the video file to probe with ffprobe.
        fps: frames per second used to convert seconds to frames.

    Returns:
        The frame count, rounded to the nearest frame.
    """
    # NOTE(review): this one clip is hard-wired to 100 frames, presumably
    # because ffprobe cannot report a duration for it - confirm.
    if video_path == "/content/drive/MyDrive/afi100/_clips/sullivans-travels/sullivans-travels-Scene-482.mp4":
        return 100
    # Convert with the caller's fps directly, so the frame part is not
    # computed at one rate and then multiplied at another.
    return int(round(_probe_duration_seconds(video_path) * fps))

def create_video_edl(clips, output_edl, fps=24, title="NO PEOPLE"):
    """Write an EDL that lays the given clips back to back on one video track.

    Args:
        clips: iterable of video file paths, in timeline order.
        output_edl: path of the EDL file to write (overwritten if present).
        fps: frames per second used for durations and timecodes.
        title: text written on the ``TITLE:`` line. Defaults to the value
            the original code hard-coded.
    """
    with open(output_edl, 'w') as f:
        f.write(f"TITLE: {title}\n")
        f.write("FCM: NON-DROP FRAME\n\n")

        # Running length of the timeline so far, in frames.
        total_frames = 0
        for clip_id, video_file in enumerate(clips, start=1):
            clip_frames = get_video_frames_duration(video_file, fps)

            # Source in/out are relative to the clip itself; record in/out
            # are positions on the assembled timeline.
            src_in = frames_to_timecode(0, fps)
            src_out = frames_to_timecode(clip_frames, fps)
            rec_in = frames_to_timecode(total_frames, fps)
            rec_out = frames_to_timecode(total_frames + clip_frames, fps)

            # Event columns: source in, source out, record in, record out.
            # The source-out column must be the clip's own out point, not the
            # timeline position.
            f.write(f"{clip_id:03}  001  V  C        {src_in} {src_out} {rec_in} {rec_out}\n")
            f.write(f"* FROM CLIP NAME: {os.path.basename(video_file)}\n\n")

            total_frames += clip_frames

output_path = "/content/drive/MyDrive/no-hannibal/" #@param {type:"string"}
fps = 24 #@param {type:"number"}

# Make sure the export folder exists before anything is copied into it.
if not os.path.exists(output_path):
    os.makedirs(output_path)

# Copy every selected clip next to the EDL, keeping its file name.
clips = [clip["path"] for clip in selection.values()]
for c in clips:
    print(c)
    destination = os.path.join(output_path, os.path.basename(c))
    shutil.copy(c, destination)

# Write the EDL describing the clips in selection order.
edl_path = os.path.join(output_path, 'no-people.edl')
create_video_edl(clips, edl_path, fps)

/content/drive/MyDrive/future-of-tv/_clips/Hannibal.S02E08.BDRip.x264-DEMAND-Scene-091.mp4
/content/drive/MyDrive/future-of-tv/_clips/Hannibal.S02E08.BDRip.x264-DEMAND-Scene-101.mp4
/content/drive/MyDrive/future-of-tv/_clips/Hannibal.S02E08.BDRip.x264-DEMAND-Scene-109.mp4
/content/drive/MyDrive/future-of-tv/_clips/Hannibal.S02E08.BDRip.x264-DEMAND-Scene-112.mp4
/content/drive/MyDrive/future-of-tv/_clips/Hannibal.S02E08.BDRip.x264-DEMAND-Scene-113.mp4
/content/drive/MyDrive/future-of-tv/_clips/Hannibal.S02E08.BDRip.x264-DEMAND-Scene-114.mp4
/content/drive/MyDrive/future-of-tv/_clips/Hannibal.S02E08.BDRip.x264-DEMAND-Scene-118.mp4
/content/drive/MyDrive/future-of-tv/_clips/Hannibal.S02E08.BDRip.x264-DEMAND-Scene-119.mp4
/content/drive/MyDrive/future-of-tv/_clips/Hannibal.S02E08.BDRip.x264-DEMAND-Scene-120.mp4
/content/drive/MyDrive/future-of-tv/_clips/Hannibal.S02E08.BDRip.x264-DEMAND-Scene-129.mp4
/content/drive/MyDrive/future-of-tv/_clips/Hannibal.S02E08.BDRip.x264-DEMAND-Scene-133.mp4