In [9]:
!pip install opencv-python

Defaulting to user installation because normal site-packages is not writeable
Collecting opencv-python
  Downloading opencv_python-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl.metadata (20 kB)
Downloading opencv_python-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl (37.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37.3/37.3 MB[0m [31m33.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: opencv-python
Successfully installed opencv-python-4.11.0.86


### some resources
#### https://shimat.github.io/opencvsharp_docs/html/6121915d-1174-7345-bdca-789ee1373642.htm
#### https://www.geeksforgeeks.org/opencv-python-tutorial/?ref=shm 
#### https://www.geeksforgeeks.org/opencv-the-gunnar-farneback-optical-flow/

### Imports

In [25]:
import cv2
import os

### Splitting Clips Up From The Video

In [27]:
""" purpose of this function is to crop the borders equally on both sides. Tested 
different border_ratio param values and found that 0.343 best suited for 
the the compilations videos in the dataset."""
def crop_borders(frame, border_ratio = 0.343):
    height, width, _ = frame.shape
    crop_width = int(width * border_ratio)
    cropped_frame = frame[:, crop_width:-crop_width]  
    return cropped_frame

""" turns compilation into frames and compares the pixel intensities for frames after
turning it to grayscale in consecutive order and if it is greater than a certain threshold, 
func will classify it as a scene change.  
"""
def detect_scenes(video_path, threshold):
    # initialization
    cap = cv2.VideoCapture(video_path)
    scene_changes = []
    last_frame = None
    frame_index = 0

    # handling error if vid can't be opened
    if not cap.isOpened():
        print(f"Can't open video at {video_path}")
        return

    while True:
        success, frame = cap.read()
        if not success:
            break

        cropped_frame = crop_borders(frame)

        # convert to BGR, less compute + easy processing pixels
        gray_frame = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2GRAY)

        # compares pixel intensity btwn frames if last frame is present, retrieves diff in values
        if last_frame is not None:
            diff = cv2.absdiff(gray_frame, last_frame)
            mean_diff = diff.mean()

            if mean_diff > threshold:
                scene_changes.append(frame_index)

        last_frame = gray_frame
        frame_index += 1

    # avoid cv error
    cap.release()
    return scene_changes

""" takes in previous helper functions: crop_borders & detect_scenes. extracts clips with the side
borders cropped out instead of having to manually go into the vid to clip them out. """
def extract_clips(video_path, scene_changes, output_dir, border_ratio = 0.343):
    cap = cv2.VideoCapture(video_path)

    # handling error if vid can't be opened
    if not cap.isOpened():
        print(f"Can't open video at {video_path}")
        return

    # encode vid + retrieve dims
    fps = cap.get(cv2.CAP_PROP_FPS)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    crop_width = int(width * border_ratio)
       
    output_width = width - 2 * crop_width
    output_height = height
    
    os.makedirs(output_dir, exist_ok=True)

    # append last frame to ensure the final clip is extracted bc detect_scene doesn't account for it
    scene_changes.append(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)))

    print("Starting Clip Extraction:")
    for i in range(len(scene_changes) - 1):
        # initialize start and end times based on detect_scenes)
        start_frame = scene_changes[i]
        end_frame = scene_changes[i + 1]

        output_path = os.path.join(output_dir, f"clip_{i + 1}.mp4")
        out = cv2.VideoWriter(output_path, fourcc, fps, (output_width, output_height))

        #record
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

        for frame_index in range(start_frame, end_frame):
            success, frame = cap.read()
            if not success:
                break

            cropped_frame = crop_borders(frame, border_ratio)
            out.write(cropped_frame)

        out.release()
        print(f"Clip {i+1} has been saved to {output_dir}.")

    cap.release()

if __name__ == "__main__":
    video_path = "data/YouTube.mp4"  
    output_dir = "data/clips"  

    #tested these values and found that these params were
    # best suited for threshold and border_ratio
    # most likely threshold can be altered depending on the compilation
    # video we're working with but highly unlikely, border_ratio is set          
    threshold = 51                 
    border_ratio = 0.343              

    scene_changes = detect_scenes(video_path, threshold)
    print(scene_changes)
    extract_clips(video_path, scene_changes, output_dir, border_ratio)

[355, 707, 1033, 1384, 1706, 2057, 2411, 2719, 3073, 3427, 3779, 4126, 4451, 4767, 5103, 5455, 5772, 6083, 6435, 6786, 7098, 7347, 7700, 7976, 7977, 8006, 8021, 8368, 8724, 9080, 9432]
Starting Clip Extraction:
Clip 1 has been saved to data/clips.
Clip 2 has been saved to data/clips.
Clip 3 has been saved to data/clips.
Clip 4 has been saved to data/clips.
Clip 5 has been saved to data/clips.
Clip 6 has been saved to data/clips.
Clip 7 has been saved to data/clips.
Clip 8 has been saved to data/clips.
Clip 9 has been saved to data/clips.
Clip 10 has been saved to data/clips.
Clip 11 has been saved to data/clips.
Clip 12 has been saved to data/clips.
Clip 13 has been saved to data/clips.
Clip 14 has been saved to data/clips.
Clip 15 has been saved to data/clips.
Clip 16 has been saved to data/clips.
Clip 17 has been saved to data/clips.
Clip 18 has been saved to data/clips.
Clip 19 has been saved to data/clips.
Clip 20 has been saved to data/clips.
Clip 21 has been saved to data/clips.


### Scrapping Clips Outside of Average Clip Length

In [28]:
import cv2
import os

""" Purpose of function is to filter out clips within a time (seconds) deviation based on the average
clip length within the folder. This will remove clips whose pixel intensities are so high that the previous
helper functions continually clip it frame by frame and also the ending credits clip at the end of the video. 
Clips average time length will be calculated and a (arbitrary) deviation will be applied to the mean time length
and delete videos outside of that frame. 
FEEL FREE TO ALTER DEVIATION IF IT DOESN'T SUIT CLIPS AFTER MANUALLY INSPECTING """
# i.e. arbitrary val of 2 based on the "call my slimes" tiktok dance
def filtering_clips(output_dir, fps, deviation=2):
    clip_durations = []
    clip_paths = []

    for clip_name in os.listdir(output_dir):
        clip_path = os.path.join(output_dir, clip_name)
        
        cap = cv2.VideoCapture(clip_path)

        # handling error if vid can't be opened 
        if not cap.isOpened():
            print(f"Could not open {clip_path}, skipping.")
            continue
        
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = frame_count / fps  

        clip_durations.append(duration)
        clip_paths.append(clip_path)

        cap.release()

    # when no clip vid lens are found
    if len(clip_durations) == 0:
        print("No valid clips found.")
        return

    average_duration = sum(clip_durations) / len(clip_durations)

    min_duration = max(0, average_duration - deviation)
    max_duration = average_duration + deviation
    print(f"Average clip duration: {average_duration:.2f} seconds")
    print(f"Removing clips shorter than {min_duration:.2f} seconds")

    for clip_path, duration in zip(clip_paths, clip_durations):
        # removes those outside of the deviation range of the avg
        # i.e. within 2 seconds of the average
        if duration < min_duration or duration > max_duration:
            os.remove(clip_path)
            print(f"Deleted {clip_path} (Duration: {duration:.2f} sec)")

if __name__ == "__main__":
    video_path = "data/YouTube.mp4"  
    output_dir = "data/clips"            

    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    cap.release()
    filtering_clips(output_dir, fps)

Could not open data/clips/.DS_Store, skipping.
Average clip duration: 10.41 seconds
Removing clips shorter than 8.41 seconds
Deleted data/clips/clip_26.mp4 (Duration: 0.50 sec)
Deleted data/clips/clip_24.mp4 (Duration: 0.03 sec)
Deleted data/clips/clip_31.mp4 (Duration: 19.99 sec)
Deleted data/clips/clip_25.mp4 (Duration: 0.97 sec)
Deleted data/clips/clip_21.mp4 (Duration: 8.31 sec)


OpenCV: Couldn't read video stream from file "data/clips/.DS_Store"


### DensePose?