In [31]:
import os
import glob
from pathlib import Path
from typing import List, Any, Tuple, Optional, Union
import cv2
from PIL import Image
from sklearn.metrics.pairwise import cosine_distances

from torchvision import models, transforms
import av
from yt_dlp import YoutubeDL
import numpy as np
import torch
from moviepy import editor
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
from pytorchvideo.data.encoded_video import EncodedVideo
from transformers import (
    VideoMAEForVideoClassification,
    VideoMAEImageProcessor,
    VideoMAEModel,
)

np.random.seed(0)

In [48]:
# Directory that videos are downloaded into and that the frame folders and
# summary clips derived from them are read from / written to by later cells.
save_videos_to = "data"
# Timestamp (in seconds) of the first frame sampled from every video.
start_at_sec = 5
# Seconds between two sampled frames; the same value is later used as the
# length of the clip that is cut around each highlight frame.
window = 10

## **Helper Functions**

In [32]:

class HighlightsFinder:
    """Select the most visually atypical frames from a folder of JPEG frames.

    Each frame is embedded with an ImageNet-pretrained ResNet-18 whose last
    child module (the classification layer) is dropped. A frame's score is the
    median cosine distance between its embedding and the embeddings of all
    frames in the folder; the frames with the largest scores are the highlights.
    """

    def __init__(self, batch_size: int = 32) -> None:
        """Load the feature extractor.

        Args:
            batch_size: Number of images embedded per forward pass. Must be positive.

        Raises:
            ValueError: If ``batch_size`` is not positive.
        """
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.batch_size = batch_size

        # `pretrained=True` is deprecated in recent torchvision releases; use the
        # weights enum when it exists and fall back to the legacy flag otherwise.
        weights_enum = getattr(models, "ResNet18_Weights", None)
        if weights_enum is not None:
            model = models.resnet18(weights=weights_enum.IMAGENET1K_V1)
        else:
            model = models.resnet18(pretrained=True)

        # Keep every child module except the last one so the network outputs
        # pooled features instead of class logits.
        self.model = torch.nn.Sequential(*(list(model.children())[:-1])).to(self.device)
        self.model.eval()

    def _get_transformations(self, will_be_saved: bool) -> List[Any]:
        """Return the list of torchvision transforms applied to every frame.

        Args:
            will_be_saved: When True, a final ``ToPILImage`` step is appended so
                the result is a PIL image rather than a tensor. Note that this
                conversion happens after normalization.
        """
        transformations = [
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
        if will_be_saved:
            transformations.append(transforms.ToPILImage())
        return transformations

    def _preprocess_image(
        self, image: "Image.Image", will_be_saved: bool = False
    ) -> Union[torch.Tensor, "Image.Image"]:
        """Apply the preprocessing pipeline to a single image.

        Returns:
            The transformed PIL image when ``will_be_saved`` is True, otherwise
            the transformed tensor with a leading batch dimension added.
        """
        transform = transforms.Compose(self._get_transformations(will_be_saved=will_be_saved))
        image = transform(image)
        if will_be_saved:
            return image

        # Add a batch axis so single images can be concatenated into a batch.
        return image.unsqueeze(0)

    def _chunks(self, lst, n):
        """
        Yield successive n-sized chunks from lst.
        """
        for i in range(0, len(lst), n):
            yield lst[i : i + n]

    def _create_feature_vectors(self, file_paths: List[str]) -> np.ndarray:
        """Embed every image in ``file_paths`` and return one feature row per image.

        Args:
            file_paths: Paths of the images to embed.

        Returns:
            A 2-D array with ``len(file_paths)`` rows. For an empty input an
            empty ``(0, 0)`` array is returned.
        """
        batch_features = []
        for file_paths_chunk in self._chunks(file_paths, n=self.batch_size):
            # Load and preprocess the images of this batch; the context manager
            # releases each file handle as soon as the pixels are decoded.
            tensors = []
            for img_path in file_paths_chunk:
                with Image.open(img_path) as img:
                    tensors.append(self._preprocess_image(img.convert("RGB")))
            batch = torch.cat(tensors, dim=0).to(self.device)

            # Convert them to features.
            with torch.no_grad():
                f = self.model(batch)

            # Drop the trailing singleton spatial axes and move the result off
            # the device so only one batch at a time occupies device memory.
            batch_features.append(f.flatten(start_dim=1).cpu())

        if not batch_features:
            return np.empty((0, 0), dtype=np.float32)

        # A single concatenation avoids re-copying the accumulated features on
        # every iteration.
        return torch.cat(batch_features, dim=0).numpy()

    def find_highlights(self, frames_path: str, num_highlights: int) -> List[str]:
        """Return the paths of the ``num_highlights`` most atypical frames.

        Args:
            frames_path: Folder containing ``*.jpg`` frames.
            num_highlights: Maximum number of frames to return.

        Returns:
            Frame paths ordered by increasing median cosine distance to all
            frames (the most atypical frame is last). The list is empty when
            ``num_highlights`` is not positive or the folder has no ``*.jpg``.
        """
        # `[-0:]` would select *every* frame, so handle non-positive counts here.
        if num_highlights <= 0:
            return []

        # Sort because glob returns files in arbitrary, OS-dependent order.
        file_paths = sorted(glob.glob(os.path.join(frames_path, "*.jpg")))
        if not file_paths:
            return []

        features = self._create_feature_vectors(file_paths=file_paths)
        distances = cosine_distances(features, features)
        del features
        median_distances = np.median(distances, axis=1)
        del distances
        assert median_distances.shape[0] == len(file_paths)

        idx = np.argsort(median_distances)[-num_highlights:]
        return [file_paths[i] for i in idx]

In [33]:
def extract_frames(video_path: str, images_folder: str, start_at_sec: int = 5, window: int = 10):
    """Save one frame every ``window`` seconds of a video as a JPEG file.

    Frames are written to ``images_folder`` as ``sec_<timestamp>.jpg`` where
    ``<timestamp>`` is the position (in seconds) the frame was read at.
    Sampling stops at the first position from which no frame can be read.

    Args:
        video_path: Path of the video to sample.
        images_folder: Output folder; created if it does not exist.
        start_at_sec: Position (in seconds) of the first sampled frame.
        window: Seconds between two sampled frames. Must be positive.

    Raises:
        ValueError: If ``window`` is not positive (the position would never
            advance and the loop would not terminate).
    """
    if window <= 0:
        raise ValueError(f"window must be positive, got {window}")

    os.makedirs(images_folder, exist_ok=True)
    vidcap = cv2.VideoCapture(video_path)
    try:
        success = True
        while success:
            # Seek to the next sampling position (one frame per `window` seconds).
            vidcap.set(cv2.CAP_PROP_POS_MSEC, (start_at_sec * 1000))
            success, image = vidcap.read()
            if success:
                # Save the frame as a JPEG file named after its timestamp.
                cv2.imwrite(os.path.join(images_folder, f"sec_{start_at_sec}.jpg"), image)
            start_at_sec += window
    finally:
        # Always free the decoder / file handle, even if writing a frame fails.
        vidcap.release()

In [None]:
def merge_timestamps(timestamps: List[Tuple]) -> List[Tuple]:
    """Merge touching or overlapping ``(start, end)`` intervals.

    The input is processed in order of start time (a sorted copy is used, the
    caller's list is not modified). An interval that starts at or before the
    end of the previously kept interval is folded into it.

    Args:
        timestamps: ``(start, end)`` pairs in any order.

    Returns:
        A new list of intervals sorted by start time with no two intervals
        touching or overlapping.
    """
    merged_timestamps = []

    for timestamp in sorted(timestamps, key=lambda x: x[0]):
        if merged_timestamps and timestamp[0] <= merged_timestamps[-1][1]:
            # Extend the previous interval; `max` keeps its end when the
            # current interval is fully contained in it.
            previous_start, previous_end = merged_timestamps[-1][0], merged_timestamps[-1][1]
            merged_timestamps[-1] = (previous_start, max(previous_end, timestamp[1]))
        else:
            merged_timestamps.append(timestamp)  # Start a new interval

    return merged_timestamps

In [None]:
# t = [(0, 5), (10, 20), (20, 30), (30, 40), (40, 120)]
# print(merge_timestamps(t))

[(0, 5), (10, 120)]


In [None]:
def convert_paths_to_timestamps(file_paths: List[str], window: int) -> List[Tuple]:
    """Turn frame file paths into merged ``(start, end)`` clip intervals.

    Each file name is expected to look like ``sec_<t>.jpg``; the frame is
    mapped to the interval of length ``window`` centred on ``<t>`` seconds.

    Args:
        file_paths: Paths of frame images named ``sec_<t>.<ext>``.
        window: Length (in seconds) of the clip cut around every frame.

    Returns:
        Intervals sorted by start time, merged via ``merge_timestamps``.

    Raises:
        ValueError: If a file name does not reduce to an integer after the
            ``sec_`` prefix is removed.
    """
    stems = [Path(path).stem for path in file_paths]
    times = sorted(int(stem.replace("sec_", "")) for stem in stems)
    # Clamp the start at 0: a negative start would otherwise be interpreted by
    # moviepy's `subclip` as an offset from the END of the video.
    timestamps = [(max(0, int(time - window / 2)), int(time + window / 2)) for time in times]
    timestamps = merge_timestamps(timestamps=timestamps)
    return timestamps

## **Download Videos**

In [72]:
# Videos to download. Playlist parameters in a URL are ignored because of the
# "noplaylist" option below.
links = [
    # "https://www.youtube.com/watch?v=d0r0vzvqeoc&ab_channel=LubenTV",
    "https://www.youtube.com/watch?v=SvV6aUki6LU&list=PLCGIzmTE4d0iCqSmha1X7F-_AqB3jjo26&index=7&ab_channel=FIFA",
]

# yt-dlp options:
#   noplaylist - download only the linked video, never the surrounding playlist.
#   outtmpl    - name each file after the video id inside `save_videos_to`.
#   format     - format selector string (see the yt-dlp format selection docs).
ydl_opts = {
    "noplaylist": True,
    "outtmpl": os.path.join(save_videos_to, "%(id)s"),
    "format": "bv*[ext=mp4]+ba[ext=m4a]/b[ext=mp4] / bv*+ba/b",
}

with YoutubeDL(ydl_opts) as downloader:
    downloader.download(links)

[youtube:tab] Extracting URL: https://www.youtube.com/watch?v=SvV6aUki6LU&list=PLCGIzmTE4d0iCqSmha1X7F-_AqB3jjo26&index=7&ab_channel=FIFA
[youtube:tab] Downloading just the video SvV6aUki6LU because of --no-playlist
[youtube] Extracting URL: https://www.youtube.com/watch?v=SvV6aUki6LU
[youtube] SvV6aUki6LU: Downloading webpage
[youtube] SvV6aUki6LU: Downloading ios player API JSON
[youtube] SvV6aUki6LU: Downloading android player API JSON
[youtube] SvV6aUki6LU: Downloading m3u8 information
[info] SvV6aUki6LU: Downloading 1 format(s): 614+140
[hlsnative] Downloading m3u8 manifest
[hlsnative] Total fragments: 1207
[download] Destination: data\SvV6aUki6LU.f614.mp4
[download] 100% of    1.70GiB in 00:04:57 at 5.84MiB/s                        
[download] Destination: data\SvV6aUki6LU.f140.m4a
[download] 100% of   94.38MiB in 00:00:08 at 10.59MiB/s    
[Merger] Merging formats into "data\SvV6aUki6LU.mp4"
Deleting original file data\SvV6aUki6LU.f140.m4a (pass -k to keep)
Deleting original fil

## **Extract frames**

In [73]:
# Sample frames from every downloaded video into a folder named after the video.
for video_path in glob.glob(os.path.join(save_videos_to, "*.mp4")):
    # Skip the "<name>_summary.mp4" files produced by the highlights step.
    # Only the file stem is inspected, so a directory or path that happens to
    # contain the word "summary" no longer causes every video to be skipped.
    if Path(video_path).stem.endswith("_summary"):
        continue
    clips_output_path = os.path.splitext(video_path)[0]
    print(clips_output_path)

    extract_frames(video_path=video_path, images_folder=clips_output_path, start_at_sec=start_at_sec, window=window)

data\d0r0vzvqeoc
data\d0r0vzvqeoc_summary
data\SvV6aUki6LU


## **Extract audio from a clip**

In [5]:
# Load the video.
video = editor.VideoFileClip("test_video.mp4")
try:
    # Extract the audio track (None when the file has no audio stream).
    audio = video.audio
    if audio is None:
        raise ValueError("test_video.mp4 has no audio track to export")

    # Export the audio.
    audio.write_audiofile("audio.mp3")
finally:
    # Release the underlying ffmpeg readers even if the export fails.
    video.close()

MoviePy - Writing audio in audio.mp3


                                                                     

MoviePy - Done.




## **Extract highlights from frames video**

In [74]:
# Build the highlight finder once and reuse it for every video; `batch_size`
# is the number of frames embedded per forward pass.
hf = HighlightsFinder(batch_size=32)

In [75]:
# Collect the frame folders to summarise. A folder qualifies only if
#   - it is not a Jupyter checkpoint folder,
#   - it does not hold frames of an already generated "<name>_summary" clip,
#   - the source video "<folder>.mp4" that the summary is cut from exists.
# The result is sorted because os.scandir yields entries in arbitrary order.
with os.scandir(save_videos_to) as dir_entries:
    frames_output_paths = sorted(
        f.path
        for f in dir_entries
        if f.is_dir()
        and "ipynb_checkpoints" not in f.name
        and not f.name.endswith("_summary")
        and os.path.isfile(f.path + ".mp4")
    )

In [77]:
# For every frame folder: pick the highlight frames, convert them to clip
# intervals and write the concatenated clips to "<folder>_summary.mp4".
for frames_path in frames_output_paths:
    highlights = hf.find_highlights(frames_path=frames_path, num_highlights=5)
    timestamps = convert_paths_to_timestamps(file_paths=highlights, window=window)

    video = editor.VideoFileClip(frames_path + ".mp4")
    try:
        duration = video.duration
        clips = []
        for start_time, end_time in timestamps:
            # Keep each interval inside the video: a negative start would be
            # read as an offset from the end, and an end past the duration
            # would request frames that do not exist.
            start_time = max(0, start_time)
            if duration is not None:
                end_time = min(end_time, duration)
            if end_time <= start_time:
                continue
            clips.append(video.subclip(start_time, end_time))

        if not clips:
            # concatenate_videoclips cannot handle an empty list.
            print(f"No highlight clips for {frames_path}; skipping.")
            continue

        final = editor.concatenate_videoclips(clips)
        final.write_videofile(frames_path + "_summary.mp4")
    finally:
        # Release the ffmpeg readers of the source video in every case.
        video.close()

[5, 15, 25, 35, 115]
[(0, 40), (110, 120)]
[(0, 40), (110, 120)]
Moviepy - Building video data\d0r0vzvqeoc_summary.mp4.
MoviePy - Writing audio in d0r0vzvqeoc_summaryTEMP_MPY_wvf_snd.mp3


                                                                     

MoviePy - Done.
Moviepy - Writing video data\d0r0vzvqeoc_summary.mp4



                                                                

Moviepy - Done !
Moviepy - video ready data\d0r0vzvqeoc_summary.mp4
[835, 1275, 3415, 4605, 5375]
[(830, 840), (1270, 1280), (3410, 3420), (4600, 4610), (5370, 5380)]
[(830, 840), (1270, 1280), (3410, 3420), (4600, 4610), (5370, 5380)]
Moviepy - Building video data\SvV6aUki6LU_summary.mp4.
MoviePy - Writing audio in SvV6aUki6LU_summaryTEMP_MPY_wvf_snd.mp3


                                                                     

MoviePy - Done.
Moviepy - Writing video data\SvV6aUki6LU_summary.mp4



                                                                

Moviepy - Done !
Moviepy - video ready data\SvV6aUki6LU_summary.mp4
