In [None]:
%pip install opencv-python-headless
%pip install scikit-image
%pip install fpdf
%pip install yt-dlp

In [None]:
import sys
from PIL import Image
from PIL import ImageFile
sys.modules['Imagefile']=ImageFile
import cv2
import os
import tempfile 
import re
from fpdf import FPDF
import yt_dlp
from skimage.metrics import structural_similarity as ssim
from IPython.display import FileLink

In [None]:
def download_video(url,filename,max_retries=3):
    """Download a video with yt-dlp, retrying when the download fails.

    Args:
        url: Address of the video handed to yt-dlp.
        filename: Value used as yt-dlp's ``outtmpl`` option; also the value
            returned on success.
        max_retries: How many failed attempts are tolerated before giving up.

    Returns:
        ``filename`` once an attempt finishes without a ``DownloadError``.

    Raises:
        Exception: after ``max_retries`` failed attempts (immediately when
            ``max_retries`` is not greater than zero).
    """
    options = {"outtmpl": filename, "format": "best"}
    failures = 0

    while True:
        # Give up once the failure budget is exhausted.
        if not failures < max_retries:
            raise Exception("failed to download after max retries")

        try:
            with yt_dlp.YoutubeDL(options) as downloader:
                downloader.download([url])
            return filename
        except yt_dlp.utils.DownloadError as error:
            # Only download errors are retried; anything else propagates.
            print(f"error {error}")
            failures += 1

In [None]:
def get_video_id(url):
    """Extract the video ID from a YouTube URL.

    The URL is checked, in order, against the ``shorts/<id>``,
    ``youtu.be/<id>``, ``v=<id>`` and ``live/<id>`` forms; the first form that
    matches wins.

    Args:
        url: URL string to inspect. ``None`` or an empty string yields ``None``.

    Returns:
        The matched ID string, or ``None`` when no form matches.
    """
    if not url:
        return None

    # Every form captures the same character class: word characters plus "-".
    # The previous shorts pattern used "w+" (a literal "w") instead of "\w+",
    # and the shorts/live patterns stopped at the first "-" inside an ID.
    id_patterns = (
        r"shorts\/([\w\-]+)",
        r"youtu\.be\/([\w\-]+)",
        r"v=([\w\-]+)",
        r"live\/([\w\-]+)",
    )

    for pattern in id_patterns:
        found = re.search(pattern, url)
        if found:
            return found.group(1)

    return None

In [None]:
def get_playlist_videos(playlist_url):
    """Return the URLs of the entries in a playlist without downloading them.

    Args:
        playlist_url: Playlist address passed to yt-dlp's ``extract_info``.

    Returns:
        A list with the ``"url"`` value of each usable playlist entry. The list
        is empty when yt-dlp returns no information for the playlist.
    """
    ydl_opts = {
        "ignoreerrors": True,
        # yt-dlp's option name is "playlistend"; the misspelled key used before
        # was not a recognised option.
        "playlistend": 1000,
        "extract_flat": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        # Plain assignment: the earlier "-=" referenced the name before it was
        # bound, so the function raised on every call.
        playlist_info = ydl.extract_info(playlist_url, download=False)

    # With "ignoreerrors" enabled, a failed extraction can yield None for the
    # playlist itself or for individual entries, so both are skipped here.
    if not playlist_info:
        return []
    entries = playlist_info.get("entries") or []
    return [entry["url"] for entry in entries if entry and entry.get("url")]

In [None]:
def extract_unique_frames(video_file,output_folder,n=3,ssim_threshold=0.6,fallback_fps=30):
    """Write PNG snapshots of a video at points where the picture changes.

    Every ``n``-th frame is converted to grayscale, resized to 128x72 and
    compared (SSIM) with the previously sampled frame. The first sampled frame
    is always written. After that, when the similarity drops below
    ``ssim_threshold``, the frame buffered from the previous sample is written,
    provided more than ``fps`` frames have passed since the last drop.

    Files are named ``frame<frame_number>_<second>.png`` using the frame number
    at which the write happens.

    Args:
        video_file: Path handed to ``cv2.VideoCapture``.
        output_folder: Directory receiving the PNG files; created if missing.
        n: Sampling stride in frames.
        ssim_threshold: Similarity below which two samples count as different.
        fallback_fps: Frame rate used when the capture reports a non-positive
            FPS, which would otherwise cause a division by zero.

    Returns:
        A list of ``(frame_number, second)`` tuples, one per written file, in
        the order they were written.
    """
    # cv2.imwrite does not create directories, so make sure the target exists.
    # An empty folder name is left alone (os.path.join then yields a bare name).
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    timestamps=[]

    def save(image, number, rate):
        # Name/timestamp come from the frame number at write time, which may be
        # later than the frame the image was taken from.
        frame_path = os.path.join(output_folder,f"frame{number:04d}_{number//rate}.png")
        cv2.imwrite(frame_path,image)
        timestamps.append((number,number//rate))

    cap = cv2.VideoCapture(video_file)
    try:
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        if fps <= 0:
            # Guard against ZeroDivisionError in the "// fps" computations.
            fps = max(1, int(fallback_fps))
            print(f"warning: no usable FPS reported for {video_file}, assuming {fps}")

        last_frame=None               # previous sampled frame (gray, resized)
        saved_frame=None              # most recent sampled frame after the first
        frame_number=0
        last_saved_frame_number=-1    # frame number of the last similarity drop

        while cap.isOpened():
            ret,frame=cap.read()
            if not ret:
                break

            if frame_number % n ==0:
                gray_frame = cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)
                gray_frame=cv2.resize(gray_frame,(128,72))

                if last_frame is None:
                    # First sample: always written.
                    save(frame, frame_number, fps)
                else:
                    similarity=ssim(gray_frame,last_frame,data_range=gray_frame.max()-gray_frame.min())

                    if similarity < ssim_threshold:
                        # Write the buffered frame only if the previous drop was
                        # more than one second's worth of frames ago.
                        if saved_frame is not None and frame_number - last_saved_frame_number >fps:
                            save(saved_frame, frame_number, fps)
                        last_saved_frame_number=frame_number
                    # The current frame becomes the buffer in either case.
                    saved_frame=frame

                last_frame=gray_frame
            frame_number+=1
        # NOTE(review): the frame still buffered when the video ends is never
        # written, so the last segment produces no file — confirm this is intended.
    finally:
        # Release the capture even if reading, SSIM or writing raised.
        cap.release()
    return timestamps

In [None]:
def convert_frames_to_pdf(input_folder,output_file,timestamps):
    """Build a landscape PDF with one full-page frame image per page.

    Frame files in ``input_folder`` are ordered by the number that follows the
    ``frame`` prefix in their name and paired positionally with ``timestamps``.
    Each page gets an ``HH:MM:SS`` label near the top-left corner, drawn in
    white when that corner of the image is dark and in black otherwise.

    Args:
        input_folder: Directory containing ``frame<number>_<second>.png`` files.
            Files not matching that naming are ignored.
        output_file: Path passed to ``FPDF.output``.
        timestamps: Sequence of ``(frame_number, seconds)`` tuples; only the
            seconds value is used. Pairing stops at the shorter of the two
            sequences.
    """
    def frame_index(name):
        # Number between the "frame" prefix and the first underscore, or None
        # for files that are not frame images.
        found = re.match(r"frame(\d+)_", name)
        return int(found.group(1)) if found else None

    # Skip stray files instead of crashing on int() while sorting.
    frame_files = sorted(
        (name for name in os.listdir(input_folder) if frame_index(name) is not None),
        key=frame_index,
    )

    pdf=FPDF("L")
    pdf.set_auto_page_break(0)

    # Region sampled for brightness, and position of the label on the page.
    x,y,width,height=5,5,60,15

    for frame_file,(_frame_number,timestamp_seconds) in zip(frame_files,timestamps):
        frame_path=os.path.join(input_folder,frame_file)

        # Open the image only long enough to measure the corner brightness so
        # the file handle is closed before moving on to the next frame.
        with Image.open(frame_path) as image:
            region = image.crop((x,y,x+width,y+height)).convert("L")
            # Shrinking to a single pixel gives the region's average gray level.
            mean_pixel_value=region.resize((1,1)).getpixel((0,0))

        pdf.add_page()
        pdf.image(frame_path,x=0,y=0,w=pdf.w,h=pdf.h)
        timestamp=f"{timestamp_seconds//3600:02d}:{(timestamp_seconds % 3600)// 60:02d}:{timestamp_seconds % 60:02d}"

        if mean_pixel_value < 64:
            pdf.set_text_color(255,255,255)
        else:
            pdf.set_text_color(0,0,0)

        pdf.set_xy(x,y)
        pdf.set_font("Arial",size=12)
        pdf.cell(0,0,timestamp)

    pdf.output(output_file)