<a href="https://www.kaggle.com/code/esracum/face-detect-with-opencv-dnn?scriptVersionId=294730056" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# Face Detection on YouTube Video Segments

This notebook implements a high-performance face detection pipeline using OpenCV's Deep Neural Network (DNN) module. The project leverages the ResNet-10 SSD architecture to analyze specific video intervals.

# Step 1. Setup and Asset Acquisition
In this stage, we install `yt-dlp`, which is used to fetch the test video. The necessary model configuration (.prototxt) and the pre-trained weights (.caffemodel) are downloaded in the next step.


In [1]:
!pip install -U yt-dlp

Collecting yt-dlp
  Downloading yt_dlp-2025.12.8-py3-none-any.whl.metadata (180 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m180.3/180.3 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading yt_dlp-2025.12.8-py3-none-any.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m67.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: yt-dlp
Successfully installed yt-dlp-2025.12.8


# Step 2: Download Assets and Video
The model files can also be downloaded manually from -> [GitHub Source Link](https://github.com/opencv/opencv/tree/master/samples/dnn/face_detector)

In [2]:
import os
import cv2
import sys
import yt_dlp
from zipfile import ZipFile
from urllib.request import urlretrieve

# ======================== DOWNLOADING ASSETS ========================

def download_and_unzip(url, save_path):
    """Download a zip archive and extract it into the directory of ``save_path``.

    Nothing is done when ``save_path`` already exists. Otherwise the archive is
    fetched into a temporary ``<save_path>.part`` file, extracted, and only then
    moved to ``save_path``. A failed download or a corrupt archive therefore
    never leaves a file at ``save_path`` that would make later runs skip the
    download and extraction.

    Args:
        url: Address of the zip archive to fetch.
        save_path: Location where the downloaded archive is kept (str or path-like).

    Raises:
        Any exception raised by ``urlretrieve`` or ``ZipFile`` is propagated
        after the temporary file has been removed.
    """
    if os.path.exists(save_path):
        return

    print("Downloading and extracting assets....", end="")
    partial_path = os.fspath(save_path) + ".part"
    try:
        urlretrieve(url, partial_path)
        with ZipFile(partial_path) as archive:
            archive.extractall(os.path.dirname(save_path))
        # Publish the archive under its final name only after a successful extraction.
        os.replace(partial_path, save_path)
    finally:
        # After a successful move the temporary file is gone; otherwise clean it up.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Done")

# Zipped model assets (network definition and pre-trained weights).
URL = "https://www.dropbox.com/s/efitgt363ada95a/opencv_bootcamp_assets_12.zip?dl=1"

# The archive is stored in the current working directory.
asset_zip_path = os.path.join(os.getcwd(), "opencv_bootcamp_assets_12.zip")

download_and_unzip(url=URL, save_path=asset_zip_path)

# --- YOUTUBE TEST VIDEO DOWNLOAD ---

# Address of the video that is downloaded below and used as input for face detection.
youtube_url = "https://www.youtube.com/watch?v=qB6BGsCqAgA"
def download_youtube_video(url, output_path="input_video.mp4", video_format="18"):
    """Download a video with yt-dlp and return the path it was written to.

    Args:
        url: Address of the video to download.
        output_path: File name the video is saved under. It is passed to yt-dlp
            as its output template, so it should not contain ``%`` placeholders
            unless templating is intended.
        video_format: yt-dlp format selector. The default ``"18"`` is presumably
            YouTube's progressive 360p MP4 stream - verify with ``yt-dlp -F``.

    Returns:
        ``output_path``, so the caller can hand it straight to the video reader.
    """
    ydl_opts = {
        'format': video_format,
        'outtmpl': output_path,
        'quiet': True,
        'no_warnings': True,
        # NOTE(review): this disables TLS certificate verification for the
        # download; confirm it is still required in the target environment.
        'nocheckcertificate': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return output_path

# Fetch the test video; the returned file path is what the detection step opens.
video_path = download_youtube_video(url=youtube_url)
print("Preparation is complete.")

Downloading and extracting assets....Done
Preparation is complete.


# Step 3: Face Detection and Video Transcoding

.prototxt: Defines the network architecture (how the layers are arranged).

.caffemodel: Contains the trained weights (learned facial features).



---

### -----> mean = [104, 117, 123]


These numbers were not chosen randomly. The model we used was trained on a massive dataset called ImageNet, which contains millions of photos.

104: This is the average of the Blue channels in all those millions of images.

117: This is the average of the Green channels in all those images.

123: This is the average of the Red channels in all those images. (Note: Since OpenCV uses the BGR format, the order is Blue-Green-Red.)

If we subtract these "average" values, we essentially ignore the overall brightness level of the image (whether it is bright or dark). Thus, the AI focuses on the features and shape of the face, rather than the intensity of the light.

In [3]:
# Set video source
source = cv2.VideoCapture(video_path)

if not source.isOpened():
    print("Error: Video file could not be opened!")
else:
    # --- SETTINGS ---
    start_second = 45  # From which second of the video should it start?
    duration = 15      # For how many seconds should the operation last?

    # Jump to starting point (in milliseconds: seconds * 1000)
    source.set(cv2.CAP_PROP_POS_MSEC, start_second * 1000)

    # Get video information (named constants instead of the bare property ids 3 and 4)
    frame_width = int(source.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(source.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep the frame rate as a float: truncating e.g. 29.97 to 29 would make the
    # written video play at the wrong speed and shorten the processed segment.
    fps = source.get(cv2.CAP_PROP_FPS)
    limit_frames = int(round(fps * duration))  # Total number of frames to process

    # Load the detector before the output file is created.
    net = cv2.dnn.readNetFromCaffe("deploy.prototxt", "res10_300x300_ssd_iter_140000_fp16.caffemodel")

    in_width, in_height = 300, 300
    mean = [104, 117, 123]
    conf_threshold = 0.7
    frame_count = 0

    # Temporary raw video file (with mp4v)
    out = cv2.VideoWriter('temp_output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

    print(f"Processing {duration} seconds of video starting from the {start_second}. second...")
    try:
        # Stop after exactly limit_frames frames, or earlier if the video ends.
        while frame_count < limit_frames:
            has_frame, frame = source.read()
            if not has_frame:
                break

            frame_count += 1
            frame = cv2.flip(frame, 1)  # Mirrors the image horizontally

            # Create a 4D blob from the frame: resized to the network input size with
            # the per-channel mean subtracted; channels stay in OpenCV's BGR order.
            blob = cv2.dnn.blobFromImage(frame, 1.0, (in_width, in_height), mean, swapRB=False, crop=False)
            net.setInput(blob)
            detections = net.forward()

            # As used below: axis 2 enumerates the candidates, index 2 holds the
            # confidence and indices 3-6 the box corners as fractions of the frame size.
            for i in range(detections.shape[2]):
                confidence = detections[0, 0, i, 2]
                if confidence > conf_threshold:
                    x1 = int(detections[0, 0, i, 3] * frame_width)
                    y1 = int(detections[0, 0, i, 4] * frame_height)
                    x2 = int(detections[0, 0, i, 5] * frame_width)
                    y2 = int(detections[0, 0, i, 6] * frame_height)

                    # Drawing
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            out.write(frame)
    finally:
        # Always close the reader and flush the writer, even if a frame fails.
        source.release()
        out.release()

    # Re-encode the temporary file with libx264 (presumably so that the notebook's
    # video player can play it - TODO confirm) and check that ffmpeg succeeded.
    exit_status = os.system("ffmpeg -y -i temp_output.mp4 -vcodec libx264 -loglevel quiet binnaz_output.mp4 > /dev/null 2>&1")

    if exit_status != 0:
        print("Error: ffmpeg could not transcode temp_output.mp4 to binnaz_output.mp4!")
    else:
        print(f"Processing complete! The segment from {start_second} to {start_second + duration} seconds is ready.")

Processing 15 seconds of video starting from the 45. second...
Processing complete! The segment from 45 to 60 seconds is ready.


# Step 4: Display Result

In [4]:
from IPython.display import Video

# Embed the transcoded clip in the notebook; the bare last expression renders it.
result_video = Video("binnaz_output.mp4", embed=True, width=700)
result_video