In [14]:
import hashlib
import os
import shutil
import tempfile
import time
from datetime import datetime, timedelta

import cv2
import yt_dlp

In [26]:
def time_str_to_seconds(time_str):
    """Convert a time string (MM:SS or HH:MM:SS) to a total number of seconds.

    Args:
        time_str (str): Time in MM:SS or HH:MM:SS format, e.g. "19:40" or "1:02:03".

    Returns:
        int: Number of seconds represented by time_str.

    Raises:
        ValueError: If time_str is not a string made of two or three
            colon-separated, non-negative integers.
    """
    error_message = "Time must be in MM:SS or HH:MM:SS format"

    try:
        parts = [int(part) for part in time_str.split(':')]
    except (AttributeError, TypeError, ValueError) as exc:
        # AttributeError/TypeError: time_str is not a str;
        # ValueError: a field is not an integer.
        # Only these are translated, so KeyboardInterrupt and friends propagate.
        raise ValueError(error_message) from exc

    # Negative fields would silently produce a negative (or shortened) offset.
    if len(parts) not in (2, 3) or any(part < 0 for part in parts):
        raise ValueError(error_message)

    # Fold left to right: each field is worth 60x the field after it.
    total_seconds = 0
    for part in parts:
        total_seconds = total_seconds * 60 + part
    return total_seconds

def generate_filename():
    """Generate a unique JPEG filename of the form 'youtube_<21 hex chars>.jpg'.

    Returns:
        str: A filename (no directory component) for one extracted frame.
    """
    # The clock alone is not unique: two calls within one timer tick would
    # hash to the same name and the later frame would overwrite the earlier
    # one. Random bytes from the OS make every digest distinct.
    seed = str(time.time()).encode('utf-8') + os.urandom(16)
    random_hash = hashlib.md5(seed).hexdigest()[:21]
    return f'youtube_{random_hash}.jpg'

def download_and_extract_frames_range(youtube_url, timestamp_first, timestamp_last, output_dir, interval_seconds=10):
    """
    Download a YouTube video and save frames between two timestamps at a fixed interval.

    The video is downloaded with yt-dlp into a freshly created temporary
    directory, frames are read with OpenCV and written to output_dir under
    names produced by generate_filename(). The temporary directory and its
    contents are removed before the function returns.

    Failures (invalid arguments, download or decoding problems) are not
    raised to the caller; they are reported by printing "Error: <message>".

    Args:
        youtube_url (str): YouTube video URL
        timestamp_first (str): Start time in MM:SS or HH:MM:SS format
        timestamp_last (str): End time in MM:SS or HH:MM:SS format
        output_dir (str): Directory to save extracted frames (created if missing)
        interval_seconds (int): Interval between frames in seconds; must be
            greater than 0 (default: 10)

    Returns:
        None
    """
    temp_dir = None
    cap = None

    try:
        # A zero or negative step would never advance past end_seconds,
        # so the extraction loop below would not terminate.
        if interval_seconds <= 0:
            raise ValueError("Interval must be greater than 0 seconds")

        # Convert timestamps to seconds
        start_seconds = time_str_to_seconds(timestamp_first)
        end_seconds = time_str_to_seconds(timestamp_last)

        if start_seconds >= end_seconds:
            raise ValueError("End time must be after start time")

        # Create directories. The download goes into a unique scratch
        # directory so that cleanup can never touch files the user already
        # had and two runs cannot collide on a shared folder.
        os.makedirs(output_dir, exist_ok=True)
        temp_dir = tempfile.mkdtemp(prefix='temp_video_', dir='.')

        # Configure yt-dlp
        ydl_opts = {
            'format': 'best[ext=mp4]',
            'outtmpl': os.path.join(temp_dir, '%(id)s.%(ext)s'),
            'quiet': True
        }

        # Download video
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(youtube_url, download=True)

        # The format selector above only accepts mp4, which fixes the extension.
        temp_video = os.path.join(temp_dir, f"{info['id']}.mp4")

        # Open video with OpenCV
        cap = cv2.VideoCapture(temp_video)
        if not cap.isOpened():
            raise RuntimeError("Failed to open video file")

        # Get video properties
        fps = cap.get(cv2.CAP_PROP_FPS)
        if fps <= 0:
            raise ValueError("Could not determine video FPS")

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = total_frames / fps

        # Validate timestamps
        if end_seconds > duration:
            raise ValueError(f"End time exceeds video duration ({duration:.1f} seconds)")

        # Extract frames at specified intervals
        current_second = start_seconds
        frames_extracted = 0

        while current_second <= end_seconds:
            # Set frame position
            frame_pos = int(current_second * fps)
            if frame_pos >= total_frames:
                break

            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_pos)
            ret, frame = cap.read()

            if not ret:
                print(f"Warning: Could not read frame at {current_second} seconds")
                break

            # Save frame; imwrite reports failure through its return value,
            # so only frames that were actually written are counted.
            output_path = os.path.join(output_dir, generate_filename())
            if cv2.imwrite(output_path, frame):
                frames_extracted += 1
            else:
                print(f"Warning: Could not save frame at {current_second} seconds")

            # Move to next interval
            current_second += interval_seconds

        print(f"Successfully extracted {frames_extracted} frames to: {output_dir}")

    except Exception as e:
        # Deliberately best-effort: report the problem instead of raising.
        print(f"Error: {str(e)}")
    finally:
        # Cleanup. Release the capture first so the video file is no longer
        # held open, then drop the whole scratch directory, including any
        # partial download files yt-dlp may have left behind.
        if cap is not None:
            cap.release()
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)

# Example usage: extract frames from the first 19 minutes 40 seconds of the
# video, using the function's default interval between frames.
if __name__ == "__main__":
    youtube_url = "https://www.youtube.com/watch?v=O-8SUgKFHL8"
    timestamp_first = "0:00"  # Start time (MM:SS)
    timestamp_last = "19:40"   # End time (MM:SS)
    output_dir = '../nematode_disease_in_coffee_leaf_images'  # created if it does not exist
    
    download_and_extract_frames_range(youtube_url, timestamp_first, timestamp_last, output_dir)

Successfully extracted 119 frames to: ../nematode_disease_in_coffee_leaf_images


In [28]:
# Count the extracted frame images (files named youtube_*.jpg) in the directory

def count_frames_in_dir(directory):
    """Count the extracted frame images (files named youtube_*.jpg) in a directory.

    Args:
        directory (str): Directory to scan (not searched recursively).

    Returns:
        int: Number of entries whose name starts with 'youtube_' and ends
            with '.jpg'.

    Raises:
        OSError: If the directory cannot be listed (e.g. it does not exist).
    """
    # Require the .jpg suffix as well as the prefix so that unrelated
    # 'youtube_*' entries are not reported as frames.
    return sum(
        1
        for name in os.listdir(directory)
        if name.startswith('youtube_') and name.endswith('.jpg')
    )

# Same directory the extraction example writes to; `path` is reused by the
# deletion cell further down. The count covers everything currently in the
# directory, so frames left over from earlier runs are included.
path = '../nematode_disease_in_coffee_leaf_images'
print(f"Total frames extracted: {count_frames_in_dir(path)}")

Total frames extracted: 176


In [17]:
# Delete the extracted frame images (files named youtube_*.jpg) from the directory

def delete_frames_in_dir(directory):
    """Delete the extracted frame images (files named youtube_*.jpg) in a directory.

    Only regular files whose name starts with 'youtube_' and ends with '.jpg'
    are removed; everything else in the directory is left untouched.

    Args:
        directory (str): Directory to clean (not processed recursively).

    Raises:
        OSError: If the directory cannot be listed or a file cannot be removed.
    """
    for name in os.listdir(directory):
        if not (name.startswith('youtube_') and name.endswith('.jpg')):
            continue
        frame_path = os.path.join(directory, name)
        # os.remove raises on directories, so skip anything that is not a file.
        if os.path.isfile(frame_path):
            os.remove(frame_path)

# Destructive: removes the extracted frames from `path` (defined in the
# counting cell above). Run only when the frames are no longer needed.
delete_frames_in_dir(path)
print(f"Deleted all frames in directory: {path}")

Deleted all frames in directory: ../nematode_disease_in_coffee_leaf_images
