In [26]:
import cv2
import os

## Extract images from video

In [37]:
def _smart_square_crop(img):
    """
    Return a square crop of ``img`` whose side is min(width, height).

    The crop is centred on the bounding box of the largest external contour
    found in an Otsu-thresholded grayscale copy of the image, then shifted as
    needed so that it stays inside the image. If no contour is found, the
    centre square is returned instead.
    """
    h, w = img.shape[:2]
    crop_size = min(w, h)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # With THRESH_OTSU OpenCV computes the threshold itself and ignores the
    # `thresh` argument, so 0 is passed rather than a misleading magic number.
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if contours:
        x, y, box_w, box_h = cv2.boundingRect(max(contours, key=cv2.contourArea))
        half = crop_size // 2
        # Clamp the top-left corner so the square never leaves the image.
        # crop_size <= w and crop_size <= h, so both upper bounds are >= 0.
        left = min(max(0, x + box_w // 2 - half), w - crop_size)
        top = min(max(0, y + box_h // 2 - half), h - crop_size)
    else:
        # Fallback: centre crop.
        left = (w - crop_size) // 2
        top = (h - crop_size) // 2

    return img[top:top + crop_size, left:left + crop_size]


def _next_image_index(directory, prefix):
    """
    Return the index to use for the next "<prefix><N>.png" file in ``directory``.

    Only files whose name is exactly the prefix, a decimal number and ".png"
    are considered; other files sharing the prefix are ignored instead of
    aborting the run. Returns 0 when no such file exists.
    """
    suffix = ".png"
    indices = []
    for name in os.listdir(directory):
        if not (name.startswith(prefix) and name.endswith(suffix)):
            continue
        number = name[len(prefix):-len(suffix)]
        if number.isdecimal():
            indices.append(int(number))
    return max(indices) + 1 if indices else 0


def _scaled_size(width, height, scale_factor):
    """Return the (width, height) target for cv2.resize, rejecting empty sizes."""
    size = (int(width * scale_factor), int(height * scale_factor))
    if min(size) < 1:
        raise ValueError(
            f"scale_factor={scale_factor} reduces a {width} x {height} image to an empty size {size}."
        )
    return size


def _write_image(path, image):
    """Write ``image`` to ``path``; raise OSError if OpenCV reports a failed write."""
    if not cv2.imwrite(path, image):
        raise OSError(f"Failed to write image: {path}")


def create_HR_LR_images_from_video(
        video_path, 
        skip_seconds=(2, 2), 
        frame_interval=10, 
        scale_factor=0.5, 
        interpolation=cv2.INTER_CUBIC, 
        output_name=""):
    """
    Extracts frames from a video file, skipping the first and last few seconds,
    crops each selected frame to a square region around the main object, and
    saves high-resolution (HR) and low-resolution (LR) image pairs under
    ``data/images/HR/<output_name>/`` and ``data/images/LR/<output_name>/``.

    For every selected frame:
      * the frame is cropped to a square of side min(width, height), centred
        on the largest contour of an Otsu-thresholded grayscale copy (centre
        crop if no contour is found);
      * the HR image is that crop resized by ``scale_factor``;
      * the LR image is the HR image resized by ``scale_factor`` once more.

    Files are named ``<output_name><N>.png``. If such files already exist in
    the HR directory, numbering continues after the highest existing N.
    A short summary is printed at the end.

    Parameters:
        video_path (str): Path to the input video file.
        skip_seconds (tuple): (start, end) seconds to skip at the start and end of the video.
        frame_interval (int): Save every ``frame_interval``-th frame counted from the
            first non-skipped frame (0 means save all frames).
        scale_factor (float): Factor applied to the crop to get the HR image and
            applied again to the HR image to get the LR image.
        interpolation: OpenCV interpolation flag used for both resize steps.
        output_name (str): Name of the output sub-directory and prefix of the image
            files. Must be non-empty (the default "" is rejected).

    Raises:
        ValueError: If an argument is invalid, the video cannot be opened, the
            skipped ranges do not leave any frame, a skip is requested but the
            video reports no valid frame rate, or ``scale_factor`` produces an
            empty image.
        FileNotFoundError: If ``video_path`` does not exist.
        OSError: If an image cannot be written.
    """
    if not video_path or not isinstance(video_path, str):
        raise ValueError("video_path must be a non-empty string.")
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Video file not found: {video_path}")
    if not isinstance(skip_seconds, tuple) or len(skip_seconds) != 2:
        raise ValueError("skip_seconds must be a tuple of two values (start_skip, end_skip).")
    if not isinstance(frame_interval, int) or frame_interval < 0:
        raise ValueError("frame_interval must be a non-negative integer.")
    if not isinstance(output_name, str) or not output_name:
        raise ValueError("output_name must be a non-empty string.")
    if not isinstance(scale_factor, (int, float)) or scale_factor <= 0:
        raise ValueError("scale_factor must be a positive number.")

    hr_size = None  # (width, height) of the last HR image written, if any
    lr_size = None  # (width, height) of the last LR image written, if any

    cap = cv2.VideoCapture(video_path)
    try:
        # The file may exist and still not be decodable by OpenCV.
        if not cap.isOpened():
            raise ValueError(f"Could not open video file: {video_path}")

        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Without a frame rate, seconds cannot be converted to frames; refuse
        # instead of silently skipping nothing. `not fps > 0` also catches NaN.
        if not fps > 0 and any(skip_seconds):
            raise ValueError("Cannot apply skip_seconds: the video does not report a valid frame rate.")

        # Calculate start and end frames after skipping initial/final seconds
        start_frame = int(skip_seconds[0] * fps)
        end_frame = total_frames - int(skip_seconds[1] * fps)
        if start_frame < 0 or end_frame < 0:
            raise ValueError("Skip seconds result in negative frame indices.")
        elif start_frame >= total_frames or end_frame > total_frames:
            raise ValueError("Skip seconds exceed total video duration.")
        elif start_frame >= end_frame:
            raise ValueError("Start frame must be less than end frame after skipping seconds.")

        # Create output directories
        full_HR_output_dir = f"data/images/HR/{output_name}"
        full_LR_output_dir = f"data/images/LR/{output_name}"
        os.makedirs(full_HR_output_dir, exist_ok=True)
        os.makedirs(full_LR_output_dir, exist_ok=True)

        # Continue numbering after the last image already in the HR directory
        first_index = _next_image_index(full_HR_output_dir, output_name)
        saved_count = first_index

        current_frame = 0
        # Frames are decoded sequentially; stop as soon as the end of the
        # wanted range is reached rather than decoding the skipped tail.
        while current_frame < end_frame:
            ret, frame = cap.read()
            if not ret:
                break

            offset = current_frame - start_frame
            if offset >= 0 and (frame_interval == 0 or offset % frame_interval == 0):
                # --- HR: square crop, resized by scale_factor ---
                cropped = _smart_square_crop(frame)
                hr_size = _scaled_size(cropped.shape[1], cropped.shape[0], scale_factor)
                hr_img = cv2.resize(cropped, hr_size, interpolation=interpolation)
                _write_image(
                    os.path.join(full_HR_output_dir, f"{output_name}{saved_count}.png"), hr_img)

                # --- LR: HR image resized by scale_factor again ---
                lr_size = _scaled_size(hr_img.shape[1], hr_img.shape[0], scale_factor)
                lr_img = cv2.resize(hr_img, lr_size, interpolation=interpolation)
                _write_image(
                    os.path.join(full_LR_output_dir, f"{output_name}{saved_count}.png"), lr_img)

                saved_count += 1
            current_frame += 1
    finally:
        # Release the capture on every path, including validation errors.
        cap.release()

    # Print analysis
    crop_side = int(min(frame_width, frame_height))
    print("=== VIDEO ANALYSIS ===")
    print(f"Total frames: {total_frames}")
    print(f"Total frames (after skipped seconds): {end_frame - start_frame}")
    print(f"Images saved in this run: {saved_count - first_index}")
    print(f"Total images in directory: {saved_count}")
    print(f"Original HR frame size (width x height): {frame_width} x {frame_height}")
    print(f"Cropped HR frame size (width x height): {crop_side} x {crop_side}")
    if hr_size is None:
        # No frame could be read inside the selected range (e.g. the container
        # over-reports its frame count), so there is no output size to report.
        print("Cropped Downscaled HR frame size (width x height): n/a (no frames were saved)")
        print("LR frame size (width x height): n/a (no frames were saved)")
    else:
        print(f"Cropped Downscaled HR frame size (width x height): {hr_size[0]} x {hr_size[1]}")
        print(f"LR frame size (width x height): {lr_size[0]} x {lr_size[1]}")

In [38]:
# Build the HR/LR image pairs for the "low_z_offset" recording: skip 2 s at
# each end and keep every 10th frame (scale factor and interpolation use the
# function defaults).
create_HR_LR_images_from_video(
    "data/videos/video_low_z_offset_1.mp4",
    skip_seconds=(2, 2),
    frame_interval=10,
    output_name="low_z_offset",
)

=== VIDEO ANALYSIS ===
Total frames: 1597
Total frames (after skipped seconds): 1477
Images saved in this run: 148
Total images in directory: 148
Original HR frame size (width x height): 478 x 850
Cropped HR frame size (width x height): 478 x 478
Cropped Downscaled HR frame size (width x height): 239 x 239
LR frame size (width x height): 119 x 119
