In [7]:
import os
import sys
import cv2
import pickle
import numpy as np

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../")))

from data.common_methods import smart_square_crop, degrade_image

# Methods for the training datasets

In [8]:
def create_HR_LR_images_from_video(
        video_path,
        skip_seconds=(2, 2),
        frame_interval=10,
        scale_factor=0.5,
        output_name="",
        class_label=None):
    """
    Extract high-resolution (HR) / low-resolution (LR) training image pairs
    from a video file.

    Frames inside the window that remains after skipping the first and last
    ``skip_seconds`` are sampled every ``frame_interval`` frames. Each sampled
    frame is cropped with ``smart_square_crop`` and written to
    ``images/HR/<output_name>/``; the crop is then passed to ``degrade_image``
    and the returned image is written under the same file name to
    ``images/LR/<output_name>/``. File numbering continues after the highest
    ``<output_name><N>.png`` already present in the HR directory.

    Two pickled dictionaries are loaded (if present), updated and rewritten:
        images/interpolation_map.pkl  : LR basename -> name returned by
                                        ``degrade_image`` alongside the LR image.
        images/class_labels_map.pkl   : HR basename -> ``class_label``.

    Parameters:
        video_path (str): Path to the input video file.
        skip_seconds (tuple): Seconds to skip at the start and end of the video.
        frame_interval (int): Interval at which frames are saved (0 means save all frames).
        scale_factor (float): Scale factor forwarded to ``degrade_image``.
        output_name (str): Name for the output sub-directory and file-name prefix.
        class_label (int): Non-negative class id assigned to every HR image
            extracted from this video.

    Raises:
        ValueError: If an argument is invalid, the video cannot be opened, or
            the skip window leaves no frames to process.
        FileNotFoundError: If ``video_path`` does not exist.
    """

    if not video_path or not isinstance(video_path, str):
        raise ValueError("video_path must be a non-empty string.")
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Video file not found: {video_path}")
    if not isinstance(skip_seconds, tuple) or len(skip_seconds) != 2:
        raise ValueError("skip_seconds must be a tuple of two values (start_skip, end_skip).")
    if not isinstance(frame_interval, int) or frame_interval < 0:
        raise ValueError("frame_interval must be a non-negative integer.")
    if not isinstance(output_name, str) or not output_name:
        raise ValueError("output_name must be a non-empty string.")
    if not isinstance(scale_factor, (int, float)) or scale_factor <= 0:
        raise ValueError("scale_factor must be a positive number.")
    if not isinstance(class_label, int) or class_label < 0:
        raise ValueError("class_label must be a non-negative integer.")

    def _load_map(path, description):
        # Best-effort load of a previously saved mapping; any problem falls
        # back to an empty dict, but is reported because the file will be
        # overwritten when the function finishes.
        # NOTE(review): pickle.load can execute arbitrary code - only load map
        # files that were produced by this notebook.
        if not os.path.exists(path):
            return {}
        try:
            with open(path, 'rb') as f:
                loaded = pickle.load(f)
        except Exception as e:
            print(f"Warning: could not read {description} map ({e}); starting a new one.")
            return {}
        if not isinstance(loaded, dict):
            print(f"Warning: {description} map is not a dict; starting a new one.")
            return {}
        return loaded

    full_HR_output_dir = f"images/HR/{output_name}"
    full_LR_output_dir = f"images/LR/{output_name}"
    interp_map_path = "images/interpolation_map.pkl"
    class_map_path = "images/class_labels_map.pkl"

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise ValueError(f"Could not open video file: {video_path}")

        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Calculate start and end frames after skipping initial/final seconds
        start_frame = int(skip_seconds[0] * fps)
        end_frame = total_frames - int(skip_seconds[1] * fps)
        if start_frame < 0 or end_frame < 0:
            raise ValueError("Skip seconds result in negative frame indices.")
        elif start_frame >= total_frames or end_frame > total_frames:
            raise ValueError("Skip seconds exceed total video duration.")
        elif start_frame >= end_frame:
            raise ValueError("Start frame must be less than end frame after skipping seconds.")

        # Output directories are only created once the video is known to be usable
        os.makedirs(full_HR_output_dir, exist_ok=True)
        os.makedirs(full_LR_output_dir, exist_ok=True)

        interp_map = _load_map(interp_map_path, "interpolation")
        class_map = _load_map(class_map_path, "class labels")

        # Continue numbering after the highest existing "<output_name><N>.png".
        # Files whose remainder is not a plain number are ignored instead of
        # crashing the int() conversion.
        extension = ".png"
        existing_numbers = []
        for existing in os.listdir(full_HR_output_dir):
            if existing.startswith(output_name) and existing.endswith(extension):
                stem = existing[len(output_name):-len(extension)]
                if stem.isdecimal():
                    existing_numbers.append(int(stem))
        saved_count = max(existing_numbers, default=-1) + 1
        first_saved_index = saved_count

        cropped_shape = None  # shape of the last HR crop; stays None if nothing is saved
        current_frame = 0

        # Frames at or beyond end_frame are never saved, so stop decoding there.
        while current_frame < end_frame:
            ret, frame = cap.read()
            if not ret:
                break

            on_interval = (
                frame_interval == 0
                or (current_frame - start_frame) % frame_interval == 0
            )
            if current_frame >= start_frame and on_interval:
                image_name = f"{output_name}{saved_count}.png"

                # --- HR CROP ---
                cropped = smart_square_crop(frame)
                cropped_shape = cropped.shape
                cv2.imwrite(os.path.join(full_HR_output_dir, image_name), cropped)

                # Record classification label for HR image
                class_map[image_name] = class_label

                # Create LR version of the frame + capture interpolation
                lr_image, interp_name = degrade_image(
                    cropped, scale_factor=scale_factor
                )
                cv2.imwrite(os.path.join(full_LR_output_dir, image_name), lr_image)
                # Record interpolation method used as string
                interp_map[image_name] = interp_name

                saved_count += 1
            current_frame += 1
    finally:
        # Release the capture on every exit path, including validation errors
        cap.release()

    # Persist interpolation mapping
    try:
        with open(interp_map_path, 'wb') as f:
            pickle.dump(interp_map, f)
    except Exception as e:
        print(f"Warning: failed to save interpolation map: {e}")

    # Persist class labels mapping
    try:
        with open(class_map_path, 'wb') as f:
            pickle.dump(class_map, f)
    except Exception as e:
        print(f"Warning: failed to save class labels map: {e}")

    # Print analysis
    print("=== VIDEO ANALYSIS ===")
    print(f"Total frames: {total_frames}")
    print(f"Total frames (after skipped seconds): {end_frame - start_frame}")
    print(f"Images saved in this run: {saved_count - first_saved_index}")
    print(f"Total images in directory: {saved_count}")
    print(f"Original HR frame size (width x height): {frame_width} x {frame_height}")
    if cropped_shape is None:
        # The decoder delivered no frame inside the window, so there is no crop to report
        print("Cropped HR frame size (width x height): n/a (no frames were saved)")
        print("LR frame size (width x height): n/a (no frames were saved)")
    else:
        print(f"Cropped HR frame size (width x height): {cropped_shape[1]} x {cropped_shape[0]}")
        print(f"LR frame size (width x height): {int(cropped_shape[1] * scale_factor)} x {int(cropped_shape[0] * scale_factor)}")
    print(f"Saved interpolation map entries: {len(interp_map)} -> {interp_map_path}")
    print(f"Saved class labels map entries: {len(class_map)} -> {class_map_path}")

In [9]:
video_base_dir = "videos"

# Upper bound on the number of successfully processed videos per subfolder
folder_max_videos = dict(low_z_offset=12, high_z_offset=12)

# Frame sampling interval per subfolder (40 is used when a folder is not listed)
folder_frame_interval = dict(low_z_offset=80, high_z_offset=50)

# Class id given to every image extracted from a subfolder
folder_class_id = dict(low_z_offset=0, high_z_offset=1)

processed = {}
for subfolder, max_videos in folder_max_videos.items():
    subdir = os.path.join(video_base_dir, subfolder)
    if not os.path.isdir(subdir):
        print(f"Skipping missing folder: {subdir}")
        continue

    class_id = folder_class_id.get(subfolder)
    if class_id is None:
        print(f"Warning: no class id defined for {subfolder}; skipping.")
        continue

    frame_interval = folder_frame_interval.get(subfolder, 40)

    # Alphabetical order keeps the image numbering reproducible between runs
    videos = sorted(
        entry for entry in os.listdir(subdir) if entry.lower().endswith(".mp4")
    )

    count = 0
    for video_file in videos:
        if count >= max_videos:
            break
        video_path = os.path.join(subdir, video_file)

        # "<name>_<digits>.mp4" -> "<name>"; any other file name is used as-is
        name_no_ext = os.path.splitext(video_file)[0]
        prefix, separator, trailing = name_no_ext.rpartition("_")
        if separator and trailing.isdigit():
            defect_type = prefix
        else:
            defect_type = name_no_ext

        try:
            create_HR_LR_images_from_video(
                video_path,
                skip_seconds=(2, 2),
                frame_interval=frame_interval,
                scale_factor=0.5,
                output_name=defect_type,
                class_label=class_id,
            )
        except Exception as e:
            print(f"Error processing {video_path}: {e}")
        else:
            # Only videos that were processed without error count towards the limit
            count += 1

    processed[subfolder] = count

print("Summary per folder:", processed)

=== VIDEO ANALYSIS ===
Total frames: 1597
Total frames (after skipped seconds): 1477
Images saved in this run: 19
Total images in directory: 19
Original HR frame size (width x height): 478 x 850
Cropped HR frame size (width x height): 478 x 478
LR frame size (width x height): 239 x 239
Saved interpolation map entries: 19 -> images/interpolation_map.pkl
Saved class labels map entries: 19 -> images/class_labels_map.pkl
=== VIDEO ANALYSIS ===
Total frames: 1136
Total frames (after skipped seconds): 1018
Images saved in this run: 13
Total images in directory: 32
Original HR frame size (width x height): 478 x 850
Cropped HR frame size (width x height): 478 x 478
LR frame size (width x height): 239 x 239
Saved interpolation map entries: 32 -> images/interpolation_map.pkl
Saved class labels map entries: 32 -> images/class_labels_map.pkl
=== VIDEO ANALYSIS ===
Total frames: 1397
Total frames (after skipped seconds): 1279
Images saved in this run: 16
Total images in directory: 48
Original HR fr

# Methods for the prediction datasets

In [4]:
def create_HR_LR_prediction_images_from_video(
        video_path,
        skip_seconds=(2, 2),
        frame_interval=10,
        scale_factor=0.5,
        output_name="",
        class_label=None
    ):
    """
    Extract frames from a video, crop each sampled frame with
    ``smart_square_crop`` and save HR/LR image pairs into
    images_for_predictions/HR/<output_name>/ and
    images_for_predictions/LR/<output_name>/. File numbering continues after
    the highest ``<output_name><N>.png`` already present in the HR directory.

    Differences vs the training variant:
    - No interpolation map is written.
    - Optionally writes a class labels map (HR basename -> class_label) to
      images_for_predictions/predictions_class_labels_map.pkl when class_label
      is provided.

    Parameters:
        video_path (str): Path to the input video file.
        skip_seconds (tuple[int,int]): Seconds to skip at the start and end of the video.
        frame_interval (int): Save one frame every N frames (0 means save all frames).
        scale_factor (float): Scale factor forwarded to degrade_image to build the LR image.
        output_name (str): Subfolder and filename prefix for saved images.
        class_label (int|None): If provided, the class id to assign to every HR image
            extracted from this video. A mapping of HR image basename -> class_label
            will be stored in images_for_predictions/predictions_class_labels_map.pkl.

    Raises:
        ValueError: If an argument is invalid, the video cannot be opened, or
            the skip window leaves no frames to process.
        FileNotFoundError: If ``video_path`` does not exist.
    """

    if not video_path or not isinstance(video_path, str):
        raise ValueError("video_path must be a non-empty string.")
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Video file not found: {video_path}")
    if not isinstance(skip_seconds, tuple) or len(skip_seconds) != 2:
        raise ValueError("skip_seconds must be a tuple of two values (start_skip, end_skip).")
    if not isinstance(frame_interval, int) or frame_interval < 0:
        raise ValueError("frame_interval must be a non-negative integer.")
    if not isinstance(output_name, str) or not output_name:
        raise ValueError("output_name must be a non-empty string.")
    if not isinstance(scale_factor, (int, float)) or scale_factor <= 0:
        raise ValueError("scale_factor must be a positive number.")
    if class_label is not None and (not isinstance(class_label, int) or class_label < 0):
        raise ValueError("class_label must be a non-negative integer when provided.")

    full_HR_output_dir = f"images_for_predictions/HR/{output_name}"
    full_LR_output_dir = f"images_for_predictions/LR/{output_name}"

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise ValueError(f"Could not open video file: {video_path}")

        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        start_frame = int(skip_seconds[0] * fps)
        end_frame = total_frames - int(skip_seconds[1] * fps)
        if start_frame < 0 or end_frame < 0:
            raise ValueError("Skip seconds result in negative frame indices.")
        elif start_frame >= total_frames or end_frame > total_frames:
            raise ValueError("Skip seconds exceed total video duration.")
        elif start_frame >= end_frame:
            raise ValueError("Start frame must be less than end frame after skipping seconds.")

        # Output directories are only created once the video is known to be usable
        os.makedirs(full_HR_output_dir, exist_ok=True)
        os.makedirs(full_LR_output_dir, exist_ok=True)

        # Optional: class labels map for predictions (only used when a label is given)
        class_map = None
        class_map_path = None
        if class_label is not None:
            class_map_path = "images_for_predictions/predictions_class_labels_map.pkl"
            class_map = {}
            if os.path.exists(class_map_path):
                # Best-effort load; problems fall back to an empty map but are
                # reported because the file is overwritten at the end.
                # NOTE(review): pickle.load can execute arbitrary code - only
                # load map files that were produced by this notebook.
                try:
                    with open(class_map_path, 'rb') as f:
                        loaded = pickle.load(f)
                except Exception as e:
                    print(f"Warning: could not read class labels map ({e}); starting a new one.")
                else:
                    if isinstance(loaded, dict):
                        class_map = loaded
                    else:
                        print("Warning: class labels map is not a dict; starting a new one.")

        # Continue numbering after the highest existing "<output_name><N>.png";
        # files whose remainder is not a plain number are ignored.
        extension = ".png"
        existing_numbers = []
        for existing in os.listdir(full_HR_output_dir):
            if existing.startswith(output_name) and existing.endswith(extension):
                stem = existing[len(output_name):-len(extension)]
                if stem.isdecimal():
                    existing_numbers.append(int(stem))
        saved_count = max(existing_numbers, default=-1) + 1
        start_saved_count = saved_count

        current_frame = 0

        # Frames at or beyond end_frame are never saved, so stop decoding there.
        while current_frame < end_frame:
            ret, frame = cap.read()
            if not ret:
                break

            on_interval = (
                frame_interval == 0
                or (current_frame - start_frame) % frame_interval == 0
            )
            if current_frame >= start_frame and on_interval:
                image_name = f"{output_name}{saved_count}.png"

                # HR crop
                cropped = smart_square_crop(frame)
                cv2.imwrite(os.path.join(full_HR_output_dir, image_name), cropped)

                # Record classification label for HR image (if provided)
                if class_map is not None:
                    class_map[image_name] = class_label

                # LR degrade (the second return value is not needed here)
                lr_image, _ = degrade_image(cropped, scale_factor=scale_factor)
                cv2.imwrite(os.path.join(full_LR_output_dir, image_name), lr_image)

                saved_count += 1
            current_frame += 1
    finally:
        # Release the capture on every exit path, including validation errors
        cap.release()

    # Persist class labels mapping for predictions (if used)
    if class_map is not None and class_map_path is not None:
        try:
            with open(class_map_path, 'wb') as f:
                pickle.dump(class_map, f)
        except Exception as e:
            print(f"Warning: failed to save class labels map: {e}")

    # Summary
    print("=== VIDEO SUMMARY ===")
    print(f"Video: {os.path.basename(video_path)}")
    print(f"Total frames: {total_frames}")
    print(f"Frames considered (after skipping): {max(0, end_frame - start_frame)}")
    print(f"Images saved in this run: {saved_count - start_saved_count}")
    print(f"Total images now in directory: {saved_count}")
    print(f"Original frame size (W x H): {frame_width} x {frame_height}")
    if class_map_path is not None:
        print(f"Saved class labels map entries: {len(class_map)} -> {class_map_path}")

In [5]:
video_base_dir = "videos_for_predictions"

# Class id for each first-level subfolder of the prediction videos
folder_class_id = dict(low_z_offset=0, high_z_offset=1)

for root, dirs, files in os.walk(video_base_dir):
    rel = os.path.relpath(root, video_base_dir)
    if rel == ".":
        # Files placed directly in the base directory belong to no class folder
        continue

    # The first path component below the base directory selects the class id
    top_folder = rel.split(os.sep)[0]
    class_id = folder_class_id.get(top_folder)
    if class_id is None:
        print(f"Warning: no class id defined for '{top_folder}' (path: {rel}); skipping.")
        continue

    videos = sorted(entry for entry in files if entry.lower().endswith(".mp4"))

    for video_file in videos:
        video_path = os.path.join(root, video_file)

        # "<name>_<digits>.mp4" -> "<name>"; any other file name is used as-is
        name_no_ext = os.path.splitext(video_file)[0]
        prefix, separator, trailing = name_no_ext.rpartition("_")
        if separator and trailing.isdigit():
            defect_type = prefix
        else:
            defect_type = name_no_ext

        try:
            create_HR_LR_prediction_images_from_video(
                video_path,
                skip_seconds=(2, 2),
                frame_interval=40,
                scale_factor=0.5,
                output_name=defect_type,
                class_label=class_id,
            )
        except Exception as e:
            print(f"Error processing {video_path}: {e}")

=== VIDEO SUMMARY ===
Video: high_z_offset_1.mp4
Total frames: 664
Frames considered (after skipping): 544
Images saved in this run: 14
Total images now in directory: 14
Original frame size (W x H): 478 x 850
Saved class labels map entries: 14 -> images_for_predictions/predictions_class_labels_map.pkl
=== VIDEO SUMMARY ===
Video: high_z_offset_2.mp4
Total frames: 635
Frames considered (after skipping): 515
Images saved in this run: 13
Total images now in directory: 27
Original frame size (W x H): 478 x 850
Saved class labels map entries: 27 -> images_for_predictions/predictions_class_labels_map.pkl
=== VIDEO SUMMARY ===
Video: high_z_offset_3.mp4
Total frames: 717
Frames considered (after skipping): 597
Images saved in this run: 15
Total images now in directory: 42
Original frame size (W x H): 478 x 850
Saved class labels map entries: 42 -> images_for_predictions/predictions_class_labels_map.pkl
=== VIDEO SUMMARY ===
Video: high_z_offset_4.mp4
Total frames: 661
Frames considered (after