In [2]:
import glob
import cv2
import os
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import sys
import torch
from multiprocessing import freeze_support
import subprocess


def extract_frames(video_path, output_dir):
    """
    Decode a video and write every frame to output_dir as a numbered JPEG

    Frames are saved as 00000.jpg, 00001.jpg, ... in the order they are read.

    Parameters:
    video_path: path of the video file to decode
    output_dir: directory that receives the frames (created if missing)

    Returns:
    number of frames written (0 if the video could not be opened)

    Raises:
    IOError if a decoded frame cannot be written to disk
    """
    os.makedirs(output_dir, exist_ok=True)

    video_capture = cv2.VideoCapture(video_path)
    if not video_capture.isOpened():
        # Previously this case was indistinguishable from an empty video.
        video_capture.release()
        print(f"Warning: could not open video {video_path}; no frames extracted.")
        return 0

    frame_count = 0
    try:
        while True:
            ret, frame = video_capture.read()
            if not ret:
                break
            frame_filename = os.path.join(output_dir, f"{frame_count:05d}.jpg")
            # cv2.imwrite reports failure through its return value, not an exception.
            if not cv2.imwrite(frame_filename, frame):
                raise IOError(f"Could not write frame to {frame_filename}")
            frame_count += 1
    finally:
        # Release the decoder even if writing a frame failed.
        video_capture.release()

    print(f"Extracted {frame_count} frames from {video_path} to {output_dir}.")
    return frame_count

def move_to_folder(src, dst_path, dst_name):
    """
    Move a file to dst_path/dst_name, creating the destination's parent folder

    Parameters:
    src: path of the file to move
    dst_path: folder the file is moved into
    dst_name: file name to use inside dst_path
    """
    target = Path(f"{dst_path}/{dst_name}")
    # The move is a rename, so make sure the parent folder exists first.
    target.parent.mkdir(parents=True, exist_ok=True)
    Path(f"{src}").rename(target)

def rotate_images_in_folder(folder_path):
    """
    Rotate every readable image in a folder 90 degrees counterclockwise, in place

    Files are selected by extension (case-insensitive). Each rotated image
    overwrites its original, so running this twice rotates by 180 degrees.

    Parameters:
    folder_path: folder whose images are rotated
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif')

    rotated_count = 0
    for name in os.listdir(folder_path):
        if not name.lower().endswith(image_suffixes):
            continue

        file_path = os.path.join(folder_path, name)
        img = cv2.imread(file_path)
        if img is None:
            # Unreadable file: leave it alone and do not count it.
            continue

        # Overwrite the original with the rotated version.
        cv2.imwrite(file_path, cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE))
        rotated_count += 1

    print(f"Completed! Rotated {rotated_count} images in {folder_path}")

def _sample_roi_hsv(image_path, pixel_position, pixel_size):
    """Return (h, s, v) of the ROI's top-left pixel as floats, or None if it cannot be sampled."""
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        return None

    x, y = pixel_position
    height, width = image.shape[:2]
    if not (0 <= x < width and 0 <= y < height):
        return None

    # Clamp so an ROI touching the top/left edge does not turn into a
    # negative (wrap-around) slice start, which yields an empty ROI.
    top = max(y - pixel_size, 0)
    left = max(x - pixel_size, 0)
    roi_bgr = image[top:y + pixel_size, left:x + pixel_size]

    # NOTE(review): only the ROI's top-left pixel is used, not the ROI mean.
    # Kept as-is because the ROI coordinates may have been tuned against it.
    h, s, v = cv2.cvtColor(roi_bgr, cv2.COLOR_BGR2HSV)[0, 0]
    return float(h), float(s), float(v)


def _first_red_flash(images, first_index, pixel_position, pixel_size,
                     baseline_s, baseline_v, saturation_threshold, brightness_threshold):
    """Return the index of the first frame at/after first_index showing a red flash, else -1."""
    for frame_idx in range(first_index, len(images)):
        hsv = _sample_roi_hsv(images[frame_idx], pixel_position, pixel_size)
        if hsv is None:
            continue
        h, s, v = hsv
        # OpenCV 8-bit hue spans 0..179, so red sits at both ends of the range.
        is_red_hue = (h <= 10) or (h >= 170)
        if (is_red_hue
                and s - baseline_s > saturation_threshold
                and v - baseline_v > brightness_threshold):
            return frame_idx
    return -1


def detect_hsv_red_change(images, pixel_position, pixel_size, saturation_threshold: float = 30,
                          brightness_threshold: float = 10, min_gap: int = 150,
                          baseline_frames: int = 5) -> "list[int] | int":
    """
    Find the frames where a watched pixel turns red for the first and second time

    A baseline saturation/brightness is averaged over the first frames of the
    sequence. A frame counts as a "red flash" when the sampled pixel has a red
    hue and is both more saturated and brighter than the baseline by the given
    thresholds. The second flash is searched starting min_gap frames after the
    first one.

    Parameters:
    images: ordered list of image file paths
    pixel_position: (x, y) position of the watched pixel
    pixel_size: half-size of the ROI cut around pixel_position (must be >= 1)
    saturation_threshold: required saturation increase over the baseline
    brightness_threshold: required brightness (V) increase over the baseline
    min_gap: number of frames skipped after the first flash before searching
             for the second one
    baseline_frames: number of leading frames used for the baseline

    Returns:
    [start_index, stop_index] (indices into images) when both flashes are
    found, otherwise -1

    Raises:
    ValueError if pixel_size is smaller than 1
    """
    if pixel_size < 1:
        raise ValueError("pixel_size must be >= 1")

    # Average only over frames that could actually be sampled; dividing by a
    # fixed count would drag the baseline down when frames are missing.
    samples = [
        hsv
        for hsv in (_sample_roi_hsv(path, pixel_position, pixel_size)
                    for path in images[:baseline_frames])
        if hsv is not None
    ]
    sample_count = len(samples)
    baseline_s = sum(hsv[1] for hsv in samples) / sample_count if sample_count else 0.0
    baseline_v = sum(hsv[2] for hsv in samples) / sample_count if sample_count else 0.0

    start_frame_index = _first_red_flash(
        images, 0, pixel_position, pixel_size,
        baseline_s, baseline_v, saturation_threshold, brightness_threshold,
    )
    if start_frame_index == -1:
        return -1

    stop_frame_index = _first_red_flash(
        images, start_frame_index + min_gap, pixel_position, pixel_size,
        baseline_s, baseline_v, saturation_threshold, brightness_threshold,
    )
    if stop_frame_index == -1:
        return -1

    return [start_frame_index, stop_frame_index]

def rs_align_with_xmp(rs_path, import_path, export_path, xml_path):
    """
    Run the external alignment executable headlessly and export its registration

    The executable at rs_path is invoked with: -headless, -addFolder
    <import_path>, -align, -exportRegistration <export_path>/placeholder.txt
    <xml_path>, -quit.
    NOTE(review): the flags look like the RealityScan/RealityCapture CLI -
    confirm which tool rs_path points to.

    Parameters:
    rs_path: path of the executable to run
    import_path: folder passed to -addFolder
    export_path: folder used for the -exportRegistration output file
    xml_path: settings file passed as the second -exportRegistration argument

    Returns:
    True when the command exits with return code 0

    Raises:
    RuntimeError when the command exits with a non-zero return code
    """
    cmd = [
        str(rs_path), "-headless",
        "-addFolder", str(import_path),
        "-align",
        "-exportRegistration", f"{str(export_path)}/placeholder.txt", str(xml_path),
        "-quit"
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        print(f"Error running command: {' '.join(cmd)}")
        print(f"stdout: {result.stdout}")
        print(f"stderr: {result.stderr}")
        raise RuntimeError(f"Alignment command failed with return code {result.returncode}")

    return True

In [2]:
# Input folder globbed for video files; each video found is extracted into
# its own recorded_NN frame folder.
videos_by_view = r"N:\shared\yaojie\250728-Capture\views-raw-9"
# Input folder globbed for still images; the file name up to the first '.'
# is used as the view name when grouping them.
soar_sequence = r"N:\shared\yaojie\250728-Capture\raw-col"

# Output root that receives one sub-folder of numbered frames per view.
rgb_sequence_by_view_output = r"N:\shared\yaojie\250728-Capture\rgb_sequence_output"
# Output root for training data (a "frame_0" sub-folder is created under it).
frames_to_train = r"N:\shared\yaojie\250728-Capture\to_train"

In [14]:
image_extensions = ['*.jpg', '*.jpeg', '*.png']

# Every image in the capture folder, ordered by path.
images = sorted(
    match
    for pattern in image_extensions
    for match in glob.glob(os.path.join(soar_sequence, pattern))
)

print(f"Found {len(images)} images in {soar_sequence}")

# Group the images by view: the view name is the file name up to its first '.'.
# Dicts keep insertion order, so views appear in the order first encountered.
frames_grouped_by_view = {}
for image_path in images:
    view_name = os.path.basename(image_path).split('.')[0]
    frames_grouped_by_view.setdefault(view_name, []).append(image_path)

view_names = list(frames_grouped_by_view)
soar_frames_by_view = list(frames_grouped_by_view.values())

for i, view in enumerate(soar_frames_by_view):
    print(f"View: {i}, Name: {view_names[i]}, Number of images: {len(view)}")


Found 7424 images in N:\shared\yaojie\250728-Capture\raw-col
View: 0, Name: take3_175889972_000070704912, Number of images: 928
View: 1, Name: take3_175889972_000079504912, Number of images: 928
View: 2, Name: take3_175889972_000123120312, Number of images: 928
View: 3, Name: take3_175889972_000147211512, Number of images: 928
View: 4, Name: take3_175889972_000927310812, Number of images: 928
View: 5, Name: take3_175889972_000984794512, Number of images: 928
View: 6, Name: take3_175889972_001420795012, Number of images: 928
View: 7, Name: take3_175889972_001430695012, Number of images: 928


In [15]:
# One output folder per view: <output root>/soar_view_<index>.
soar_rgb_sequence_folders = [
    os.path.join(rgb_sequence_by_view_output, f"soar_view_{view_idx}")
    for view_idx in range(len(soar_frames_by_view))
]
for output_path in soar_rgb_sequence_folders:
    os.makedirs(output_path, exist_ok=True)

# Move (not copy) each view's images into its folder, renumbered from 00000.
# The destination name always ends in .jpg, whatever the source extension was.
for view_frames, target_folder in zip(soar_frames_by_view, soar_rgb_sequence_folders):
    for j, view_image in enumerate(view_frames):
        if not view_image:
            continue
        move_to_folder(view_image, target_folder, f"{j:05d}.jpg")

In [None]:
# Not needed.
# Optional: rotates every image of each soar view in place, so re-running
# this cell rotates the images again.
for view_folder in soar_rgb_sequence_folders:
    rotate_images_in_folder(view_folder)

In [18]:
video_extensions = ['*.mp4', '*.avi', '*.mov', '*.mkv', '*.wmv', '*.flv', '*.webm', '*.m4v', '*.3gp', '*.mpg', '*.mpeg']

# Collect every video in the folder, sorted for consistent ordering
videos = sorted(
    match
    for pattern in video_extensions
    for match in glob.glob(os.path.join(videos_by_view, pattern))
)

# Print all videos found
print(f"Found {len(videos)} videos:")
for video in videos:
    print(video)

# Extract each video into its own recorded_NN folder.
# Folder numbering stays 0-based; only the progress message is 1-based so
# that the last video reads "N/N" rather than "N-1/N".
for i, video in enumerate(videos):
    print(f"Processing video {i + 1}/{len(videos)}: {video}")
    output_dir = os.path.join(rgb_sequence_by_view_output, f"recorded_{i:02d}")
    extract_frames(video, output_dir)


Found 9 videos:
N:\shared\yaojie\250728-Capture\views-raw-9\A001_07281413_C011.mov
N:\shared\yaojie\250728-Capture\views-raw-9\A001_07281414_C008 2.mov
N:\shared\yaojie\250728-Capture\views-raw-9\A001_07281414_C008.mov
N:\shared\yaojie\250728-Capture\views-raw-9\A001_07281414_C011.mov
N:\shared\yaojie\250728-Capture\views-raw-9\A001_07281415_C008.mov
N:\shared\yaojie\250728-Capture\views-raw-9\GX010031.MP4
N:\shared\yaojie\250728-Capture\views-raw-9\GX010127.MP4
N:\shared\yaojie\250728-Capture\views-raw-9\a cam001_07281413_C003.mov
N:\shared\yaojie\250728-Capture\views-raw-9\temp_video_for_share.mp4
Processing video 0/9: N:\shared\yaojie\250728-Capture\views-raw-9\A001_07281413_C011.mov
Extracted 2144 frames from N:\shared\yaojie\250728-Capture\views-raw-9\A001_07281413_C011.mov to N:\shared\yaojie\250728-Capture\rgb_sequence_output\recorded_00.
Processing video 1/9: N:\shared\yaojie\250728-Capture\views-raw-9\A001_07281414_C008 2.mov
Extracted 2024 frames from N:\shared\yaojie\250728-

In [3]:
# Full paths of all sub-folders of the output root, sorted by folder name.
rgb_sequence_by_view_folders = [
    os.path.join(rgb_sequence_by_view_output, entry)
    for entry in sorted(os.listdir(rgb_sequence_by_view_output))
    if os.path.isdir(os.path.join(rgb_sequence_by_view_output, entry))
]

for folder in rgb_sequence_by_view_folders:
    print(f"Folder: {folder}")

Folder: N:\shared\yaojie\250728-Capture\rgb_sequence_output\recorded_00
Folder: N:\shared\yaojie\250728-Capture\rgb_sequence_output\recorded_01
Folder: N:\shared\yaojie\250728-Capture\rgb_sequence_output\recorded_02
Folder: N:\shared\yaojie\250728-Capture\rgb_sequence_output\recorded_03
Folder: N:\shared\yaojie\250728-Capture\rgb_sequence_output\recorded_04
Folder: N:\shared\yaojie\250728-Capture\rgb_sequence_output\recorded_05
Folder: N:\shared\yaojie\250728-Capture\rgb_sequence_output\recorded_06
Folder: N:\shared\yaojie\250728-Capture\rgb_sequence_output\recorded_07
Folder: N:\shared\yaojie\250728-Capture\rgb_sequence_output\recorded_08


In [56]:
# One [x, y] pixel position per view folder: entry i is passed as
# pixel_position to detect_hsv_red_change for the i-th folder of
# rgb_sequence_by_view_folders.
# NOTE(review): there are 17 entries; presumably 9 recorded_* folders followed
# by 8 soar_view_* folders in sorted order - confirm the mapping.
led_roi = [
    [169, 1708],
    [456, 1672],
    [534, 1892],
    [469, 1720],
    [794, 1806],
    [717, 1630],
    [808, 1862],
    [668, 1810],
    [862, 1402],
    [1896, 691],
    [1759, 1029],
    [1721, 386],
    [1831, 767],
    [1568, 721],
    [1633, 767],
    [1894, 721],
    [1653, 170]
]

In [None]:
image_extensions = ['*.jpg', '*.jpeg', '*.png']

# Trim every view so it starts at the first detected LED flash, then renumber
# the remaining frames as 00000.jpg, 00001.jpg, ...
# This cell deletes and renames files, so it is not safe to re-run blindly.
for view_idx, view_folder in enumerate(rgb_sequence_by_view_folders):
    images = sorted(
        match
        for ext in image_extensions
        for match in glob.glob(os.path.join(view_folder, ext))
    )

    bframe = detect_hsv_red_change(images, led_roi[view_idx], 1)
    if bframe == -1:
        # -1 means the two flashes were not both found; indexing it would
        # raise TypeError, so leave this view untouched.
        print(f"View {view_idx}: no start/stop flash detected in {view_folder}, skipping")
        continue

    start_idx, stop_idx = bframe
    print(f"View {view_idx}: Count: {stop_idx - start_idx}, from({images[start_idx]}) to({images[stop_idx]})")

    # Delete everything before the first flash.
    # NOTE(review): frames after the stop flash are kept - confirm that is intended.
    for stale_image in images[:start_idx]:
        os.remove(stale_image)

    # Rename the surviving frames in ascending order. Each target index is
    # never larger than the source index, so a target name is always free.
    for j, image in enumerate(images[start_idx:]):
        new_path = os.path.join(view_folder, f"{j:05d}.jpg")
        if image != new_path:
            os.rename(image, new_path)


In [None]:
import cv2
import numpy as np

def extract_brightness_contrast(reference_image_path):
    """
    Extract brightness and contrast statistics from a reference image

    Brightness is the mean and contrast the standard deviation of the
    grayscale image; per-channel mean/std are also computed in R, G, B order.
    The statistics are printed as well as returned.

    Parameters:
    reference_image_path: path to the reference image

    Returns:
    dict with keys 'target_brightness', 'target_contrast', 'channel_means',
    'channel_stds' (both lists in [R, G, B] order) and 'reference_image_path'

    Raises:
    ValueError if the image cannot be read
    """

    # Load the reference image (cv2.imread returns None instead of raising)
    image = cv2.imread(reference_image_path)
    if image is None:
        raise ValueError(f"Could not read reference image: {reference_image_path}")
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Global statistics come from the grayscale version
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    target_brightness = np.mean(gray)   # mean intensity
    target_contrast = np.std(gray)      # standard deviation

    # Per-channel statistics for color correction
    channel_means = [np.mean(image_rgb[:, :, i]) for i in range(3)]
    channel_stds = [np.std(image_rgb[:, :, i]) for i in range(3)]

    brightness_contrast_data = {
        'target_brightness': target_brightness,
        'target_contrast': target_contrast,
        'channel_means': channel_means,  # [R, G, B]
        'channel_stds': channel_stds,    # [R, G, B]
        'reference_image_path': reference_image_path
    }

    print(f"Reference image: {reference_image_path}")
    print(f"Target brightness: {target_brightness:.2f}")
    print(f"Target contrast: {target_contrast:.2f}")
    print(f"Channel means (R,G,B): [{channel_means[0]:.2f}, {channel_means[1]:.2f}, {channel_means[2]:.2f}]")
    print(f"Channel stds (R,G,B): [{channel_stds[0]:.2f}, {channel_stds[1]:.2f}, {channel_stds[2]:.2f}]")

    return brightness_contrast_data

def apply_brightness_contrast(input_image_path, brightness_contrast_data):
    """
    Apply brightness and contrast correction to match reference image and replace the original

    Each RGB channel is standardized (mean 0, std 1) and then rescaled to the
    reference channel's std and mean. The result overwrites the input file.

    Parameters:
    input_image_path: path to the image to be corrected (will be replaced)
    brightness_contrast_data: data extracted from reference image (dict with
        'target_brightness', 'target_contrast', 'channel_means', 'channel_stds')

    Raises:
    ValueError if the input image cannot be read
    IOError if the corrected image cannot be written
    """

    # Load the input image (cv2.imread returns None instead of raising)
    image = cv2.imread(input_image_path)
    if image is None:
        raise ValueError(f"Could not read image: {input_image_path}")
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Get target values
    target_brightness = brightness_contrast_data['target_brightness']
    target_contrast = brightness_contrast_data['target_contrast']
    target_means = brightness_contrast_data['channel_means']
    target_stds = brightness_contrast_data['channel_stds']

    # Calculate current image statistics
    current_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    current_brightness = np.mean(current_gray)
    current_contrast = np.std(current_gray)

    current_means = [np.mean(image_rgb[:, :, i]) for i in range(3)]
    current_stds = [np.std(image_rgb[:, :, i]) for i in range(3)]

    # Work in float so intermediate values may leave the [0, 255] range
    corrected_image = image_rgb.copy().astype(np.float32)

    # Apply correction to each channel
    for channel in range(3):
        if current_stds[channel] > 0:  # Avoid division by zero (flat channel is left unchanged)
            # Normalize current channel to have mean=0, std=1
            corrected_image[:, :, channel] = (corrected_image[:, :, channel] - current_means[channel]) / current_stds[channel]

            # Scale to target std and shift to target mean
            corrected_image[:, :, channel] = corrected_image[:, :, channel] * target_stds[channel] + target_means[channel]

    # Clip to [0, 255] and round to nearest before the integer cast; a bare
    # astype(uint8) truncates, which darkens the image by up to one level.
    corrected_image = np.rint(np.clip(corrected_image, 0, 255)).astype(np.uint8)

    # Convert back to BGR for saving
    corrected_bgr = cv2.cvtColor(corrected_image, cv2.COLOR_RGB2BGR)

    # Replace the original image; cv2.imwrite reports failure via its return value
    if not cv2.imwrite(input_image_path, corrected_bgr):
        raise IOError(f"Could not write corrected image: {input_image_path}")

    # Print statistics
    corrected_gray = cv2.cvtColor(corrected_bgr, cv2.COLOR_BGR2GRAY)
    final_brightness = np.mean(corrected_gray)
    final_contrast = np.std(corrected_gray)

    print(f"\nProcessed: {input_image_path}")
    print(f"Brightness: {current_brightness:.2f} -> {final_brightness:.2f} (Target: {target_brightness:.2f})")
    print(f"Contrast: {current_contrast:.2f} -> {final_contrast:.2f} (Target: {target_contrast:.2f})")
    print(f"✓ Original image replaced with corrected version")

# Example usage: match every image in a test folder to one reference image.
# Images are corrected in place; the reference image lives in the same folder
# and is therefore processed as well.

# Step 1: Extract brightness/contrast data from reference image
reference_path = r"C:\Users\jeffr\Desktop\test\01416.jpg"
bc_data = extract_brightness_contrast(reference_path)

print("\n" + "="*50)

# Step 2: Collect the images to correct
image_extensions = ['*.jpg', '*.jpeg', '*.png']

image_paths = [
    match
    for pattern in image_extensions
    for match in glob.glob(os.path.join(r"C:\Users\jeffr\Desktop\test", pattern))
]

# Step 3: Correct each image, overwriting the original file
for img_path in image_paths:
    apply_brightness_contrast(img_path, bc_data)
    print("-" * 30)

print("\n✓ All images have been processed and replaced with corrected versions!")

In [68]:
# Generate training data - first frame
# Relies on image_extensions defined in an earlier cell.

# Sorted image paths of every view, one list per view folder.
frames_by_views = []
for view_folder in rgb_sequence_by_view_folders:
    images = sorted(
        match
        for ext in image_extensions
        for match in glob.glob(os.path.join(view_folder, ext))
    )
    frames_by_views.append(images)

# Move (not copy) the first frame of each view into frame_0, named by view index.
# Afterwards the first path in each frames_by_views entry no longer exists,
# and re-running this cell would move the next frame instead.
first_frame_folder = os.path.join(frames_to_train, "frame_0")
for i, view_frames in enumerate(frames_by_views):
    move_to_folder(view_frames[0], first_frame_folder, f"{i:05d}.jpg")

In [4]:
# Remove background
# Runs the RobustVideoMatting converter over every view folder and writes the
# result of each view as a PNG sequence under the temp folder.

rvm_path = r"C:\Repo\RobustVideoMatting"
sys.path.append(rvm_path)

model = torch.hub.load("PeterL1n/RobustVideoMatting", "mobilenetv3").cuda()
convert_video = torch.hub.load("PeterL1n/RobustVideoMatting", "converter")

temp_folder = r"N:\shared\yaojie\250728-Capture\temp"
if not os.path.exists(temp_folder):
    os.makedirs(temp_folder)

frames_by_views = []  # reset here; not filled in by this cell

# Converter settings shared by all views.
matting_options = dict(
    downsample_ratio=None,        # [Optional] If None, make downsampled max size be 512px.
    output_type='png_sequence',   # Choose "video" or "png_sequence"
    seq_chunk=15,                 # Process n frames at once for better parallelism.
    num_workers=5,                # Only for image sequence input. Reader threads.
    progress=True,                # Print conversion progress.
)

for view_index, view_folder in enumerate(rgb_sequence_by_view_folders):
    convert_video(
        model,                    # The loaded model, can be on any device (cpu or cuda).
        input_source=view_folder, # A video file or an image sequence directory.
        # Directory path, because the output type is a png sequence.
        output_composition=os.path.join(temp_folder, f"view_{view_index:02d}"),
        **matting_options,
    )

Using cache found in C:\Users\jeffr/.cache\torch\hub\PeterL1n_RobustVideoMatting_master
Using cache found in C:\Users\jeffr/.cache\torch\hub\PeterL1n_RobustVideoMatting_master
  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 1923/1923 [02:50<00:00, 11.29it/s]
100%|██████████| 1923/1923 [02:50<00:00, 11.25it/s]
100%|██████████| 1923/1923 [02:38<00:00, 12.10it/s]
100%|██████████| 1923/1923 [02:22<00:00, 13.46it/s]
100%|██████████| 1923/1923 [02:30<00:00, 12.79it/s]
100%|██████████| 1923/1923 [02:25<00:00, 13.19it/s]
100%|██████████| 1923/1923 [02:33<00:00, 12.57it/s]
100%|██████████| 1923/1923 [02:33<00:00, 12.55it/s]
100%|██████████| 1923/1923 [02:18<00:00, 13.91it/s]
