## From video, we can check the crop image.

In [None]:
from utils.video_utils import extract_frames_fast

# --- CONFIGURATION ---
VIDEO_PATH    = '/media/holidayj/Documents/Data/Platform/Chungmuro/chungmuro_hasun_20221019_f1729_t2030/chungmuro_hasun_20221019T172940_20221019T203040.mp4'
OUTPUT_FOLDER = 'frames/chungmuro_hasun_20221019_f1729_t2030_1frame_700'

INTERVAL_SEC  = 1/3   # Extract 3 frames per second (approx)
CROP_SIZE     = 700
MARGIN_RIGHT  = 400
MARGIN_TOP    = 30
FILE_PREFIX   = "chungmuro_frame"

def main():
    extract_frames_fast(
        video_path=VIDEO_PATH,
        output_folder=OUTPUT_FOLDER,
        interval_sec=INTERVAL_SEC,
        crop_size=CROP_SIZE,
        margin_right=MARGIN_RIGHT,
        margin_top=MARGIN_TOP,
        file_prefix=FILE_PREFIX
    )

if __name__ == '__main__':
    main()

'/media/holidayj/Documents/github/ML/Python/annotation'

## From hd images, crop images.

In [1]:
import cv2
import os
import glob
from multiprocessing import Pool, cpu_count
from tqdm import tqdm

# --- Configuration ---
SOURCE_FOLDER = "/media/holidayj/Documents/Data/Platform/Chungmuro/chungmuro_hasun_20221019_f1729_t2030/frames/chungmuro_hasun_10frame_1920_train_arrival"
OUTPUT_FOLDER = "./cropped_images_700"

# Crop Parameters
CROP_SIZE = 700
MARGIN_RIGHT = 400
MARGIN_TOP = 30

def save_image_worker(args):
    """
    Independent worker function to save the image.
    (Reused from utils/video_utils.py pattern)
    """
    img_data, save_path = args
    cv2.imwrite(save_path, img_data)

def process_existing_images(source_folder, output_folder, crop_size, margin_right, margin_top):
    # 1. Setup
    if not os.path.exists(source_folder):
        print(f"Error: Source folder not found at {source_folder}")
        return

    os.makedirs(output_folder, exist_ok=True)

    # Get list of images (assuming .jpg, add .png if needed)
    image_paths = sorted(glob.glob(os.path.join(source_folder, "*.jpg")))
    
    if not image_paths:
        print("No .jpg images found in source folder.")
        return

    # 2. Calculate Crop Coordinates based on the first image
    first_img = cv2.imread(image_paths[0])
    if first_img is None:
        print("Error reading the first image.")
        return

    h, width, _ = first_img.shape
    
    # Logic: x_start = width - margin_right - crop_size
    x_start = width - margin_right - crop_size
    y_start = margin_top
    
    # Boundary checks
    if x_start < 0: x_start = 0
    if y_start < 0: y_start = 0

    print(f"--- Processing {len(image_paths)} Images ---")
    print(f"Original Size: {width}x{h}")
    print(f"Crop: {crop_size}x{crop_size} at ({x_start}, {y_start})")
    
    # 3. Initialize Worker Pool
    worker_count = max(1, cpu_count() - 1)
    print(f"Using {worker_count} background processes for saving.")
    
    pool = Pool(processes=worker_count)
    saved_count = 0

    # 4. Processing Loop
    print("Starting batch crop...")
    
    for img_path in tqdm(image_paths, unit="img"):
        # Read image in main process
        frame = cv2.imread(img_path)
        
        if frame is None:
            continue

        # Crop Logic
        cropped = frame[y_start : y_start + crop_size, 
                        x_start : x_start + crop_size]
        
        # Construct filename (keep original name)
        filename = os.path.basename(img_path)
        save_path = os.path.join(output_folder, filename)
        
        # Async Save
        pool.apply_async(save_image_worker, args=((cropped, save_path),))
        saved_count += 1

    # 5. Cleanup
    print("\nProcessing finished. Waiting for file writes to complete...")
    pool.close()
    pool.join()
    print(f"Done! Saved {saved_count} images to '{output_folder}'.")

if __name__ == "__main__":
    process_existing_images(SOURCE_FOLDER, OUTPUT_FOLDER, CROP_SIZE, MARGIN_RIGHT, MARGIN_TOP)

--- Processing 4161 Images ---
Original Size: 1920x1080
Crop: 700x700 at (820, 30)
Using 7 background processes for saving.
Starting batch crop...


100%|██████████| 4161/4161 [02:11<00:00, 31.58img/s]


Processing finished. Waiting for file writes to complete...
Done! Saved 4161 images to './cropped_images_700'.





Scanning reference directory...
Found 4161 images in Source.
Found 2769 files in Reference.
Starting copy process...


  0%|          | 0/4161 [00:00<?, ?file/s]

100%|██████████| 4161/4161 [00:00<00:00, 4760.19file/s]


--- Summary ---
Total processed: 4161
Copied to TEMP1 (Matched): 1384
Copied to TEMP2 (Rest):    2777
Done.





## Saving 1 frame.


## Finding Cropping area from Video

In [None]:
import cv2
import os

# --- CONFIGURATION ---
video_path      = '/media/holidayj/Documents/Data/Platform/Chungmuro/chungmuro_sangsun_20221019_f1729_t2029/chungmuro_sangsun_20221019T172940-20221019T202940.mp4'
output_folder   = 'output_frames'
output_filename = 'cropped_700.jpg'
# crop_size       = 600
crop_size       = 700

# Cropping Margins
margin_top   = 30    # Move down pixels from the top edge
margin_right = 400  # Move left 120 pixels from the right edge
# ---------------------

os.makedirs(output_folder, exist_ok=True)

cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print(f"Error: Could not open video at {video_path}")
else:
    # 1. Search for the first valid frame (Fix for the [h264] error)
    frame_found = False
    max_attempts = 100
    
    print("Searching for a valid Keyframe...")
    
    for i in range(max_attempts):
        ret, frame = cap.read()
        if ret:
            print(f"Success: Valid frame found at index {i}")
            
            # --- CROP LOGIC STARTS HERE ---
            
            # 2. Get Dimensions
            height, width, _ = frame.shape
            
            # 3. Calculate Coordinates
            # Y: Start at top margin
            y_start = margin_top
            y_end = y_start + crop_size

            # X: Start from right side (width) - margin - crop_size
            x_end = width - margin_right
            x_start = x_end - crop_size

            print(f"Original Resolution: {width}x{height}")
            print(f"Cropping Area -> X: {x_start} to {x_end}, Y: {y_start} to {y_end}")

            # 4. Perform Crop
            cropped_frame = frame[y_start:y_end, x_start:x_end]

            # 5. Save
            full_save_path = os.path.join(output_folder, output_filename)
            cv2.imwrite(full_save_path, cropped_frame)
            print(f"Saved cropped image to: {full_save_path}")
            
            frame_found = True
            break # Stop after saving the first valid frame
            
            # --- CROP LOGIC ENDS HERE ---

    if not frame_found:
        print("Error: Could not find any valid frames in the beginning of the video.")

cap.release()

Searching for a valid Keyframe...
Success: Valid frame found at index 1
Original Resolution: 1920x1080
Cropping Area -> X: 1280 to 1920, Y: 0 to 640
Saved cropped image to: output_frames/cropped_700.jpg


[h264 @ 0x3a1d3240] missing picture in access unit with size 40
[h264 @ 0x3a1d3240] no frame!
[h264 @ 0x3a0c2180] no frame!


# Cropping area from the full frame images

In [4]:
import os
import cv2
from tqdm import tqdm  # Optional: for a progress bar, run 'pip install tqdm' if missing

# 1. Configuration
source_dir = "/media/holidayj/Documents/Data/Platform/Chungmuro/chungmuro_sangsun_20221019_f1729_t2029/chungmuro_sangsun_10frames_1920_train_arrival"
output_dir = os.path.join(source_dir, "cropped")
CROP_W, CROP_H = 640, 640

# 2. Setup
os.makedirs(output_dir, exist_ok=True)
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')

# Get list of images
files = [f for f in os.listdir(source_dir) if f.lower().endswith(image_extensions)]
print(f"Found {len(files)} images. Processing...")

# 3. Processing Loop
count = 0
for filename in tqdm(files):
    file_path = os.path.join(source_dir, filename)
    
    # Read Image
    img = cv2.imread(file_path)
    if img is None:
        print(f"Warning: Could not read {filename}")
        continue
    
    h, w, _ = img.shape
    
    # Check if image is large enough
    if w < CROP_W or h < CROP_H:
        print(f"Skipping {filename}: Image smaller than crop size ({w}x{h})")
        continue

    # 4. Calculate Top-Right Coordinates
    # Y: Starts at 0, ends at 640
    # X: Starts at (Width - 640), ends at Width
    x_start = w - CROP_W
    y_start = 0
    
    # Crop: img[y:y+h, x:x+w]
    cropped_img = img[y_start : y_start + CROP_H, x_start : x_start + CROP_W]
    
    # 5. Save
    save_path = os.path.join(output_dir, filename)
    cv2.imwrite(save_path, cropped_img)
    count += 1

print(f"\nDone! {count} images saved to:\n{output_dir}")

Found 4945 images. Processing...


100%|██████████| 4945/4945 [02:52<00:00, 28.66it/s]


Done! 4945 images saved to:
/media/holidayj/Documents/Data/Platform/Chungmuro/chungmuro_sangsun_20221019_f1729_t2029/chungmuro_sangsun_10frames_1920_train_arrival/cropped





## Put cropped images into set1 to set5 folders. (Round Robin)

In [6]:
import os
import shutil

# 1. Configuration
base_dir = "/media/holidayj/Documents/Data/Platform/Chungmuro/chungmuro_sangsun_20221019_f1729_t2029/chungmuro_sangsun_10frames_1920_train_arrival/cropped"
num_sets = 5

# 2. Get and Sort Files
valid_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')
files = [f for f in os.listdir(base_dir) if f.lower().endswith(valid_exts)]
files.sort() # Important to keep the sequence (1st, 2nd, 3rd...)

print(f"Found {len(files)} images. Distributing cyclically into {num_sets} sets...")

# Create the set folders first
for i in range(1, num_sets + 1):
    os.makedirs(os.path.join(base_dir, f"set{i}"), exist_ok=True)

# 3. Distribute Files Round-Robin
for index, filename in enumerate(files):
    # Calculate which set (0 to 4) -> (1 to 5)
    # 0 % 5 = 0 -> set1
    # 1 % 5 = 1 -> set2
    # ...
    # 5 % 5 = 0 -> set1
    set_num = (index % num_sets) + 1
    
    src_path = os.path.join(base_dir, filename)
    dst_path = os.path.join(base_dir, f"set{set_num}", filename)
    
    shutil.move(src_path, dst_path)

print("Done! Distribution complete.")

Found 4945 images. Distributing cyclically into 5 sets...
Done! Distribution complete.


## Extracting full frames

In [2]:
import cv2
import os
import math
from datetime import datetime, timedelta
from multiprocessing import Pool, cpu_count
from tqdm import tqdm

# --- CONFIGURATION ---
VIDEO_PATH    = '/media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20251111_f0700_t1000/euljiro_inner_20251111_f0700_t1000.mp4'
OUTPUT_FOLDER = '/media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20251111_f0700_t1000/10frames'

# Video Start Time (Hour, Minute, Second)
START_TIME_STR = "07:00:00"

FRAME_STEP    = 10
CROP_SIZE     = 320
MARGIN_RIGHT  = 120
MARGIN_TOP    = 0
# ---------------------

def extract_frames_worker(args):
    """
    Worker function to be run by each CPU core.
    Now accepts 'fps' and 'start_time_obj' to calculate timestamps.
    """
    video_path, frames_to_process, (dir_crop, dir_orig), (x_start, y_start), fps, start_dt = args
    
    cap = cv2.VideoCapture(video_path)
    count = 0
    
    for frame_idx in frames_to_process:
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
        ret, frame = cap.read()
        
        if ret:
            # --- Calculate Timestamp ---
            # Seconds elapsed = frame_number / fps
            seconds_elapsed = frame_idx / fps
            
            # Add to start time
            current_time = start_dt + timedelta(seconds=seconds_elapsed)
            
            # Format: HHMMSS (e.g., 070001)
            time_str = current_time.strftime("%H%M%S")
            
            # New Filename: euljiro_070001_frame_000030.jpg
            filename = f"euljiro_{time_str}_frame_{frame_idx:06d}.jpg"

            # 1. Save Original
            path_orig = os.path.join(dir_orig, filename)
            cv2.imwrite(path_orig, frame)

            # 2. Save Cropped
            cropped = frame[y_start : y_start + CROP_SIZE, 
                            x_start : x_start + CROP_SIZE]
            
            path_crop = os.path.join(dir_crop, filename)
            cv2.imwrite(path_crop, cropped)
            count += 1
    
    cap.release()
    return count

def main():
    # 1. Setup Folders
    dir_crop = os.path.join(OUTPUT_FOLDER, 'cropped')
    dir_orig = os.path.join(OUTPUT_FOLDER, 'original')
    os.makedirs(dir_crop, exist_ok=True)
    os.makedirs(dir_orig, exist_ok=True)

    # 2. Parse Start Time
    # We use a dummy date (today) because timedelta requires a datetime object
    start_dt = datetime.strptime(START_TIME_STR, "%H:%M:%S")

    # 3. Analyze Video Metadata
    cap = cv2.VideoCapture(VIDEO_PATH)
    if not cap.isOpened():
        print(f"Error: Cannot open video at {VIDEO_PATH}")
        return

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    cap.release()

    # 4. Config
    x_start = width - MARGIN_RIGHT - CROP_SIZE
    y_start = MARGIN_TOP
    
    print(f"Video FPS: {fps}")
    print(f"Start Time: {START_TIME_STR}")
    print(f"Total Frames: {total_frames}")
    print(f"Saving to: {OUTPUT_FOLDER}")

    # 5. Generate Target Indices (Every 10 frames)
    target_indices = list(range(0, total_frames, FRAME_STEP))
    
    print(f"Extracting {len(target_indices)} frames (Step: {FRAME_STEP}) using {cpu_count()} CPUs...")

    # 6. Distribute work
    num_cpus = cpu_count()
    chunk_size = math.ceil(len(target_indices) / num_cpus)
    
    tasks = []
    for i in range(0, len(target_indices), chunk_size):
        chunk = target_indices[i : i + chunk_size]
        # Pass fps and start_dt to worker
        tasks.append((VIDEO_PATH, chunk, (dir_crop, dir_orig), (x_start, y_start), fps, start_dt))

    # 7. Execute
    with Pool(processes=num_cpus) as pool:
        with tqdm(total=len(target_indices), unit="img") as pbar:
            for saved_count in pool.imap_unordered(extract_frames_worker, tasks):
                pbar.update(saved_count)

    print("Done! Files saved with timestamp in name (e.g., euljiro_070001_frame_xxxxxx.jpg)")

if __name__ == '__main__':
    main()

Video FPS: 29.99988184680194
Start Time: 07:00:00
Total Frames: 329994
Saving to: /media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20251111_f0700_t1000/10frames
Extracting 33000 frames (Step: 10) using 8 CPUs...


100%|██████████| 33000/33000 [24:03<00:00, 22.86img/s] 

Done! Files saved with timestamp in name (e.g., euljiro_070001_frame_xxxxxx.jpg)





## Extracting frames and crop

In [None]:
'''
# chungmuro hasun config.
VIDEO_PATH    = '/media/holidayj/Documents/Videos/videos/platform/euljiro/euljoro_20251111_070000.mp4'
OUTPUT_FOLDER = '/media/holidayj/Documents/data/frames/euljiro_rush_20251111'
INTERVAL_SEC  = 0.2
CROP_SIZE     = 600
MARGIN_RIGHT  = 400
MARGIN_TOP    = 10
'''

import cv2
import numpy as np
import os
from multiprocessing import Pool, cpu_count
from tqdm import tqdm

# --- CONFIGURATION ---
VIDEO_PATH    = '/media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20221101_f1700_t2000/euljiro_inner_20221101_f1700_t2000.mp4'
# VIDEO_PATH    = '/media/holidayj/Documents/Data/Platform/Chungmuro/chungmuro_hasun_20221019_f1729_t2030/chungmuro_hasun_20221019T172940_20221019T203040.mp4'
# VIDEO_PATH    = '/home/holidayj/Videos/videos/platform/chungmuro/chungmuro_sangsun_20221019T172940-20221019T202940/chungmuro_sangsun_20221019T172940-20221019T202940.mp4'

OUTPUT_FOLDER = 'frames/euljiro_inner_20221101_f1700_t2000_1sec'
INTERVAL_SEC  = 1/3
CROP_SIZE     = 700
MARGIN_RIGHT  = 400
MARGIN_TOP    = 30



# OUTPUT_FOLDER = 'frames/chungmuro_hasun_6frames_700'
# INTERVAL_SEC  = 0.2
# CROP_SIZE     = 700
# MARGIN_RIGHT  = 400
# MARGIN_TOP    = 30
# ---------------------

def save_image_worker(args):
    """
    Independent worker function to save the image.
    This runs on separate CPUs.
    """
    img_data, save_path = args
    cv2.imwrite(save_path, img_data)

def main():
    # 1. Setup
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)
    
    cap = cv2.VideoCapture(VIDEO_PATH)
    if not cap.isOpened():
        print("Error: Cannot open video.")
        return

    # 2. Metadata
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = np.round(cap.get(cv2.CAP_PROP_FPS))
    # print("fps =", np.round(fps))
    
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    
    # 3. Crop Config
    x_start = width - MARGIN_RIGHT - CROP_SIZE
    y_start = MARGIN_TOP
    frame_step = int(fps * INTERVAL_SEC)
    if frame_step < 1: frame_step = 1

    print(f"FPS: {fps} | Step: {frame_step}")
    print(f"Using {cpu_count()} CPUs for saving images.")

    # 4. Initialize the Worker Pool (For saving only)
    # We use roughly 80% of CPUs to leave room for the main reader process
    worker_count = max(1, cpu_count() - 1) 
    pool = Pool(processes=worker_count)
    
    current_idx = 0
    saved_count = 0

    # 5. Fast Reader Loop
    # The main loop now NEVER waits for disk I/O. 
    # It just throws the image to the pool and immediately reads the next one.
    with tqdm(total=total_frames, unit="frame") as pbar:
        while True:
            ret, frame = cap.read()

            if not ret:
                if current_idx < 100: # Skip initial corruption
                    current_idx += 1
                    pbar.update(1)
                    continue
                else:
                    break

            if current_idx % frame_step == 0:
                # Crop
                cropped = frame[0:1080,
                                0:1920]
                cropped = frame[y_start : y_start + CROP_SIZE, 
                                x_start : x_start + CROP_SIZE]
                
                # Construct path
                filename = f"chungmuro_frame_{current_idx:06d}.jpg"
                save_path = os.path.join(OUTPUT_FOLDER, filename)
                
                # --- ASYNC SAVE ---
                # Fire and forget. The main loop continues immediately.
                pool.apply_async(save_image_worker, args=((cropped, save_path),))
                saved_count += 1

            current_idx += 1
            pbar.update(1)

    cap.release()
    
    print("\nReading finished. Waiting for remaining file writes to complete...")
    pool.close()
    pool.join() # Wait for the background workers to finish saving
    print(f"Done! Saved {saved_count} images.")

if __name__ == '__main__':
    main()

FPS: 30.0 | Step: 10
Using 8 CPUs for saving images.


[h264 @ 0x3e6630c0] missing picture in access unit with size 40
[h264 @ 0x3e6630c0] no frame!
  0%|          | 0/325817 [00:00<?, ?frame/s][h264 @ 0x3dde2580] no frame!
  0%|          | 881/325817 [00:02<17:25, 310.94frame/s]Process ForkPoolWorker-18:
Process ForkPoolWorker-20:
Process ForkPoolWorker-15:
Process ForkPoolWorker-21:
Process ForkPoolWorker-19:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/holidayj/anaconda3/envs/hj/lib/python3.13/multiprocessing/process.py", line 313, in _bootstrap
    self.run()
    ~~~~~~~~^^
  File "/home/holidayj/anaconda3/envs/hj/lib/python3.13/multiprocessing/process.py", line 313, in _bootstrap
    self.run()
    ~~~~~~~~^^
  File "/home/holidayj/anaconda3/envs/hj/lib/python3.13/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
    ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Traceback (most recent call 

KeyboardInterrupt: 

# This code select frames only those divisible by 30, and crops to get the dataset.

In [1]:
import cv2
import os
import glob
from multiprocessing import Pool, cpu_count
from tqdm import tqdm

# --- CONFIGURATION ---
SOURCE_FOLDER = '/media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_10frame_1920_train_arrival'
OUTPUT_FOLDER = os.path.join(SOURCE_FOLDER, '30_frames_crop')

# Filter Condition: Every 30 frames (0, 30, 60, 90...)
TARGET_FRAME_STEP = 30

# Crop Configuration
CROP_SIZE     = 700   # 700x700 square
MARGIN_RIGHT  = 400
MARGIN_TOP    = 30
# ---------------------

def crop_worker(args):
    """
    Worker function to read an image, crop it, and save it.
    args: (file_path, save_path, crop_coords)
    crop_coords: (y_start, y_end, x_start, x_end)
    """
    file_path, save_path, (y_s, y_e, x_s, x_e) = args
    
    img = cv2.imread(file_path)
    if img is None:
        return False

    # Crop the image using numpy slicing [y:y+h, x:x+w]
    cropped_img = img[y_s:y_e, x_s:x_e]
    
    cv2.imwrite(save_path, cropped_img)
    return True

def main():
    # 1. Setup Folders
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)
    
    # 2. Get list of all images
    print(f"Scanning files in: {SOURCE_FOLDER}")
    all_files = glob.glob(os.path.join(SOURCE_FOLDER, "*.jpg"))
    
    if not all_files:
        print("Error: No images found in source folder.")
        return

    # 3. Calculate Crop Coordinates (Based on the first image found)
    # We assume all images have the same resolution (likely 1920x1080)
    sample_img = cv2.imread(all_files[0])
    img_h, img_w = sample_img.shape[:2]
    
    # Logic: Start X = Width - Margin_Right - Crop_Size
    x_start = img_w - MARGIN_RIGHT - CROP_SIZE
    x_end   = x_start + CROP_SIZE
    y_start = MARGIN_TOP
    y_end   = y_start + CROP_SIZE
    
    crop_coords = (y_start, y_end, x_start, x_end)
    
    print(f"Image Size: {img_w}x{img_h}")
    print(f"Crop X: {x_start} ~ {x_end} (Width: {CROP_SIZE})")
    print(f"Crop Y: {y_start} ~ {y_end} (Height: {CROP_SIZE})")

    # 4. Filter files: Only keep frames where number % 30 == 0
    tasks = []
    print(f"Filtering for every {TARGET_FRAME_STEP}th frame...")
    
    for file_path in all_files:
        filename = os.path.basename(file_path)
        
        # Parse frame number from "chungmuro_frame_002060.jpg"
        try:
            # Split by '_' take last part, remove .jpg extension
            frame_part = filename.split('_')[-1] 
            frame_str = frame_part.split('.')[0]
            frame_num = int(frame_str)
            
            # CHECK CONDITION
            if frame_num % TARGET_FRAME_STEP == 0:
                save_path = os.path.join(OUTPUT_FOLDER, filename)
                tasks.append((file_path, save_path, crop_coords))
                
        except ValueError:
            # Skip files that don't match the naming pattern
            continue

    print(f"Found {len(tasks)} frames to process.")
    
    # 5. Execute Parallel Processing
    num_cpus = cpu_count()
    print(f"Processing with {num_cpus} CPUs...")
    
    with Pool(processes=num_cpus) as pool:
        # Use imap to show progress bar
        list(tqdm(pool.imap(crop_worker, tasks), total=len(tasks), unit="img"))

    print(f"\nSuccess! Cropped images saved to: {OUTPUT_FOLDER}")

if __name__ == '__main__':
    main()

Scanning files in: /media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_10frame_1920_train_arrival
Image Size: 1920x1080
Crop X: 820 ~ 1520 (Width: 700)
Crop Y: 30 ~ 730 (Height: 700)
Filtering for every 30th frame...
Found 1384 frames to process.
Processing with 8 CPUs...


100%|██████████| 1384/1384 [00:15<00:00, 90.24img/s] 


Success! Cropped images saved to: /media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_10frame_1920_train_arrival/30_frames_crop





# Applying CLAHE
Too much noise? Decrease CLIP_LIMIT from 3.0 to 2.0.

Still too dark? Increase CLIP_LIMIT to 4.0 or 5.0.

In [3]:
import cv2
import os
import glob
from multiprocessing import Pool, cpu_count
from tqdm import tqdm

# --- CONFIGURATION ---
# The folder containing the already cropped images
INPUT_FOLDER = '/media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_10frame_1920_train_arrival/30_frames_crop'

# The new subfolder for CLAHE images
OUTPUT_FOLDER = os.path.join(INPUT_FOLDER, 'clahe')

# CLAHE Settings
# clipLimit: Higher = more contrast (and more noise). 2.0 to 4.0 is standard.
# tileGridSize: Size of the local area to inspect. (8,8) is standard.
CLIP_LIMIT = 5.0 
GRID_SIZE = (8, 8)
# ---------------------

def clahe_worker(args):
    """
    Reads an image, applies CLAHE to the Lightness channel, and saves it.
    """
    file_path, save_path = args
    
    img = cv2.imread(file_path)
    if img is None:
        return False

    # 1. Convert BGR to LAB color space
    lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)

    # 2. Split into L, A, B channels
    l_channel, a, b = cv2.split(lab)

    # 3. Apply CLAHE to L-channel
    clahe = cv2.createCLAHE(clipLimit=CLIP_LIMIT, tileGridSize=GRID_SIZE)
    cl = clahe.apply(l_channel)

    # 4. Merge the CLAHE enhanced L-channel with the original A and B channels
    merged_lab = cv2.merge((cl, a, b))

    # 5. Convert back to BGR
    final_img = cv2.cvtColor(merged_lab, cv2.COLOR_LAB2BGR)
    
    cv2.imwrite(save_path, final_img)
    return True

def main():
    # 1. Setup Folders
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)
    
    # 2. Get list of cropped images
    print(f"Scanning files in: {INPUT_FOLDER}")
    all_files = glob.glob(os.path.join(INPUT_FOLDER, "*.jpg"))
    
    if not all_files:
        print("Error: No images found. Make sure you ran the crop script first.")
        return

    print(f"Found {len(all_files)} images. Applying CLAHE (ClipLimit={CLIP_LIMIT})...")

    # 3. Prepare Tasks
    tasks = []
    for file_path in all_files:
        filename = os.path.basename(file_path)
        save_path = os.path.join(OUTPUT_FOLDER, filename)
        tasks.append((file_path, save_path))
    
    # 4. Execute Parallel Processing
    num_cpus = cpu_count()
    print(f"Processing with {num_cpus} CPUs...")
    
    with Pool(processes=num_cpus) as pool:
        list(tqdm(pool.imap(clahe_worker, tasks), total=len(tasks), unit="img"))

    print(f"\nDone! Enhanced images saved to: {OUTPUT_FOLDER}")

if __name__ == '__main__':
    main()

Scanning files in: /media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_10frame_1920_train_arrival/30_frames_crop
Found 1384 images. Applying CLAHE (ClipLimit=5.0)...
Processing with 8 CPUs...


100%|██████████| 1384/1384 [00:30<00:00, 45.89img/s]



Done! Enhanced images saved to: /media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_10frame_1920_train_arrival/30_frames_crop/clahe


# Moving images into 2 folders
Frame 0: Index 0 (Even) $\rightarrow$ set_1

Frame 30: Index 1 (Odd) $\rightarrow$ set_2

Frame 60: Index 2 (Even) $\rightarrow$ set_1

Frame 90: Index 3 (Odd) $\rightarrow$ set_2

In [4]:
import os
import glob
import shutil

# --- CONFIGURATION ---
# The folder containing the CLAHE images
INPUT_FOLDER = '/media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_10frame_1920_train_arrival/30_frames_crop/clahe'

# The two output folders
FOLDER_1 = os.path.join(INPUT_FOLDER, 'set_1')
FOLDER_2 = os.path.join(INPUT_FOLDER, 'set_2')
# ---------------------

def main():
    # 1. Create output folders
    os.makedirs(FOLDER_1, exist_ok=True)
    os.makedirs(FOLDER_2, exist_ok=True)

    # 2. Get list of files
    # We must sort them to ensure the "alternating" logic follows the frame order
    print(f"Scanning files in: {INPUT_FOLDER}")
    files = glob.glob(os.path.join(INPUT_FOLDER, "*.jpg"))
    files.sort()  # Crucial: Ensures we process frame_0, frame_1, frame_2 in order

    if not files:
        print("Error: No images found to split.")
        return

    print(f"Found {len(files)} images. Splitting...")

    count_1 = 0
    count_2 = 0

    # 3. Iterate and Move
    for i, file_path in enumerate(files):
        filename = os.path.basename(file_path)
        
        # If index is Even (0, 2, 4...) -> Set 1
        # If index is Odd  (1, 3, 5...) -> Set 2
        if i % 2 == 0:
            dest_path = os.path.join(FOLDER_1, filename)
            count_1 += 1
        else:
            dest_path = os.path.join(FOLDER_2, filename)
            count_2 += 1
            
        shutil.move(file_path, dest_path)
        # Use shutil.copy(file_path, dest_path) if you don't want to delete originals

    print("-" * 30)
    print(f"Total processed: {len(files)}")
    print(f"Moved to Set 1:  {count_1} images")
    print(f"Moved to Set 2:  {count_2} images")
    print("-" * 30)
    print(f"Location 1: {FOLDER_1}")
    print(f"Location 2: {FOLDER_2}")

if __name__ == '__main__':
    main()

Scanning files in: /media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_10frame_1920_train_arrival/30_frames_crop/clahe
Found 1384 images. Splitting...
------------------------------
Total processed: 1384
Moved to Set 1:  692 images
Moved to Set 2:  692 images
------------------------------
Location 1: /media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_10frame_1920_train_arrival/30_frames_crop/clahe/set_1
Location 2: /media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_10frame_1920_train_arrival/30_frames_crop/clahe/set_2


# Counting class

In [5]:
import os
import glob
from collections import defaultdict

# Define the path to your dataset
dataset_path = '/media/holidayj/Documents/data/euljiro_2nd/with_descending'

# Find all .txt files in the directory
label_files = glob.glob(os.path.join(dataset_path, '*.txt'))

print(f"Found {len(label_files)} label files in {dataset_path}")

# Initialize a dictionary to count objects per class
class_counts = defaultdict(int)

# Iterate through each label file
for file_path in label_files:
    try:
        with open(file_path, 'r') as f:
            lines = f.readlines()
            for line in lines:
                parts = line.strip().split()
                # Ensure the line is not empty
                if parts:
                    # In YOLO format, the first element is the class ID
                    class_id = int(parts[0])
                    class_counts[class_id] += 1
    except Exception as e:
        print(f"Error reading {file_path}: {e}")

# Print the results
print("\nObject counts per class:")
# Sort by class ID for cleaner output
for class_id in sorted(class_counts.keys()):
    print(f"Class {class_id}: {class_counts[class_id]}")

Found 2741 label files in /media/holidayj/Documents/data/euljiro_2nd/with_descending
Error reading /media/holidayj/Documents/data/euljiro_2nd/with_descending/classes.txt: invalid literal for int() with base 10: 'U'

Object counts per class:
Class 0: 2600
Class 1: 3473
Class 2: 5312


In [2]:
import os
import shutil
import glob

# --- Configuration ---
source_dir = '/media/holidayj/Documents/data/euljiro_2nd'
target_folder_name = 'without_descending'
target_dir = os.path.join(source_dir, target_folder_name)
target_class = 1  # The class ID for "descending"

# --- Setup ---
# Create the destination directory if it doesn't exist
if not os.path.exists(target_dir):
    os.makedirs(target_dir)
    print(f"Created directory: {target_dir}")

# Get list of all label files
label_files = glob.glob(os.path.join(source_dir, '*.txt'))
moved_count = 0

print(f"Scanning {len(label_files)} files...")

# --- Processing ---
for label_path in label_files:
    filename = os.path.basename(label_path)
    file_base_name = os.path.splitext(filename)[0]
    
    has_descending = False
    
    try:
        with open(label_path, 'r') as f:
            lines = f.readlines()
            for line in lines:
                parts = line.strip().split()
                if len(parts) > 0:
                    class_id = int(parts[0])
                    if class_id == target_class:
                        has_descending = True
                        break  # Stop checking this file if we found class 1
        
        # If NO descending class was found, move the files
        if not has_descending:
            # 1. Move the Label file
            shutil.move(label_path, os.path.join(target_dir, filename))
            
            # 2. Find and Move the Image file
            # We check common image extensions
            image_extensions = ['.jpg', '.jpeg', '.png', '.bmp']
            image_found = False
            
            for ext in image_extensions:
                image_name = file_base_name + ext
                src_image_path = os.path.join(source_dir, image_name)
                
                if os.path.exists(src_image_path):
                    shutil.move(src_image_path, os.path.join(target_dir, image_name))
                    image_found = True
                    break # Stop checking extensions once image is found
            
            moved_count += 1
            if not image_found:
                print(f"Warning: Moved label {filename}, but could not find corresponding image.")

    except Exception as e:
        print(f"Error processing {filename}: {e}")

print("---")
print(f"Process complete. Moved {moved_count} pairs to '{target_dir}'.")

Scanning 5072 files...
Error processing classes.txt: invalid literal for int() with base 10: 'U'
---
Process complete. Moved 2533 pairs to '/media/holidayj/Documents/data/euljiro_2nd/without_descending'.


In [None]:
import os
import shutil

# Define the directories
# Where the JPGs are currently located (and where TXTs should go)
target_dir = '/media/holidayj/Documents/data/euljiro_2nd/without_descending/descent'

# Where the TXTs are currently located
source_txt_dir = '/media/holidayj/Documents/data/euljiro_2nd/without_descending'

# Counter to track progress
count = 0

# Iterate through all files in the target directory (the descent folder)
for filename in os.listdir(target_dir):
    # Check if the file is a JPG
    if filename.lower().endswith('.jpg'):
        
        # Extract the filename without the extension (e.g., 'image_01')
        file_root = os.path.splitext(filename)[0]
        
        # Construct the expected text file name
        txt_filename = file_root + '.txt'
        
        # Define the full path for the source text file
        src_txt_path = os.path.join(source_txt_dir, txt_filename)
        
        # Define the full destination path
        dst_txt_path = os.path.join(target_dir, txt_filename)
        
        # Check if the corresponding text file exists in the source directory
        if os.path.exists(src_txt_path):
            try:
                # Move the file
                shutil.move(src_txt_path, dst_txt_path)
                print(f"Matched and Moved: {txt_filename}")
                count += 1
            except Exception as e:
                print(f"Error moving {txt_filename}: {e}")

print(f"---")
print(f"Operation Complete. Total files moved: {count}")

In [11]:
import cv2
import os
import glob
from tqdm import tqdm  # Progress bar (optional, install with `pip install tqdm`)

# 1. Define paths
input_folder = '/media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_60frames_700'
output_folder = os.path.join(input_folder, 'equalized')

# 2. Create output directory if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# 3. Get list of all jpg images
image_files = glob.glob(os.path.join(input_folder, '*.jpg'))
print(f"Found {len(image_files)} images.")

# 4. Process images
for img_path in tqdm(image_files, desc="Processing"):
    # Read the image
    img = cv2.imread(img_path)
    if img is None:
        print(f"Failed to read: {img_path}")
        continue

    # Convert from BGR to YCrCb (we want to equalize luminance 'Y', not colors)
    img_yuv = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)

    # Equalize the histogram of the Y channel
    img_yuv[:,:,0] = cv2.equalizeHist(img_yuv[:,:,0])

    # Convert back to BGR
    img_output = cv2.cvtColor(img_yuv, cv2.COLOR_YCrCb2BGR)

    # Save the result
    filename = os.path.basename(img_path)
    save_path = os.path.join(output_folder, filename)
    cv2.imwrite(save_path, img_output)

print(f"\nProcessing complete! \nImages saved to: {output_folder}")

Found 5430 images.


Processing: 100%|██████████| 5430/5430 [03:13<00:00, 28.09it/s]


Processing complete! 
Images saved to: /media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_60frames_700/equalized





In [4]:
import cv2
import os
import glob
import pandas as pd
import numpy as np
from multiprocessing import Pool, cpu_count
from tqdm import tqdm

# --- CONFIGURATION ---
VIDEO_PATH     = '/media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20221101_f1700_t2000/euljiro_inner_20221101_f1700_t2000.mp4'
SOURCE_IMG_DIR = '/media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20221101_f1700_t2000/euljiro_2nd/with_descending_ung_1120'
OUTPUT_ROOT    = '/media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20221101_f1700_t2000/extraction_1sec_re'

# Gap Thresholds (Frames)
# "Approx 58~59 frames". We need a small buffer.
# If gap is > MAX_GAP, we assume "No Train" and skip the middle frame.
MIN_GAP = 50 
MAX_GAP = 70  # Allow slight jitter above 60 (e.g. 61-62) but block long pauses

# Crop Config
CROP_SIZE     = 320
MARGIN_RIGHT  = 120
MARGIN_TOP    = 0
# ---------------------

def parse_frame_number(filename):
    """Extracts 120 from 'euljiro_frame_000120.jpg'"""
    try:
        name_no_ext = os.path.splitext(filename)[0]
        # flexible parsing: grabs the last chunk after underscore
        num_str = name_no_ext.split('_')[-1]
        return int(num_str)
    except:
        return None

def extract_frames_worker(args):
    """
    Worker: Opens video, extracts listed frames, saves Original & Crop.
    """
    video_path, frames_to_process, (dir_orig, dir_crop), (x_start, y_start) = args
    
    # Sort for efficient seeking
    frames_to_process.sort()
    
    cap = cv2.VideoCapture(video_path)
    count = 0
    
    for frame_idx in frames_to_process:
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
        ret, frame = cap.read()
        
        if ret:
            filename = f"euljiro_frame_{frame_idx:06d}.jpg"

            # 1. Save Original
            cv2.imwrite(os.path.join(dir_orig, filename), frame)

            # 2. Crop & Save
            cropped = frame[y_start : y_start + CROP_SIZE, 
                            x_start : x_start + CROP_SIZE]
            cv2.imwrite(os.path.join(dir_crop, filename), cropped)
            count += 1
            
    cap.release()
    return count

def main():
    # 1. Setup
    dir_crop = os.path.join(OUTPUT_ROOT, 'cropped')
    dir_orig = os.path.join(OUTPUT_ROOT, 'original')
    os.makedirs(dir_crop, exist_ok=True)
    os.makedirs(dir_orig, exist_ok=True)

    # 2. Scan Existing Frames
    print(f"Scanning source: {SOURCE_IMG_DIR}")
    src_files = glob.glob(os.path.join(SOURCE_IMG_DIR, "*.jpg"))
    existing_frames = []
    for f in src_files:
        num = parse_frame_number(os.path.basename(f))
        if num is not None:
            existing_frames.append(num)
    
    existing_frames.sort()
    print(f"Found {len(existing_frames)} existing reference frames.")

    # 3. Analyze Gaps & Build Plan
    extraction_list = [] # Final list of all frames to extract
    csv_data = []        # For reporting

    print(f"Analyzing gaps (Target: {MIN_GAP} < gap < {MAX_GAP})...")

    for i in range(len(existing_frames) - 1):
        curr_fr = existing_frames[i]
        next_fr = existing_frames[i+1]
        gap = next_fr - curr_fr
        
        # Always add the 'current' existing frame to the extraction list
        extraction_list.append(curr_fr)

        # Logic: Check if we should insert a middle frame
        middle_fr = 0
        status = "Skipped (Gap too large/small)"
        
        if MIN_GAP <= gap <= MAX_GAP:
            middle_fr = curr_fr + (gap // 2)
            extraction_list.append(middle_fr)
            status = "Middle Frame Added"
        
        # Add to CSV Report
        csv_data.append({
            'Existing_A': curr_fr,
            'Existing_B': next_fr,
            'Gap': gap,
            'Middle_Frame': middle_fr if status == "Middle Frame Added" else "N/A",
            'Status': status
        })

    # Don't forget the very last existing frame
    if existing_frames:
        extraction_list.append(existing_frames[-1])

    # Remove duplicates just in case
    extraction_list = sorted(list(set(extraction_list)))

    # 4. Save CSV
    csv_path = os.path.join(OUTPUT_ROOT, 'extraction_plan.csv')
    df = pd.DataFrame(csv_data)
    df.to_csv(csv_path, index=False)
    
    print("-" * 40)
    print(f"Plan saved to: {csv_path}")
    print(f"Total frames to extract: {len(extraction_list)}")
    print(df.head()) # Show preview
    print("-" * 40)

    # 5. Get Video Metadata for Crop
    cap = cv2.VideoCapture(VIDEO_PATH)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    cap.release()

    x_start = width - MARGIN_RIGHT - CROP_SIZE
    y_start = MARGIN_TOP
    crop_params = (x_start, y_start)

    # 6. Parallel Extraction
    print("Starting extraction from VIDEO...")
    
    num_cpus = cpu_count()
    # Batch size: give each CPU a reasonable chunk of frames to process
    chunk_size = int(np.ceil(len(extraction_list) / num_cpus))
    if chunk_size < 1: chunk_size = 1

    tasks = []
    for i in range(0, len(extraction_list), chunk_size):
        chunk = extraction_list[i : i + chunk_size]
        tasks.append((VIDEO_PATH, chunk, (dir_orig, dir_crop), crop_params))

    with Pool(processes=num_cpus) as pool:
        with tqdm(total=len(extraction_list), unit="frame") as pbar:
            for count in pool.imap_unordered(extract_frames_worker, tasks):
                pbar.update(count)

    print(f"\nDone! CSV and images saved to: {OUTPUT_ROOT}")

if __name__ == '__main__':
    main()

Scanning source: /media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20221101_f1700_t2000/euljiro_2nd/with_descending_ung_1120
Found 0 existing reference frames.
Analyzing gaps (Target: 50 < gap < 70)...
----------------------------------------
Plan saved to: /media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20221101_f1700_t2000/extraction_1sec_re/extraction_plan.csv
Total frames to extract: 0
Empty DataFrame
Columns: []
Index: []
----------------------------------------
Starting extraction from VIDEO...


0frame [00:00, ?frame/s]


Done! CSV and images saved to: /media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20221101_f1700_t2000/extraction_1sec_re





Video Info: 331751 frames | 29.999683531044475 FPS
Generated 33176 frames based on 1/3s interval.
Scanning existing frames in: /media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20221101_f1700_t2000/euljiro_2nd/with_descending_ung_1120
Found 3333 existing frames to preserve.
----------------------------------------
Plan saved to: /media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20221101_f1700_t2000/extraction_plan_full_video.csv
Total frames to extract: 36174
----------------------------------------
Preview (First 15 rows):
    Frame_Number           Source  Timestamp_Sec
0              0  New (1/3s Grid)           0.00
1             10  New (1/3s Grid)           0.33
2             20  New (1/3s Grid)           0.67
3             30  New (1/3s Grid)           1.00
4             40  New (1/3s Grid)           1.33
5             50  New (1/3s Grid)           1.67
6             60  New (1/3s Grid)           2.00
7             70  New (1/3s Grid)           2.33
8    

In [3]:
import cv2
import os
import glob
from multiprocessing import Pool, cpu_count
from tqdm import tqdm

# --- CONFIGURATION ---
SOURCE_DIR = '/media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20221101_f1700_t2000/extraction_10_frames/60_frames'
DEST_DIR   = os.path.join(SOURCE_DIR, 'cropped')

# Crop Config
CROP_SIZE     = 320
MARGIN_RIGHT  = 120
MARGIN_TOP    = 0
# ---------------------

def crop_worker(args):
    """
    Worker function to read, crop, and save a single image.
    args: (file_path, save_path, crop_coords)
    """
    file_path, save_path, (x_start, y_start) = args
    
    img = cv2.imread(file_path)
    if img is None:
        return False

    # Crop logic: img[y:y+h, x:x+w]
    cropped = img[y_start : y_start + CROP_SIZE, 
                  x_start : x_start + CROP_SIZE]
    
    cv2.imwrite(save_path, cropped)
    return True

def main():
    # 1. Setup Folders
    os.makedirs(DEST_DIR, exist_ok=True)

    # 2. Scan Files
    print(f"Scanning images in: {SOURCE_DIR}")
    files = glob.glob(os.path.join(SOURCE_DIR, "*.jpg"))
    
    if not files:
        print("Error: No images found.")
        return

    print(f"Found {len(files)} images to crop.")

    # 3. Calculate Crop Coordinates (Based on first image)
    # We need to read one image to get the width (for right margin calculation)
    sample_img = cv2.imread(files[0])
    if sample_img is None:
        print("Error: Could not read the first image.")
        return

    img_h, img_w = sample_img.shape[:2]
    
    # Calculate X Start (Width - Margin - CropSize)
    x_start = img_w - MARGIN_RIGHT - CROP_SIZE
    y_start = MARGIN_TOP
    
    print(f"Image Size: {img_w}x{img_h}")
    print(f"Crop Area: X[{x_start}:{x_start+CROP_SIZE}], Y[{y_start}:{y_start+CROP_SIZE}]")

    # 4. Prepare Parallel Tasks
    tasks = []
    for file_path in files:
        filename = os.path.basename(file_path)
        save_path = os.path.join(DEST_DIR, filename)
        tasks.append((file_path, save_path, (x_start, y_start)))

    # 5. Execute
    num_cpus = cpu_count()
    print(f"Cropping with {num_cpus} CPUs...")
    
    with Pool(processes=num_cpus) as pool:
        # Use imap to show progress bar
        list(tqdm(pool.imap(crop_worker, tasks), total=len(tasks), unit="img"))

    print("-" * 30)
    print(f"Done! Cropped images saved to: {DEST_DIR}")

if __name__ == '__main__':
    main()

Scanning images in: /media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20221101_f1700_t2000/extraction_10_frames/60_frames
Found 5530 images to crop.
Image Size: 1920x1080
Crop Area: X[1480:1800], Y[0:320]
Cropping with 8 CPUs...


100%|██████████| 5530/5530 [00:47<00:00, 115.36img/s]

------------------------------
Done! Cropped images saved to: /media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20221101_f1700_t2000/extraction_10_frames/60_frames/cropped





In [1]:
import os
import glob
import shutil
from tqdm import tqdm

# --- CONFIGURATION ---
SOURCE_DIR = '/home/holidayj/Downloads/train_arrival'
DEST_DIR   = os.path.join(SOURCE_DIR, '30_frames')

# Target Interval to Move
# You have frames every 10 (0, 10, 20, 30...).
# You want to move frames divisible by 30 (0, 30, 60...).
TARGET_STEP = 30
# ---------------------

def parse_frame_number(filename):
    """
    Extracts number from filename. 
    Example: 'euljiro_frame_000030.jpg' -> 30
    """
    try:
        # Remove extension
        name_no_ext = os.path.splitext(filename)[0]
        # Split by '_' and take the last part
        num_str = name_no_ext.split('_')[-1]
        return int(num_str)
    except ValueError:
        return None

def main():
    # 1. Create Destination Folder
    os.makedirs(DEST_DIR, exist_ok=True)

    # 2. Scan Files
    print(f"Scanning files in: {SOURCE_DIR}")
    # Assumes .jpg files. Change to *.png if needed.
    files = glob.glob(os.path.join(SOURCE_DIR, "*.jpg"))
    
    if not files:
        print("Error: No images found.")
        return

    print(f"Found {len(files)} total files.")

    # 3. Filter and Move
    moved_count = 0
    
    print(f"Moving frames matching 'frame_number % {TARGET_STEP} == 0'...")
    
    for file_path in tqdm(files, unit="file"):
        filename = os.path.basename(file_path)
        frame_num = parse_frame_number(filename)
        
        if frame_num is not None:
            # Check if it is a "30th" frame (0, 30, 60, 90...)
            if frame_num % TARGET_STEP == 0:
                dest_path = os.path.join(DEST_DIR, filename)
                shutil.move(file_path, dest_path)
                moved_count += 1

    print("-" * 30)
    print(f"Done! Moved {moved_count} images.")
    print(f"Location: {DEST_DIR}")

if __name__ == '__main__':
    main()

Scanning files in: /home/holidayj/Downloads/train_arrival
Found 3905 total files.
Moving frames matching 'frame_number % 30 == 0'...


100%|██████████| 3905/3905 [00:00<00:00, 66465.75file/s]

------------------------------
Done! Moved 1226 images.
Location: /home/holidayj/Downloads/train_arrival/30_frames





In [2]:
import os
import glob
import pandas as pd
import numpy as np

# --- CONFIGURATION ---
SOURCE_DIR = '/media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20221101_f1700_t2000/extraction_10_frames'
OUTPUT_CSV = os.path.join(SOURCE_DIR, 'frame_analysis_report.csv')

# Expected Grid Interval
INTERVAL = 10
# ---------------------

def parse_frame_number(filename):
    """
    Extracts 120 from 'euljiro_frame_000120.jpg' or 'euljiro_070001_frame_000120.jpg'
    """
    try:
        name_no_ext = os.path.splitext(filename)[0]
        # Always take the last segment after splitting by underscore
        num_str = name_no_ext.split('_')[-1]
        return int(num_str)
    except ValueError:
        return None

def main():
    # 1. Scan Files
    print(f"Scanning files in: {SOURCE_DIR}")
    files = glob.glob(os.path.join(SOURCE_DIR, "*.jpg"))
    
    if not files:
        print("Error: No images found.")
        return

    # 2. Parse Data
    data = []
    frame_set = set()
    
    for f in files:
        fname = os.path.basename(f)
        fnum = parse_frame_number(fname)
        
        if fnum is not None:
            data.append({'filename': fname, 'frame_number': fnum})
            frame_set.add(fnum)
    
    # Sort by frame number
    df = pd.DataFrame(data)
    df = df.sort_values(by='frame_number')
    
    if df.empty:
        print("Error: Could not parse any frame numbers.")
        return

    # 3. Analyze Range
    min_frame = df['frame_number'].min()
    max_frame = df['frame_number'].max()
    total_files = len(df)
    
    print("-" * 40)
    print(f"First Frame: {min_frame}")
    print(f"Last Frame:  {max_frame}")
    print(f"Total Files: {total_files}")
    print("-" * 40)

    # 4. Check for Missing Frames (Based on 10-frame interval)
    # We expect frames: min, min+10, min+20 ... max
    # Note: We enforce the grid to start at 'min_frame' found.
    expected_grid = set(range(min_frame, max_frame + 1, INTERVAL))
    
    # Missing = Expected - Existing
    missing_frames = sorted(list(expected_grid - frame_set))
    
    if missing_frames:
        print(f"WARNING: Found {len(missing_frames)} MISSING frames in the 10-frame grid.")
        print(f"First 5 missing: {missing_frames[:5]}")
        if len(missing_frames) > 5: print("...")
    else:
        print("SUCCESS: No missing frames in the 10-frame grid.")

    # 5. Check for Extra Frames (Not in the 10-frame grid)
    # Extras = Existing - Expected
    # Note: This logic assumes 'min_frame' aligns with the grid. 
    # If min_frame is 3 and interval is 10, grid is 3, 13, 23...
    extra_frames = sorted(list(frame_set - expected_grid))
    
    if extra_frames:
        print(f"NOTE: Found {len(extra_frames)} EXTRA frames (not part of the regular interval).")
        print(f"First 5 extras: {extra_frames[:5]}")
    else:
        print("NOTE: No extra frames found (Pure 10-frame dataset).")

    # 6. Save CSV
    # We save the simple sorted list of files
    df.to_csv(OUTPUT_CSV, index=False)
    print("-" * 40)
    print(f"Sorted file list saved to: {OUTPUT_CSV}")

if __name__ == '__main__':
    main()

Scanning files in: /media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20221101_f1700_t2000/extraction_10_frames
----------------------------------------
First Frame: 0
Last Frame:  331750
Total Files: 36174
----------------------------------------
SUCCESS: No missing frames in the 10-frame grid.
NOTE: Found 2998 EXTRA frames (not part of the regular interval).
First 5 extras: [944, 1003, 1062, 1121, 1239]
----------------------------------------
Sorted file list saved to: /media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20221101_f1700_t2000/extraction_10_frames/frame_analysis_report.csv


In [1]:
import os
import glob
import shutil
from tqdm import tqdm

# --- CONFIGURATION ---
# The folder you asked me to analyze in the previous turn
SOURCE_DIR = '/media/holidayj/Documents/Data/Platform/Chungmuro/chungmuro_sangsun_20221019_f1729_t2029/chungmuro_sangsun_10frames_1920_train_arrival/cropped'
DEST_DIR   = os.path.join(SOURCE_DIR, '60_frames')

# Target Interval: Copy only frames divisible by 60
TARGET_STEP = 30
# ---------------------

def parse_frame_number(filename):
    """
    Robustly extracts the frame number from filenames like:
    - euljiro_frame_000120.jpg
    - euljiro_070005_frame_000120.jpg
    """
    try:
        name_no_ext = os.path.splitext(filename)[0]
        # Always take the last segment after splitting by underscore
        num_str = name_no_ext.split('_')[-1]
        return int(num_str)
    except ValueError:
        return None

def main():
    # 1. Setup Destination Folder
    os.makedirs(DEST_DIR, exist_ok=True)

    # 2. Scan Files
    print(f"Scanning files in: {SOURCE_DIR}")
    files = glob.glob(os.path.join(SOURCE_DIR, "*.jpg"))
    
    if not files:
        print("Error: No images found.")
        return

    print(f"Found {len(files)} total images.")
    print(f"Copying frames where 'frame_number % {TARGET_STEP} == 0'...")

    # 3. Filter and Copy
    copied_count = 0
    
    for file_path in tqdm(files, unit="img"):
        filename = os.path.basename(file_path)
        frame_num = parse_frame_number(filename)
        
        if frame_num is not None:
            # Check if it matches the 60-frame interval
            if frame_num % TARGET_STEP == 0:
                dest_path = os.path.join(DEST_DIR, filename)
                
                # copy2 preserves file metadata (timestamps)
                shutil.copy2(file_path, dest_path)
                copied_count += 1

    print("-" * 30)
    print(f"Done! Copied {copied_count} images.")
    print(f"Target Folder: {DEST_DIR}")

if __name__ == '__main__':
    main()

Scanning files in: /media/holidayj/Documents/Data/Platform/Chungmuro/chungmuro_sangsun_20221019_f1729_t2029/chungmuro_sangsun_10frames_1920_train_arrival/cropped
Found 4945 total images.
Copying frames where 'frame_number % 30 == 0'...


100%|██████████| 4945/4945 [00:01<00:00, 2729.88img/s]

------------------------------
Done! Copied 1651 images.
Target Folder: /media/holidayj/Documents/Data/Platform/Chungmuro/chungmuro_sangsun_20221019_f1729_t2029/chungmuro_sangsun_10frames_1920_train_arrival/cropped/60_frames





In [5]:
import os
import shutil
from pathlib import Path

def filter_yolo_dataset(source_dir, dest_subfolder_name="valid_subset"):
    source_path = Path(source_dir)
    dest_path = source_path / dest_subfolder_name
    
    # Create destination directory if it doesn't exist
    dest_path.mkdir(exist_ok=True)
    
    # Image extensions to look for
    valid_image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff'}
    
    count_copied = 0
    
    print(f"Scanning: {source_path}")
    
    # List all .txt files in the directory
    txt_files = list(source_path.glob("*.txt"))
    
    for txt_file in txt_files:
        # 1. Check if annotation file is valid (has at least one line)
        has_annotation = False
        try:
            with open(txt_file, 'r') as f:
                lines = [line.strip() for line in f.readlines() if line.strip()]
                if len(lines) > 0:
                    has_annotation = True
        except Exception as e:
            print(f"Error reading {txt_file.name}: {e}")
            continue

        if not has_annotation:
            continue
            
        # 2. Find matching image
        base_name = txt_file.stem
        image_found = None
        
        for ext in valid_image_extensions:
            potential_img = source_path / (base_name + ext)
            if potential_img.exists():
                image_found = potential_img
                break
        
        # 3. Copy both if image exists and annotation is valid
        if image_found:
            try:
                shutil.copy2(txt_file, dest_path / txt_file.name)
                shutil.copy2(image_found, dest_path / image_found.name)
                count_copied += 1
            except Exception as e:
                print(f"Error copying {base_name}: {e}")

    print("-" * 30)
    print(f"Process complete.")
    print(f"Files copied: {count_copied} pairs")
    print(f"Destination: {dest_path}")

if __name__ == "__main__":
    # Your specific folder path
    # folder_path = "/media/holidayj/Documents/Data/Platform/Chungmuro/chungmuro_sangsun_20221019_f1729_t2029/chungmuro_sangsun_10frames_1920_train_arrival/cropped/30_frames_kuni_20251227"
    folder_path = "/media/holidayj/Documents/Data/Platform/Chungmuro/chungmuro_hasun_20221019_f1729_t2030/frames/chungmuro_hasun_10frame_1920_train_arrival/cropped_700/30_frames_crop/set_all_done"

    filter_yolo_dataset(folder_path)

Scanning: /media/holidayj/Documents/Data/Platform/Chungmuro/chungmuro_hasun_20221019_f1729_t2030/frames/chungmuro_hasun_10frame_1920_train_arrival/cropped_700/30_frames_crop/set_all_done
------------------------------
Process complete.
Files copied: 1332 pairs
Destination: /media/holidayj/Documents/Data/Platform/Chungmuro/chungmuro_hasun_20221019_f1729_t2030/frames/chungmuro_hasun_10frame_1920_train_arrival/cropped_700/30_frames_crop/set_all_done/valid_subset


In [1]:
import os
import re
import pandas as pd

# 1. Define the path
folder_path = '/media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20251111_f0700_t1000/Ung_euljiro2025_inner_20260101/done'

# 2. Get list of image files
try:
    files = sorted([f for f in os.listdir(folder_path) if f.lower().endswith(('.jpg', '.png', '.jpeg', '.bmp', '.tif'))])
except FileNotFoundError:
    print(f"Error: The folder path does not exist:\n{folder_path}")
    exit()

if not files:
    print("No image files found in the folder.")
else:
    print(f"Found {len(files)} images. Checking intervals...")

    # 3. Extract frame numbers from filenames
    # Assumes format like "image_000120.jpg" where the last number is the frame
    frame_numbers = []
    for f in files:
        # Regex: Find the last sequence of digits before the file extension
        match = re.search(r'(\d+)(?=\.\w+$)', f)
        if match:
            frame_numbers.append(int(match.group(1)))
    
    if len(frame_numbers) < 2:
        print("Not enough images to calculate intervals.")
    else:
        frame_numbers.sort()
        
        # 4. Calculate differences (intervals)
        # diffs = [next - current]
        diffs = [frame_numbers[i+1] - frame_numbers[i] for i in range(len(frame_numbers)-1)]
        
        # 5. Analyze results
        diff_counts = pd.Series(diffs).value_counts().sort_index()
        
        print("\n--- Interval Analysis ---")
        print(diff_counts)
        
        # Check specific condition
        most_common_interval = diff_counts.idxmax()
        consistency = (diff_counts.max() / len(diffs)) * 100
        
        print(f"\nMost common interval: {most_common_interval} frames")
        print(f"Consistency: {consistency:.2f}% of gaps are {most_common_interval} frames.")
        
        if most_common_interval == 60 and consistency == 100:
            print("\n✅ SUCCESS: All images are exactly 60 frames (2 seconds) apart.")
        elif most_common_interval == 60:
            print("\n⚠️  WARNING: Mostly 60 frames, but some gaps exist (see table above).")
        else:
            print(f"\n❌ FAIL: The interval is not 60 frames. It appears to be {most_common_interval}.")

Found 1419 images. Checking intervals...

--- Interval Analysis ---
60     1098
120     320
Name: count, dtype: int64

Most common interval: 60 frames
Consistency: 77.43% of gaps are 60 frames.



In [3]:
import os
import re
import shutil

# 1. Define the path (Update if needed)
folder_path = '/media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20251111_f0700_t1000/Ung_euljiro2025_inner_20260101/done'
target_subfolder = 'interval_120_gap'

# 2. Create the subfolder if it doesn't exist
destination_path = os.path.join(folder_path, target_subfolder)
if not os.path.exists(destination_path):
    os.makedirs(destination_path)
    print(f"Created subfolder: {destination_path}")

# 3. Get and sort list of image files
# Extensions to check
img_extensions = ('.jpg', '.png', '.jpeg', '.bmp', '.tif')
files = sorted([f for f in os.listdir(folder_path) if f.lower().endswith(img_extensions)])

if not files:
    print("No image files found.")
    exit()

print(f"Scanning {len(files)} files for 120-frame gaps...")

moved_count = 0
frame_map = [] # List of tuples: (frame_number, filename)

# 4. Extract frame numbers
for f in files:
    match = re.search(r'(\d+)(?=\.\w+$)', f)
    if match:
        frame_map.append((int(match.group(1)), f))

# Sort by frame number ensures correct interval calculation
frame_map.sort(key=lambda x: x[0])

# 5. Iterate and Move
# We start from index 1 because we need to compare with the previous file
for i in range(1, len(frame_map)):
    current_frame, current_file = frame_map[i]
    prev_frame, prev_file = frame_map[i-1]
    
    interval = current_frame - prev_frame
    
    if interval == 120:
        # This current file is 120 frames away from the previous one
        # Move Image
        src_img = os.path.join(folder_path, current_file)
        dst_img = os.path.join(destination_path, current_file)
        
        try:
            shutil.move(src_img, dst_img)
            print(f"Moved Image: {current_file} (Gap: {interval})")
            
            # Move corresponding Text file
            # Assuming txt has same name but .txt extension
            base_name = os.path.splitext(current_file)[0]
            txt_file = base_name + ".txt"
            src_txt = os.path.join(folder_path, txt_file)
            dst_txt = os.path.join(destination_path, txt_file)
            
            if os.path.exists(src_txt):
                shutil.move(src_txt, dst_txt)
                print(f"   Moved Txt: {txt_file}")
            
            moved_count += 1
            
        except Exception as e:
            print(f"Error moving {current_file}: {e}")

print(f"\nOperation Complete.")
print(f"Total files moved: {moved_count}")

Created subfolder: /media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20251111_f0700_t1000/Ung_euljiro2025_inner_20260101/done/interval_120_gap
Scanning 1419 files for 120-frame gaps...
Moved Image: euljiro_073640_frame_066000.jpg (Gap: 120)
   Moved Txt: euljiro_073640_frame_066000.txt
Moved Image: euljiro_073644_frame_066120.jpg (Gap: 120)
   Moved Txt: euljiro_073644_frame_066120.txt
Moved Image: euljiro_073648_frame_066240.jpg (Gap: 120)
   Moved Txt: euljiro_073648_frame_066240.txt
Moved Image: euljiro_073652_frame_066360.jpg (Gap: 120)
   Moved Txt: euljiro_073652_frame_066360.txt
Moved Image: euljiro_073656_frame_066480.jpg (Gap: 120)
   Moved Txt: euljiro_073656_frame_066480.txt
Moved Image: euljiro_073700_frame_066600.jpg (Gap: 120)
   Moved Txt: euljiro_073700_frame_066600.txt
Moved Image: euljiro_073704_frame_066720.jpg (Gap: 120)
   Moved Txt: euljiro_073704_frame_066720.txt
Moved Image: euljiro_073708_frame_066840.jpg (Gap: 120)
   Moved Txt: euljiro_073708_fram

In [5]:
import os
import re
import shutil

# 1. Define paths
folder_path = '/media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20251111_f0700_t1000/Ung_euljiro2025_inner_20260101/done'
target_subfolder = 'frames_multiple_of_120'

# 2. Create destination folder
destination_path = os.path.join(folder_path, target_subfolder)
if not os.path.exists(destination_path):
    os.makedirs(destination_path)
    print(f"Created subfolder: {destination_path}")

# 3. List images
img_extensions = ('.jpg', '.png', '.jpeg', '.bmp', '.tif')
files = [f for f in os.listdir(folder_path) if f.lower().endswith(img_extensions)]

print(f"Scanning {len(files)} files...")

moved_count = 0

for filename in files:
    # Extract frame number (looks for last digits in filename)
    # e.g., "euljiro_070002_frame_000060.jpg" -> 60
    match = re.search(r'(\d+)(?=\.\w+$)', filename)
    
    if match:
        frame_number = int(match.group(1))
        
        # CHECK: Is this frame number a multiple of 120? (0, 120, 240...)
        if frame_number % 120 == 0:
            
            # Move Image
            src_img = os.path.join(folder_path, filename)
            dst_img = os.path.join(destination_path, filename)
            
            try:
                shutil.move(src_img, dst_img)
                # print(f"Moved: {filename}") # Uncomment to see every move
                
                # Move corresponding Text file if it exists
                base_name = os.path.splitext(filename)[0]
                txt_file = base_name + ".txt"
                src_txt = os.path.join(folder_path, txt_file)
                dst_txt = os.path.join(destination_path, txt_file)
                
                if os.path.exists(src_txt):
                    shutil.move(src_txt, dst_txt)
                
                moved_count += 1
                
            except Exception as e:
                print(f"Error moving {filename}: {e}")

print(f"\nOperation Complete.")
print(f"Total files moved: {moved_count}")

Created subfolder: /media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20251111_f0700_t1000/Ung_euljiro2025_inner_20260101/done/frames_multiple_of_120
Scanning 1099 files...

Operation Complete.
Total files moved: 550


In [1]:
# Crop 을지로
import cv2
import os

# --- Configuration ---
file_path = "/media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20221101_f1700_t2000/temp/euljiro_frame_132000.jpg"
CROP_SIZE = 320
MARGIN_RIGHT = 120
MARGIN_TOP = 0

# --- Load Image ---
img = cv2.imread(file_path)

if img is None:
    print(f"Error: Image not found at {file_path}")
else:
    h, w, _ = img.shape

    # --- Calculate Coordinates ---
    # X coordinates (Horizontal)
    x_end = w - MARGIN_RIGHT
    x_start = x_end - CROP_SIZE
    
    # Y coordinates (Vertical)
    y_start = MARGIN_TOP
    y_end = y_start + CROP_SIZE

    # --- Perform Crop ---
    # Ensure coordinates are within image bounds
    if x_start < 0 or y_end > h:
        print("Warning: Crop region exceeds image dimensions.")
    
    crop_img = img[y_start:y_end, x_start:x_end]

    # --- Save ---
    # Saves to the same directory with a prefix
    dir_name = os.path.dirname(file_path)
    file_name = os.path.basename(file_path)
    save_path = os.path.join(dir_name, f"cropped_{file_name}")
    
    cv2.imwrite(save_path, crop_img)
    print(f"Success! Saved to: {save_path}")
    print(f"Crop Area: x[{x_start}:{x_end}], y[{y_start}:{y_end}]")

Success! Saved to: /media/holidayj/Documents/Data/Platform/Euljiro/Euljiro_inner_20221101_f1700_t2000/temp/cropped_euljiro_frame_132000.jpg
Crop Area: x[1480:1800], y[0:320]
