In [1]:
pwd

'/media/holidayj/Documents/github/ML/Python/annotation'

## Saving 1 frame.


## Finding Cropping area from Video

In [None]:
import cv2
import os

# --- CONFIGURATION ---
video_path      = '/media/holidayj/Documents/Data/Platform/Chungmuro/chungmuro_sangsun_20221019_f1729_t2029/chungmuro_sangsun_20221019T172940-20221019T202940.mp4'
output_folder   = 'output_frames'
output_filename = 'cropped_700.jpg'
# crop_size       = 600
crop_size       = 700

# Cropping Margins
margin_top   = 30    # Move down pixels from the top edge
margin_right = 400  # Move left 120 pixels from the right edge
# ---------------------

os.makedirs(output_folder, exist_ok=True)

cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print(f"Error: Could not open video at {video_path}")
else:
    # 1. Search for the first valid frame (Fix for the [h264] error)
    frame_found = False
    max_attempts = 100
    
    print("Searching for a valid Keyframe...")
    
    for i in range(max_attempts):
        ret, frame = cap.read()
        if ret:
            print(f"Success: Valid frame found at index {i}")
            
            # --- CROP LOGIC STARTS HERE ---
            
            # 2. Get Dimensions
            height, width, _ = frame.shape
            
            # 3. Calculate Coordinates
            # Y: Start at top margin
            y_start = margin_top
            y_end = y_start + crop_size

            # X: Start from right side (width) - margin - crop_size
            x_end = width - margin_right
            x_start = x_end - crop_size

            print(f"Original Resolution: {width}x{height}")
            print(f"Cropping Area -> X: {x_start} to {x_end}, Y: {y_start} to {y_end}")

            # 4. Perform Crop
            cropped_frame = frame[y_start:y_end, x_start:x_end]

            # 5. Save
            full_save_path = os.path.join(output_folder, output_filename)
            cv2.imwrite(full_save_path, cropped_frame)
            print(f"Saved cropped image to: {full_save_path}")
            
            frame_found = True
            break # Stop after saving the first valid frame
            
            # --- CROP LOGIC ENDS HERE ---

    if not frame_found:
        print("Error: Could not find any valid frames in the beginning of the video.")

cap.release()

Searching for a valid Keyframe...
Success: Valid frame found at index 1
Original Resolution: 1920x1080
Cropping Area -> X: 1280 to 1920, Y: 0 to 640
Saved cropped image to: output_frames/cropped_700.jpg


[h264 @ 0x3a1d3240] missing picture in access unit with size 40
[h264 @ 0x3a1d3240] no frame!
[h264 @ 0x3a0c2180] no frame!


# Cropping area from the full frame images

In [4]:
import os
import cv2
from tqdm import tqdm  # Optional: for a progress bar, run 'pip install tqdm' if missing

# 1. Configuration
source_dir = "/media/holidayj/Documents/Data/Platform/Chungmuro/chungmuro_sangsun_20221019_f1729_t2029/chungmuro_sangsun_10frames_1920_train_arrival"
output_dir = os.path.join(source_dir, "cropped")
CROP_W, CROP_H = 640, 640

# 2. Setup
os.makedirs(output_dir, exist_ok=True)
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')

# Get list of images
files = [f for f in os.listdir(source_dir) if f.lower().endswith(image_extensions)]
print(f"Found {len(files)} images. Processing...")

# 3. Processing Loop
count = 0
for filename in tqdm(files):
    file_path = os.path.join(source_dir, filename)
    
    # Read Image
    img = cv2.imread(file_path)
    if img is None:
        print(f"Warning: Could not read {filename}")
        continue
    
    h, w, _ = img.shape
    
    # Check if image is large enough
    if w < CROP_W or h < CROP_H:
        print(f"Skipping {filename}: Image smaller than crop size ({w}x{h})")
        continue

    # 4. Calculate Top-Right Coordinates
    # Y: Starts at 0, ends at 640
    # X: Starts at (Width - 640), ends at Width
    x_start = w - CROP_W
    y_start = 0
    
    # Crop: img[y:y+h, x:x+w]
    cropped_img = img[y_start : y_start + CROP_H, x_start : x_start + CROP_W]
    
    # 5. Save
    save_path = os.path.join(output_dir, filename)
    cv2.imwrite(save_path, cropped_img)
    count += 1

print(f"\nDone! {count} images saved to:\n{output_dir}")

Found 4945 images. Processing...


100%|██████████| 4945/4945 [02:52<00:00, 28.66it/s]


Done! 4945 images saved to:
/media/holidayj/Documents/Data/Platform/Chungmuro/chungmuro_sangsun_20221019_f1729_t2029/chungmuro_sangsun_10frames_1920_train_arrival/cropped





## Put cropped images into set1 to set5 folders. (Round Robin)

In [6]:
import os
import shutil

# 1. Configuration
base_dir = "/media/holidayj/Documents/Data/Platform/Chungmuro/chungmuro_sangsun_20221019_f1729_t2029/chungmuro_sangsun_10frames_1920_train_arrival/cropped"
num_sets = 5

# 2. Get and Sort Files
valid_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')
files = [f for f in os.listdir(base_dir) if f.lower().endswith(valid_exts)]
files.sort() # Important to keep the sequence (1st, 2nd, 3rd...)

print(f"Found {len(files)} images. Distributing cyclically into {num_sets} sets...")

# Create the set folders first
for i in range(1, num_sets + 1):
    os.makedirs(os.path.join(base_dir, f"set{i}"), exist_ok=True)

# 3. Distribute Files Round-Robin
for index, filename in enumerate(files):
    # Calculate which set (0 to 4) -> (1 to 5)
    # 0 % 5 = 0 -> set1
    # 1 % 5 = 1 -> set2
    # ...
    # 5 % 5 = 0 -> set1
    set_num = (index % num_sets) + 1
    
    src_path = os.path.join(base_dir, filename)
    dst_path = os.path.join(base_dir, f"set{set_num}", filename)
    
    shutil.move(src_path, dst_path)

print("Done! Distribution complete.")

Found 4945 images. Distributing cyclically into 5 sets...
Done! Distribution complete.


## Extracting full frames

In [1]:
import cv2
import numpy as np
import os
from multiprocessing import Pool, cpu_count
from tqdm import tqdm

# --- CONFIGURATION ---
VIDEO_PATH    = '/media/holidayj/Documents/Videos/videos/platform/euljiro/euljoro_20251111_070000.mp4'
OUTPUT_FOLDER = '/media/holidayj/Documents/data/frames/euljiro_rush_20251111'
INTERVAL_SEC  = 0.2
# ---------------------

def save_image_worker(args):
    """
    Independent worker function to save the image.
    This runs on separate CPUs to prevent the video reader from slowing down.
    """
    img_data, save_path = args
    cv2.imwrite(save_path, img_data)

def main():
    # 1. Setup
    # Create the output directory if it doesn't exist
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)
    
    cap = cv2.VideoCapture(VIDEO_PATH)
    if not cap.isOpened():
        print(f"Error: Cannot open video at {VIDEO_PATH}")
        return

    # 2. Metadata
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    
    # Calculate how many frames to skip based on interval
    # e.g., if FPS is 30 and Interval is 0.2s, step is 6 frames
    frame_step = int(np.round(fps * INTERVAL_SEC))
    if frame_step < 1: frame_step = 1

    print(f"Video Resolution: {width}x{height}")
    print(f"FPS: {fps} | Interval: {INTERVAL_SEC}s | Step: Every {frame_step} frames")
    print(f"Using {cpu_count()} CPUs for saving images.")

    # 3. Initialize the Worker Pool (For saving only)
    # We use roughly 80% of CPUs to leave room for the main reader process
    worker_count = max(1, cpu_count() - 1) 
    pool = Pool(processes=worker_count)
    
    current_idx = 0
    saved_count = 0

    # 4. Fast Reader Loop
    print("Starting extraction...")
    with tqdm(total=total_frames, unit="frame") as pbar:
        while True:
            ret, frame = cap.read()

            if not ret:
                # Handle potential initial corruption or end of file
                if current_idx < 100 and total_frames > 200: 
                    current_idx += 1
                    pbar.update(1)
                    continue
                else:
                    break

            # Check if this frame is one we want to save
            if current_idx % frame_step == 0:
                
                # Construct filename (e.g., euljiro_frame_000120.jpg)
                # Using a generic prefix "frame_" or extracting from video name
                filename = f"euljiro_frame_{current_idx:06d}.jpg"
                save_path = os.path.join(OUTPUT_FOLDER, filename)
                
                # --- ASYNC SAVE ---
                # Pass the WHOLE frame. No slicing [y:y, x:x] needed.
                pool.apply_async(save_image_worker, args=((frame, save_path),))
                saved_count += 1

            current_idx += 1
            pbar.update(1)

    cap.release()
    
    print("\nReading finished. Waiting for remaining file writes to complete...")
    pool.close()
    pool.join() # Wait for the background workers to finish saving
    print(f"Done! Saved {saved_count} full-size images to: {OUTPUT_FOLDER}")

if __name__ == '__main__':
    main()

Video Resolution: 1920x1080
FPS: 29.99988184680194 | Interval: 0.2s | Step: Every 6 frames
Using 8 CPUs for saving images.
Starting extraction...


100%|██████████| 329994/329994 [22:54<00:00, 240.13frame/s]



Reading finished. Waiting for remaining file writes to complete...
Done! Saved 54999 full-size images to: /media/holidayj/Documents/data/frames/euljiro_rush_20251111


## Extracting frames and crop

In [None]:
'''
# chungmuro hasun config.
VIDEO_PATH    = '/media/holidayj/Documents/Videos/videos/platform/euljiro/euljoro_20251111_070000.mp4'
OUTPUT_FOLDER = '/media/holidayj/Documents/data/frames/euljiro_rush_20251111'
INTERVAL_SEC  = 0.2
CROP_SIZE     = 600
MARGIN_RIGHT  = 400
MARGIN_TOP    = 10
'''
import cv2
import numpy as np
import os
from multiprocessing import Pool, cpu_count
from tqdm import tqdm

# --- CONFIGURATION ---
# VIDEO_PATH    = '/home/holidayj/Videos/videos/platform/chungmuro/chungmuro_hasun_20221019T172940_20221019T203040/chungmuro_hasun_20221019T172940_20221019T203040.mp4'
VIDEO_PATH    = '/home/holidayj/Videos/videos/platform/chungmuro/chungmuro_sangsun_20221019T172940-20221019T202940/chungmuro_sangsun_20221019T172940-20221019T202940.mp4'

OUTPUT_FOLDER = 'frames/chungmuro_sangsun_10frames_1920'
INTERVAL_SEC  = 1/3
CROP_SIZE     = 700
MARGIN_RIGHT  = 400
MARGIN_TOP    = 30



# OUTPUT_FOLDER = 'frames/chungmuro_hasun_6frames_700'
# INTERVAL_SEC  = 0.2
# CROP_SIZE     = 700
# MARGIN_RIGHT  = 400
# MARGIN_TOP    = 30
# ---------------------

def save_image_worker(args):
    """
    Independent worker function to save the image.
    This runs on separate CPUs.
    """
    img_data, save_path = args
    cv2.imwrite(save_path, img_data)

def main():
    # 1. Setup
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)
    
    cap = cv2.VideoCapture(VIDEO_PATH)
    if not cap.isOpened():
        print("Error: Cannot open video.")
        return

    # 2. Metadata
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = np.round(cap.get(cv2.CAP_PROP_FPS))
    # print("fps =", np.round(fps))
    
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    
    # 3. Crop Config
    x_start = width - MARGIN_RIGHT - CROP_SIZE
    y_start = MARGIN_TOP
    frame_step = int(fps * INTERVAL_SEC)
    if frame_step < 1: frame_step = 1

    print(f"FPS: {fps} | Step: {frame_step}")
    print(f"Using {cpu_count()} CPUs for saving images.")

    # 4. Initialize the Worker Pool (For saving only)
    # We use roughly 80% of CPUs to leave room for the main reader process
    worker_count = max(1, cpu_count() - 1) 
    pool = Pool(processes=worker_count)
    
    current_idx = 0
    saved_count = 0

    # 5. Fast Reader Loop
    # The main loop now NEVER waits for disk I/O. 
    # It just throws the image to the pool and immediately reads the next one.
    with tqdm(total=total_frames, unit="frame") as pbar:
        while True:
            ret, frame = cap.read()

            if not ret:
                if current_idx < 100: # Skip initial corruption
                    current_idx += 1
                    pbar.update(1)
                    continue
                else:
                    break

            if current_idx % frame_step == 0:
                # Crop
                cropped = frame[0:1080,
                                0:1920]
                # cropped = frame[y_start : y_start + CROP_SIZE, 
                #                 x_start : x_start + CROP_SIZE]
                
                # Construct path
                filename = f"chungmuro_frame_{current_idx:06d}.jpg"
                save_path = os.path.join(OUTPUT_FOLDER, filename)
                
                # --- ASYNC SAVE ---
                # Fire and forget. The main loop continues immediately.
                pool.apply_async(save_image_worker, args=((cropped, save_path),))
                saved_count += 1

            current_idx += 1
            pbar.update(1)

    cap.release()
    
    print("\nReading finished. Waiting for remaining file writes to complete...")
    pool.close()
    pool.join() # Wait for the background workers to finish saving
    print(f"Done! Saved {saved_count} images.")

if __name__ == '__main__':
    main()

FPS: 30.0 | Step: 10
Using 8 CPUs for saving images.


[h264 @ 0x67e06c0] missing picture in access unit with size 40
[h264 @ 0x67e06c0] no frame!
  0%|          | 0/323996 [00:00<?, ?frame/s][h264 @ 0x67df040] no frame!
100%|██████████| 323996/323996 [17:45<00:00, 304.20frame/s]



Reading finished. Waiting for remaining file writes to complete...
Done! Saved 32399 images.


# This code select frames only those divisible by 30, and crops to get the dataset.

In [1]:
import cv2
import os
import glob
from multiprocessing import Pool, cpu_count
from tqdm import tqdm

# --- CONFIGURATION ---
SOURCE_FOLDER = '/media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_10frame_1920_train_arrival'
OUTPUT_FOLDER = os.path.join(SOURCE_FOLDER, '30_frames_crop')

# Filter Condition: Every 30 frames (0, 30, 60, 90...)
TARGET_FRAME_STEP = 30

# Crop Configuration
CROP_SIZE     = 700   # 700x700 square
MARGIN_RIGHT  = 400
MARGIN_TOP    = 30
# ---------------------

def crop_worker(args):
    """
    Worker function to read an image, crop it, and save it.
    args: (file_path, save_path, crop_coords)
    crop_coords: (y_start, y_end, x_start, x_end)
    """
    file_path, save_path, (y_s, y_e, x_s, x_e) = args
    
    img = cv2.imread(file_path)
    if img is None:
        return False

    # Crop the image using numpy slicing [y:y+h, x:x+w]
    cropped_img = img[y_s:y_e, x_s:x_e]
    
    cv2.imwrite(save_path, cropped_img)
    return True

def main():
    # 1. Setup Folders
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)
    
    # 2. Get list of all images
    print(f"Scanning files in: {SOURCE_FOLDER}")
    all_files = glob.glob(os.path.join(SOURCE_FOLDER, "*.jpg"))
    
    if not all_files:
        print("Error: No images found in source folder.")
        return

    # 3. Calculate Crop Coordinates (Based on the first image found)
    # We assume all images have the same resolution (likely 1920x1080)
    sample_img = cv2.imread(all_files[0])
    img_h, img_w = sample_img.shape[:2]
    
    # Logic: Start X = Width - Margin_Right - Crop_Size
    x_start = img_w - MARGIN_RIGHT - CROP_SIZE
    x_end   = x_start + CROP_SIZE
    y_start = MARGIN_TOP
    y_end   = y_start + CROP_SIZE
    
    crop_coords = (y_start, y_end, x_start, x_end)
    
    print(f"Image Size: {img_w}x{img_h}")
    print(f"Crop X: {x_start} ~ {x_end} (Width: {CROP_SIZE})")
    print(f"Crop Y: {y_start} ~ {y_end} (Height: {CROP_SIZE})")

    # 4. Filter files: Only keep frames where number % 30 == 0
    tasks = []
    print(f"Filtering for every {TARGET_FRAME_STEP}th frame...")
    
    for file_path in all_files:
        filename = os.path.basename(file_path)
        
        # Parse frame number from "chungmuro_frame_002060.jpg"
        try:
            # Split by '_' take last part, remove .jpg extension
            frame_part = filename.split('_')[-1] 
            frame_str = frame_part.split('.')[0]
            frame_num = int(frame_str)
            
            # CHECK CONDITION
            if frame_num % TARGET_FRAME_STEP == 0:
                save_path = os.path.join(OUTPUT_FOLDER, filename)
                tasks.append((file_path, save_path, crop_coords))
                
        except ValueError:
            # Skip files that don't match the naming pattern
            continue

    print(f"Found {len(tasks)} frames to process.")
    
    # 5. Execute Parallel Processing
    num_cpus = cpu_count()
    print(f"Processing with {num_cpus} CPUs...")
    
    with Pool(processes=num_cpus) as pool:
        # Use imap to show progress bar
        list(tqdm(pool.imap(crop_worker, tasks), total=len(tasks), unit="img"))

    print(f"\nSuccess! Cropped images saved to: {OUTPUT_FOLDER}")

if __name__ == '__main__':
    main()

Scanning files in: /media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_10frame_1920_train_arrival
Image Size: 1920x1080
Crop X: 820 ~ 1520 (Width: 700)
Crop Y: 30 ~ 730 (Height: 700)
Filtering for every 30th frame...
Found 1384 frames to process.
Processing with 8 CPUs...


100%|██████████| 1384/1384 [00:15<00:00, 90.24img/s] 


Success! Cropped images saved to: /media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_10frame_1920_train_arrival/30_frames_crop





# Applying CLAHE
Too much noise? Decrease CLIP_LIMIT from 3.0 to 2.0.

Still too dark? Increase CLIP_LIMIT to 4.0 or 5.0.

In [3]:
import cv2
import os
import glob
from multiprocessing import Pool, cpu_count
from tqdm import tqdm

# --- CONFIGURATION ---
# The folder containing the already cropped images
INPUT_FOLDER = '/media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_10frame_1920_train_arrival/30_frames_crop'

# The new subfolder for CLAHE images
OUTPUT_FOLDER = os.path.join(INPUT_FOLDER, 'clahe')

# CLAHE Settings
# clipLimit: Higher = more contrast (and more noise). 2.0 to 4.0 is standard.
# tileGridSize: Size of the local area to inspect. (8,8) is standard.
CLIP_LIMIT = 5.0 
GRID_SIZE = (8, 8)
# ---------------------

def clahe_worker(args):
    """
    Reads an image, applies CLAHE to the Lightness channel, and saves it.
    """
    file_path, save_path = args
    
    img = cv2.imread(file_path)
    if img is None:
        return False

    # 1. Convert BGR to LAB color space
    lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)

    # 2. Split into L, A, B channels
    l_channel, a, b = cv2.split(lab)

    # 3. Apply CLAHE to L-channel
    clahe = cv2.createCLAHE(clipLimit=CLIP_LIMIT, tileGridSize=GRID_SIZE)
    cl = clahe.apply(l_channel)

    # 4. Merge the CLAHE enhanced L-channel with the original A and B channels
    merged_lab = cv2.merge((cl, a, b))

    # 5. Convert back to BGR
    final_img = cv2.cvtColor(merged_lab, cv2.COLOR_LAB2BGR)
    
    cv2.imwrite(save_path, final_img)
    return True

def main():
    # 1. Setup Folders
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)
    
    # 2. Get list of cropped images
    print(f"Scanning files in: {INPUT_FOLDER}")
    all_files = glob.glob(os.path.join(INPUT_FOLDER, "*.jpg"))
    
    if not all_files:
        print("Error: No images found. Make sure you ran the crop script first.")
        return

    print(f"Found {len(all_files)} images. Applying CLAHE (ClipLimit={CLIP_LIMIT})...")

    # 3. Prepare Tasks
    tasks = []
    for file_path in all_files:
        filename = os.path.basename(file_path)
        save_path = os.path.join(OUTPUT_FOLDER, filename)
        tasks.append((file_path, save_path))
    
    # 4. Execute Parallel Processing
    num_cpus = cpu_count()
    print(f"Processing with {num_cpus} CPUs...")
    
    with Pool(processes=num_cpus) as pool:
        list(tqdm(pool.imap(clahe_worker, tasks), total=len(tasks), unit="img"))

    print(f"\nDone! Enhanced images saved to: {OUTPUT_FOLDER}")

if __name__ == '__main__':
    main()

Scanning files in: /media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_10frame_1920_train_arrival/30_frames_crop
Found 1384 images. Applying CLAHE (ClipLimit=5.0)...
Processing with 8 CPUs...


100%|██████████| 1384/1384 [00:30<00:00, 45.89img/s]



Done! Enhanced images saved to: /media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_10frame_1920_train_arrival/30_frames_crop/clahe


# Moving images into 2 folders
Frame 0: Index 0 (Even) $\rightarrow$ set_1

Frame 30: Index 1 (Odd) $\rightarrow$ set_2

Frame 60: Index 2 (Even) $\rightarrow$ set_1

Frame 90: Index 3 (Odd) $\rightarrow$ set_2

In [4]:
import os
import glob
import shutil

# --- CONFIGURATION ---
# The folder containing the CLAHE images
INPUT_FOLDER = '/media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_10frame_1920_train_arrival/30_frames_crop/clahe'

# The two output folders
FOLDER_1 = os.path.join(INPUT_FOLDER, 'set_1')
FOLDER_2 = os.path.join(INPUT_FOLDER, 'set_2')
# ---------------------

def main():
    # 1. Create output folders
    os.makedirs(FOLDER_1, exist_ok=True)
    os.makedirs(FOLDER_2, exist_ok=True)

    # 2. Get list of files
    # We must sort them to ensure the "alternating" logic follows the frame order
    print(f"Scanning files in: {INPUT_FOLDER}")
    files = glob.glob(os.path.join(INPUT_FOLDER, "*.jpg"))
    files.sort()  # Crucial: Ensures we process frame_0, frame_1, frame_2 in order

    if not files:
        print("Error: No images found to split.")
        return

    print(f"Found {len(files)} images. Splitting...")

    count_1 = 0
    count_2 = 0

    # 3. Iterate and Move
    for i, file_path in enumerate(files):
        filename = os.path.basename(file_path)
        
        # If index is Even (0, 2, 4...) -> Set 1
        # If index is Odd  (1, 3, 5...) -> Set 2
        if i % 2 == 0:
            dest_path = os.path.join(FOLDER_1, filename)
            count_1 += 1
        else:
            dest_path = os.path.join(FOLDER_2, filename)
            count_2 += 1
            
        shutil.move(file_path, dest_path)
        # Use shutil.copy(file_path, dest_path) if you don't want to delete originals

    print("-" * 30)
    print(f"Total processed: {len(files)}")
    print(f"Moved to Set 1:  {count_1} images")
    print(f"Moved to Set 2:  {count_2} images")
    print("-" * 30)
    print(f"Location 1: {FOLDER_1}")
    print(f"Location 2: {FOLDER_2}")

if __name__ == '__main__':
    main()

Scanning files in: /media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_10frame_1920_train_arrival/30_frames_crop/clahe
Found 1384 images. Splitting...
------------------------------
Total processed: 1384
Moved to Set 1:  692 images
Moved to Set 2:  692 images
------------------------------
Location 1: /media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_10frame_1920_train_arrival/30_frames_crop/clahe/set_1
Location 2: /media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_10frame_1920_train_arrival/30_frames_crop/clahe/set_2


# Counting class

In [5]:
import os
import glob
from collections import defaultdict

# Define the path to your dataset
dataset_path = '/media/holidayj/Documents/data/euljiro_2nd/with_descending'

# Find all .txt files in the directory
label_files = glob.glob(os.path.join(dataset_path, '*.txt'))

print(f"Found {len(label_files)} label files in {dataset_path}")

# Initialize a dictionary to count objects per class
class_counts = defaultdict(int)

# Iterate through each label file
for file_path in label_files:
    try:
        with open(file_path, 'r') as f:
            lines = f.readlines()
            for line in lines:
                parts = line.strip().split()
                # Ensure the line is not empty
                if parts:
                    # In YOLO format, the first element is the class ID
                    class_id = int(parts[0])
                    class_counts[class_id] += 1
    except Exception as e:
        print(f"Error reading {file_path}: {e}")

# Print the results
print("\nObject counts per class:")
# Sort by class ID for cleaner output
for class_id in sorted(class_counts.keys()):
    print(f"Class {class_id}: {class_counts[class_id]}")

Found 2741 label files in /media/holidayj/Documents/data/euljiro_2nd/with_descending
Error reading /media/holidayj/Documents/data/euljiro_2nd/with_descending/classes.txt: invalid literal for int() with base 10: 'U'

Object counts per class:
Class 0: 2600
Class 1: 3473
Class 2: 5312


In [2]:
import os
import shutil
import glob

# --- Configuration ---
source_dir = '/media/holidayj/Documents/data/euljiro_2nd'
target_folder_name = 'without_descending'
target_dir = os.path.join(source_dir, target_folder_name)
target_class = 1  # The class ID for "descending"

# --- Setup ---
# Create the destination directory if it doesn't exist
if not os.path.exists(target_dir):
    os.makedirs(target_dir)
    print(f"Created directory: {target_dir}")

# Get list of all label files
label_files = glob.glob(os.path.join(source_dir, '*.txt'))
moved_count = 0

print(f"Scanning {len(label_files)} files...")

# --- Processing ---
for label_path in label_files:
    filename = os.path.basename(label_path)
    file_base_name = os.path.splitext(filename)[0]
    
    has_descending = False
    
    try:
        with open(label_path, 'r') as f:
            lines = f.readlines()
            for line in lines:
                parts = line.strip().split()
                if len(parts) > 0:
                    class_id = int(parts[0])
                    if class_id == target_class:
                        has_descending = True
                        break  # Stop checking this file if we found class 1
        
        # If NO descending class was found, move the files
        if not has_descending:
            # 1. Move the Label file
            shutil.move(label_path, os.path.join(target_dir, filename))
            
            # 2. Find and Move the Image file
            # We check common image extensions
            image_extensions = ['.jpg', '.jpeg', '.png', '.bmp']
            image_found = False
            
            for ext in image_extensions:
                image_name = file_base_name + ext
                src_image_path = os.path.join(source_dir, image_name)
                
                if os.path.exists(src_image_path):
                    shutil.move(src_image_path, os.path.join(target_dir, image_name))
                    image_found = True
                    break # Stop checking extensions once image is found
            
            moved_count += 1
            if not image_found:
                print(f"Warning: Moved label {filename}, but could not find corresponding image.")

    except Exception as e:
        print(f"Error processing {filename}: {e}")

print("---")
print(f"Process complete. Moved {moved_count} pairs to '{target_dir}'.")

Scanning 5072 files...
Error processing classes.txt: invalid literal for int() with base 10: 'U'
---
Process complete. Moved 2533 pairs to '/media/holidayj/Documents/data/euljiro_2nd/without_descending'.


In [6]:
import os
import shutil

# Define the directories
# Where the JPGs are currently located (and where TXTs should go)
target_dir = '/media/holidayj/Documents/data/euljiro_2nd/without_descending/descent'

# Where the TXTs are currently located
source_txt_dir = '/media/holidayj/Documents/data/euljiro_2nd/without_descending'

# Counter to track progress
count = 0

# Iterate through all files in the target directory (the descent folder)
for filename in os.listdir(target_dir):
    # Check if the file is a JPG
    if filename.lower().endswith('.jpg'):
        
        # Extract the filename without the extension (e.g., 'image_01')
        file_root = os.path.splitext(filename)[0]
        
        # Construct the expected text file name
        txt_filename = file_root + '.txt'
        
        # Define the full path for the source text file
        src_txt_path = os.path.join(source_txt_dir, txt_filename)
        
        # Define the full destination path
        dst_txt_path = os.path.join(target_dir, txt_filename)
        
        # Check if the corresponding text file exists in the source directory
        if os.path.exists(src_txt_path):
            try:
                # Move the file
                shutil.move(src_txt_path, dst_txt_path)
                print(f"Matched and Moved: {txt_filename}")
                count += 1
            except Exception as e:
                print(f"Error moving {txt_filename}: {e}")

print(f"---")
print(f"Operation Complete. Total files moved: {count}")

Matched and Moved: euljiro_frame_247505.txt
Matched and Moved: euljiro_frame_183903.txt
Matched and Moved: euljiro_frame_124608.txt
Matched and Moved: euljiro_frame_189567.txt
Matched and Moved: euljiro_frame_269866.txt
Matched and Moved: euljiro_frame_151276.txt
Matched and Moved: euljiro_frame_074753.txt
Matched and Moved: euljiro_frame_246266.txt
Matched and Moved: euljiro_frame_256178.txt
Matched and Moved: euljiro_frame_217415.txt
Matched and Moved: euljiro_frame_240543.txt
Matched and Moved: euljiro_frame_214760.txt
Matched and Moved: euljiro_frame_330577.txt
Matched and Moved: euljiro_frame_067142.txt
Matched and Moved: euljiro_frame_328217.txt
Matched and Moved: euljiro_frame_293938.txt
Matched and Moved: euljiro_frame_086317.txt
Matched and Moved: euljiro_frame_154639.txt
Matched and Moved: euljiro_frame_207267.txt
Matched and Moved: euljiro_frame_086907.txt
Matched and Moved: euljiro_frame_295944.txt
Matched and Moved: euljiro_frame_266621.txt
Matched and Moved: euljiro_frame

In [11]:
import cv2
import os
import glob
from tqdm import tqdm  # Progress bar (optional, install with `pip install tqdm`)

# 1. Define paths
input_folder = '/media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_60frames_700'
output_folder = os.path.join(input_folder, 'equalized')

# 2. Create output directory if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# 3. Get list of all jpg images
image_files = glob.glob(os.path.join(input_folder, '*.jpg'))
print(f"Found {len(image_files)} images.")

# 4. Process images
for img_path in tqdm(image_files, desc="Processing"):
    # Read the image
    img = cv2.imread(img_path)
    if img is None:
        print(f"Failed to read: {img_path}")
        continue

    # Convert from BGR to YCrCb (we want to equalize luminance 'Y', not colors)
    img_yuv = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)

    # Equalize the histogram of the Y channel
    img_yuv[:,:,0] = cv2.equalizeHist(img_yuv[:,:,0])

    # Convert back to BGR
    img_output = cv2.cvtColor(img_yuv, cv2.COLOR_YCrCb2BGR)

    # Save the result
    filename = os.path.basename(img_path)
    save_path = os.path.join(output_folder, filename)
    cv2.imwrite(save_path, img_output)

print(f"\nProcessing complete! \nImages saved to: {output_folder}")

Found 5430 images.


Processing: 100%|██████████| 5430/5430 [03:13<00:00, 28.09it/s]


Processing complete! 
Images saved to: /media/holidayj/Documents/github/ML/Python/annotation/frames/chungmuro_hasun_60frames_700/equalized





In [1]:
import os
import shutil

# Define the directory paths
base_path = "/media/holidayj/Documents/data/frames/chungmuro_hasun_10frame_1920_train_arrival"

# 1. The reference folder (where we read filenames from)
ref_dir = os.path.join(base_path, "30_frames_crop/set_1/clahe")

# 2. The source folder (where we look for the original files)
src_dir = os.path.join(base_path, "30_frames_crop")

# 3. The destination folder (where we copy files to)
dst_dir = os.path.join(base_path, "30_frames_crop/set_1/orig")

def copy_matching_files():
    # Create the destination directory if it doesn't exist
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)
        print(f"Created directory: {dst_dir}")

    # List all files in the reference directory
    try:
        files = os.listdir(ref_dir)
    except FileNotFoundError:
        print(f"Error: Reference directory not found: {ref_dir}")
        return

    count = 0
    
    print("Starting file copy process...")

    for filename in files:
        # Check if the file is a JPG
        if filename.lower().endswith(".jpg"):
            
            # Construct the full path for the source file to copy
            src_file_path = os.path.join(src_dir, filename)
            
            # Check if this file exists in the source folder (the parent 30_frames_crop)
            if os.path.exists(src_file_path):
                # Construct the full destination path
                dst_file_path = os.path.join(dst_dir, filename)
                
                # Copy the file
                shutil.copy2(src_file_path, dst_file_path)
                count += 1
                # Optional: Print progress for every 100 files
                if count % 100 == 0:
                    print(f"Copied {count} files...")
            else:
                print(f"Warning: File {filename} found in 'clahe' but missing in '30_frames_crop'")

    print("-" * 30)
    print(f"Process complete.")
    print(f"Total files copied to '{dst_dir}': {count}")

if __name__ == "__main__":
    copy_matching_files()

Starting file copy process...
Copied 100 files...
Copied 200 files...
Copied 300 files...
Copied 400 files...
Copied 500 files...
Copied 600 files...
------------------------------
Process complete.
Total files copied to '/media/holidayj/Documents/data/frames/chungmuro_hasun_10frame_1920_train_arrival/30_frames_crop/set_1/orig': 692
