In [2]:
import os
import json
import glob

# Directory containing the JSON annotation files (a UNC network-share path).
# process_json_files() writes one .txt label file next to each .json found here.
path = '\\\\192.168.100.79\\fishduke\\home\\data2\\bear\\labels\\'

def convert_bbox_to_yolo(size, box):
    """Turn a corner-style box into a normalised (x_center, y_center, w, h) tuple.

    Args:
        size: ``(image_width, image_height)``.
        box: ``(x_min, y_min, x_max, y_max)`` in the same units as ``size``.

    Returns:
        ``(x, y, w, h)`` where the centre and extent are divided by the image
        width / height respectively.

    Note:
        1 is subtracted from the box centre before normalising.
        NOTE(review): presumably this compensates for 1-based pixel
        coordinates -- confirm against the dataset's annotation convention.
    """
    inv_width = 1. / size[0]
    inv_height = 1. / size[1]

    # Centre of the box, shifted by one unit (see the note in the docstring).
    center_x = (box[0] + box[2]) / 2.0 - 1
    center_y = (box[1] + box[3]) / 2.0 - 1

    # Extent of the box along each axis.
    box_width = box[2] - box[0]
    box_height = box[3] - box[1]

    return (
        center_x * inv_width,
        center_y * inv_height,
        box_width * inv_width,
        box_height * inv_height,
    )

def process_json_files(path):
    """Convert every ``*.json`` annotation file in ``path`` to a YOLO ``.txt`` file.

    For each JSON file the image size is read from ``data['images'][0]`` and every
    entry of ``data['annotations']`` is converted with ``convert_bbox_to_yolo``.
    Each bbox is read as ``[[x_min, y_min], [x_max, y_max]]``. The result is written
    next to the JSON file with the extension swapped to ``.txt``; every object is
    written with class id ``0``.

    Annotations whose bbox cannot be converted are skipped (the label file simply
    does not get a line for them) and counted; the count is reported at the end.

    Args:
        path: Directory that contains the JSON files.

    Raises:
        Exception: If ``path`` does not exist.
    """
    # Make sure the path exists
    if not os.path.exists(path):
        raise Exception(f"The specified path does not exist: {path}")

    # List all JSON files in the directory
    json_files = glob.glob(os.path.join(path, '*.json'))
    total_files = len(json_files)

    # Check if there are any files to process
    if total_files == 0:
        print("No JSON files found to process.")
        return

    skipped_annotations = 0

    # Process each JSON file
    for index, json_file_path in enumerate(json_files, start=1):
        with open(json_file_path, 'r') as file:
            data = json.load(file)

        # Extract image dimensions
        image_width = data['images'][0]['width']
        image_height = data['images'][0]['height']

        # Swap only the extension. str.replace('.json', '.txt') would rewrite every
        # '.json' occurring anywhere in the path and would leave names such as
        # 'X.JSON' untouched, making the output overwrite the input file.
        txt_file_path = os.path.splitext(json_file_path)[0] + '.txt'
        with open(txt_file_path, 'w') as txt_file:
            # Iterate over each annotation (object)
            for annotation in data['annotations']:
                bbox = annotation['bbox']
                try:
                    x_min, y_min = bbox[0][0], bbox[0][1]
                    x_max, y_max = bbox[1][0], bbox[1][1]

                    # Convert bbox to YOLO format
                    yolo_bbox = convert_bbox_to_yolo(
                        (image_width, image_height),
                        (x_min, y_min, x_max, y_max),
                    )
                except (TypeError, IndexError, KeyError, ZeroDivisionError):
                    # Malformed bbox or zero image size: skip this object only.
                    skipped_annotations += 1
                    continue

                # Write the YOLO format to the txt file (outside the try so that
                # real I/O errors are not silently swallowed).
                txt_file.write(f"0 {yolo_bbox[0]} {yolo_bbox[1]} {yolo_bbox[2]} {yolo_bbox[3]}\n")

        # Print progress on the same line
        percentage_complete = (index / total_files) * 100
        print(f"Processed {index} of {total_files} files ({percentage_complete:.2f}% complete)", end='\r')

    # Ensure the final message is on a new line after completion
    print()

    if skipped_annotations:
        print(f"Skipped {skipped_annotations} annotation(s) with a malformed bbox.")

# Run the conversion over every JSON file found in `path`.
process_json_files(path)

Processed 14960 of 14960 files (100.00% complete)


In [1]:
import os
import shutil

def is_valid_yolo_format(file_path):
    """Return True if the file has at least one well-formed YOLO label line.

    A line is well-formed when it has exactly five whitespace-separated fields and
    the last four parse as finite, non-negative floats. Parsing with ``float``
    (instead of a digits-only check) also accepts scientific notation such as
    ``5e-05``, which Python emits when formatting small values. The first field
    (class id) is not inspected.

    Args:
        file_path: Path of the label file to inspect.

    Returns:
        True if any line is well-formed; False otherwise, including when the file
        cannot be opened or decoded.
    """
    try:
        with open(file_path, 'r') as file:
            for line in file:
                parts = line.split()
                if len(parts) != 5:
                    continue
                try:
                    values = [float(part) for part in parts[1:]]
                except ValueError:
                    continue
                # The chained comparison is False for negatives, inf and nan alike.
                if all(0 <= value < float('inf') for value in values):
                    return True
    except (OSError, UnicodeDecodeError):
        # Unreadable file: treat it as "no valid label" (best effort).
        pass
    return False

def count_total_images(base_path):
    """Count the '.jpg' files (case-insensitive) found in sub-folders of base_path.

    Files located directly inside ``base_path`` itself are not counted.
    """
    return sum(
        sum(1 for name in filenames if name.lower().endswith('.jpg'))
        for current_dir, _subdirs, filenames in os.walk(base_path)
        if current_dir != base_path
    )

def process_folders(base_path):
    """Copy every image with a valid YOLO label file into ``<base_path>/labeled``.

    The ``labeled`` folder is deleted and recreated on each call. Every sub-folder
    of ``base_path`` is walked (files directly inside ``base_path`` are ignored).
    For each ``.jpg`` whose same-named ``.txt`` passes ``is_valid_yolo_format``,
    both files are copied under their original names. The copy is flat, so files
    with the same name in different sub-folders overwrite each other.

    Args:
        base_path: Root directory of the dataset.
    """
    labeled_folder = os.path.join(base_path, 'labeled')

    # Create the 'labeled' folder, or empty it if it already exists.
    if os.path.exists(labeled_folder):
        shutil.rmtree(labeled_folder)
    os.makedirs(labeled_folder)

    total_images = count_total_images(base_path)
    processed_images = 0

    for root, dirs, files in os.walk(base_path):
        # Prune the output folder so the walk never lists the files copied into it.
        dirs[:] = [d for d in dirs if os.path.join(root, d) != labeled_folder]
        if root == base_path or root == labeled_folder:
            continue

        jpg_files = [f for f in files if f.lower().endswith('.jpg')]
        # A set makes each label lookup O(1) instead of scanning a list per image.
        txt_files = {f for f in files if f.lower().endswith('.txt')}

        for jpg_file in jpg_files:
            processed_images += 1
            txt_file = jpg_file[:-4] + '.txt'
            if txt_file in txt_files:
                txt_path = os.path.join(root, txt_file)
                if is_valid_yolo_format(txt_path):
                    jpg_path = os.path.join(root, jpg_file)
                    # Copy the image.
                    shutil.copy2(jpg_path, labeled_folder)
                    # Copy the label file.
                    shutil.copy2(txt_path, labeled_folder)
                    print(f"Copied {jpg_file} and {txt_file} to labeled folder")

            # Show progress.
            progress = (processed_images / total_images) * 100
            print(f"\rProgress: {processed_images}/{total_images} ({progress:.2f}%)", end="", flush=True)

    print("\nProcessing complete.")
    print(f"Total images processed: {processed_images}")
    # Each copied image has exactly one label file next to it, hence the // 2.
    print(f"Images with valid YOLO labels copied: {len(os.listdir(labeled_folder))//2}")

# Run: rebuild the 'labeled' folder under the dataset root on the network share.
base_path = r'\\192.168.100.79/data2/bear/'
process_folders(base_path)

Copied A06_G01_G001_G_200507_4001_10S_000002.087.jpg and A06_G01_G001_G_200507_4001_10S_000002.087.txt to labeled folder
Progress: 1/71293 (0.00%)Copied A06_G01_G001_G_200507_4001_10S_000003.740.jpg and A06_G01_G001_G_200507_4001_10S_000003.740.txt to labeled folder
Progress: 2/71293 (0.00%)Copied A06_G01_G001_G_200507_4001_10S_000004.801.jpg and A06_G01_G001_G_200507_4001_10S_000004.801.txt to labeled folder
Progress: 3/71293 (0.00%)Copied A06_G01_G001_G_200507_4001_10S_000005.662.jpg and A06_G01_G001_G_200507_4001_10S_000005.662.txt to labeled folder
Progress: 4/71293 (0.01%)Copied A06_G01_G001_G_200507_4001_10S_000006.584.jpg and A06_G01_G001_G_200507_4001_10S_000006.584.txt to labeled folder
Progress: 5/71293 (0.01%)Copied A06_G01_G001_G_200507_4001_10S_000008.520.jpg and A06_G01_G001_G_200507_4001_10S_000008.520.txt to labeled folder
Progress: 6/71293 (0.01%)Copied A06_G01_G001_G_200916_4002_10S_000002.596.jpg and A06_G01_G001_G_200916_4002_10S_000002.596.txt to labeled folder
Pro

In [None]:
import os
import shutil

def is_valid_yolo_format(file_path):
    """Return True if the file has at least one well-formed YOLO label line.

    A line is well-formed when it has exactly five whitespace-separated fields and
    the last four parse as finite, non-negative floats. Parsing with ``float``
    (instead of a digits-only check) also accepts scientific notation such as
    ``5e-05``, which Python emits when formatting small values. The first field
    (class id) is not inspected.

    Args:
        file_path: Path of the label file to inspect.

    Returns:
        True if any line is well-formed; False otherwise, including when the file
        cannot be opened or decoded.
    """
    try:
        with open(file_path, 'r') as label_file:
            for raw_line in label_file:
                fields = raw_line.split()
                if len(fields) != 5:
                    continue
                try:
                    numbers = [float(field) for field in fields[1:]]
                except ValueError:
                    continue
                # The chained comparison is False for negatives, inf and nan alike.
                if all(0 <= number < float('inf') for number in numbers):
                    return True
    except (OSError, UnicodeDecodeError):
        # Unreadable file: treat it as "no valid label" (best effort).
        pass
    return False

def count_total_images(base_path):
    """Return how many '.jpg' files (any letter case) live below base_path.

    Only sub-folders contribute; files directly in ``base_path`` are skipped.
    """
    image_count = 0
    for current_dir, _subdirs, filenames in os.walk(base_path):
        if current_dir != base_path:
            image_count += sum(
                1 for name in filenames if name.lower().endswith('.jpg')
            )
    return image_count

def process_folders(base_path):
    """Copy every image with a valid YOLO label file into ``<base_path>/labeled``.

    The ``labeled`` folder is deleted and recreated on each call. Every sub-folder
    of ``base_path`` is walked (files directly inside ``base_path`` are ignored).
    For each ``.jpg`` whose same-named ``.txt`` passes ``is_valid_yolo_format``,
    both files are copied as ``<folder name>_<lower-cased file name>``, so that
    equally named files from different folders do not overwrite each other.

    Args:
        base_path: Root directory of the dataset.
    """
    labeled_folder = os.path.join(base_path, 'labeled')

    # Create the 'labeled' folder, or empty it if it already exists.
    if os.path.exists(labeled_folder):
        shutil.rmtree(labeled_folder)
    os.makedirs(labeled_folder)

    total_images = count_total_images(base_path)
    processed_images = 0

    for root, dirs, files in os.walk(base_path):
        # Prune the output folder so the walk never lists the files copied into it.
        dirs[:] = [d for d in dirs if os.path.join(root, d) != labeled_folder]
        if root == base_path or root == labeled_folder:
            continue

        folder_name = os.path.basename(root)  # name of the folder being processed
        jpg_files = [f for f in files if f.lower().endswith('.jpg')]
        # A set makes each label lookup O(1) instead of scanning a list per image.
        txt_files = {f for f in files if f.lower().endswith('.txt')}

        for jpg_file in jpg_files:
            processed_images += 1
            txt_file = jpg_file[:-4] + '.txt'
            if txt_file in txt_files:
                txt_path = os.path.join(root, txt_file)
                if is_valid_yolo_format(txt_path):
                    jpg_path = os.path.join(root, jpg_file)

                    # New file names: folder name + original (lower-cased) file name.
                    new_jpg_name = f"{folder_name}_{jpg_file.lower()}"
                    new_txt_name = f"{folder_name}_{txt_file.lower()}"

                    # Destination paths.
                    new_jpg_path = os.path.join(labeled_folder, new_jpg_name)
                    new_txt_path = os.path.join(labeled_folder, new_txt_name)

                    # Copy the image.
                    shutil.copy2(jpg_path, new_jpg_path)
                    # Copy the label file.
                    shutil.copy2(txt_path, new_txt_path)
                    print(f"Copied {new_jpg_name} and {new_txt_name} to labeled folder")

            # Show progress.
            progress = (processed_images / total_images) * 100
            print(f"\rProgress: {processed_images}/{total_images} ({progress:.2f}%)", end="", flush=True)

    print("\nProcessing complete.")
    print(f"Total images processed: {processed_images}")
    # Each copied image has exactly one label file next to it, hence the // 2.
    print(f"Images with valid YOLO labels copied: {len(os.listdir(labeled_folder))//2}")

# Run: rebuild the 'labeled' folder under the dataset root.
base_path = '/data2/bear/'
process_folders(base_path)


# 일정 객체 크기 미만 필터

In [4]:
import os
import shutil

def is_valid_yolo_format(file_path, min_area=0.002):
    """Collect the YOLO label lines of a file whose box area is at least ``min_area``.

    A line is considered when it has exactly five whitespace-separated fields that
    all parse as floats and whose last four values (x, y, width, height) are
    finite and non-negative. Parsing with ``float`` also accepts scientific
    notation such as ``5e-05``. Lines that do not qualify are skipped one by one,
    so a single malformed line no longer discards the rest of the file.

    Args:
        file_path: Path of the label file to inspect.
        min_area: Minimum ``width * height`` for an object to be kept.

    Returns:
        ``(True, lines)`` with the stripped text of every kept line, or
        ``(False, [])`` when nothing qualifies or the file cannot be read.
    """
    valid_objects = []
    try:
        with open(file_path, 'r') as file:
            for line in file:
                parts = line.split()
                if len(parts) != 5:
                    continue
                try:
                    values = [float(part) for part in parts]
                except ValueError:
                    continue
                # False for negatives, inf and nan alike; the class id is exempt.
                if not all(0 <= value < float('inf') for value in values[1:]):
                    continue
                width, height = values[3], values[4]
                if width * height >= min_area:
                    valid_objects.append(line.strip())
    except (OSError, ValueError) as e:
        # ValueError covers UnicodeDecodeError raised while reading the file.
        print(f"Error processing file {file_path}: {e}")
        return False, []

    if valid_objects:
        return True, valid_objects
    return False, []

def count_total_images(base_path):
    """Count the '.jpg' files (case-insensitive) found in sub-folders of base_path.

    Files located directly inside ``base_path`` itself are not counted.
    """
    return sum(
        sum(1 for name in filenames if name.lower().endswith('.jpg'))
        for current_dir, _subdirs, filenames in os.walk(base_path)
        if current_dir != base_path
    )

def process_folders(base_path):
    """Copy images with sufficiently large labelled objects into ``<base_path>/labeled``.

    The ``labeled`` folder is deleted and recreated on each call. Every sub-folder
    of ``base_path`` is walked (files directly inside ``base_path`` are ignored).
    For each ``.jpg`` whose same-named ``.txt`` yields at least one line from
    ``is_valid_yolo_format``, the image is copied and a new label file holding only
    those lines is written. Both get the name
    ``<folder name>_<lower-cased file name>``.

    Args:
        base_path: Root directory of the dataset.
    """
    labeled_folder = os.path.join(base_path, 'labeled')

    # Create the 'labeled' folder, or empty it if it already exists.
    if os.path.exists(labeled_folder):
        shutil.rmtree(labeled_folder)
    os.makedirs(labeled_folder)

    processed_images = 0

    for root, dirs, files in os.walk(base_path):
        # Prune the output folder so the walk never lists the files copied into it.
        dirs[:] = [d for d in dirs if os.path.join(root, d) != labeled_folder]
        if root == base_path or root == labeled_folder:
            continue

        folder_name = os.path.basename(root)  # name of the folder being processed
        jpg_files = [f for f in files if f.lower().endswith('.jpg')]
        # A set makes each label lookup O(1) instead of scanning a list per image.
        txt_files = {f for f in files if f.lower().endswith('.txt')}

        for jpg_file in jpg_files:
            processed_images += 1
            txt_file = jpg_file[:-4] + '.txt'
            if txt_file not in txt_files:
                continue

            txt_path = os.path.join(root, txt_file)
            is_valid, valid_lines = is_valid_yolo_format(txt_path)
            if not is_valid:
                continue

            jpg_path = os.path.join(root, jpg_file)

            # New file names: folder name + original (lower-cased) file name.
            new_jpg_name = f"{folder_name}_{jpg_file.lower()}"
            new_txt_name = f"{folder_name}_{txt_file.lower()}"

            # Destination paths.
            new_jpg_path = os.path.join(labeled_folder, new_jpg_name)
            new_txt_path = os.path.join(labeled_folder, new_txt_name)

            # Copy the image.
            shutil.copy2(jpg_path, new_jpg_path)

            # Write a label file that holds only the lines that passed the filter.
            with open(new_txt_path, 'w') as new_txt_file:
                new_txt_file.write("\n".join(valid_lines) + "\n")

    print("\nProcessing complete.")
    print(f"Total images processed: {processed_images}")
    # Each copied image has exactly one label file next to it, hence the // 2.
    print(f"Images with valid YOLO labels copied: {len(os.listdir(labeled_folder))//2}")

# Run: rebuild the 'labeled' folder under the dataset root.
base_path = '/data2/bear/'
process_folders(base_path)



Processing complete.
Total images processed: 71293
Images with valid YOLO labels copied: 40544


# Width/height ratio and size filter

In [7]:
import os
import shutil

def is_valid_yolo_format(file_path, min_area=0.003, max_aspect_ratio=3.0):
    """Collect the YOLO label lines of a file that pass the area and aspect filters.

    A line is considered when it has exactly five whitespace-separated fields that
    all parse as floats and whose last four values (x, y, width, height) are
    finite and non-negative. Parsing with ``float`` also accepts scientific
    notation such as ``5e-05``. A considered line is kept when
    ``width * height >= min_area`` and ``width / height <= max_aspect_ratio``.

    Boxes with zero height are skipped individually. Previously the division
    raised ``ZeroDivisionError`` and the whole file was rejected, including its
    valid objects.

    Args:
        file_path: Path of the label file to inspect.
        min_area: Minimum ``width * height`` for an object to be kept.
        max_aspect_ratio: Maximum ``width / height`` for an object to be kept.

    Returns:
        ``(True, lines)`` with the stripped text of every kept line, or
        ``(False, [])`` when nothing qualifies or the file cannot be read.
    """
    valid_objects = []
    try:
        with open(file_path, 'r') as file:
            for line in file:
                parts = line.split()
                if len(parts) != 5:
                    continue
                try:
                    values = [float(part) for part in parts]
                except ValueError:
                    continue
                # False for negatives, inf and nan alike; the class id is exempt.
                if not all(0 <= value < float('inf') for value in values[1:]):
                    continue
                width, height = values[3], values[4]

                # Degenerate box: no aspect ratio can be computed for it.
                if height == 0:
                    continue

                # Filter on area and on the width-to-height ratio.
                area = width * height
                aspect_ratio = width / height
                if area >= min_area and aspect_ratio <= max_aspect_ratio:
                    valid_objects.append(line.strip())
    except (OSError, ValueError) as e:
        # ValueError covers UnicodeDecodeError raised while reading the file.
        print(f"Error processing file {file_path}: {e}")
        return False, []

    if valid_objects:
        return True, valid_objects
    return False, []

def count_total_images(base_path):
    """Return how many '.jpg' files (any letter case) live below base_path.

    Only sub-folders contribute; files directly in ``base_path`` are skipped.
    """
    image_count = 0
    for current_dir, _subdirs, filenames in os.walk(base_path):
        if current_dir != base_path:
            image_count += sum(
                1 for name in filenames if name.lower().endswith('.jpg')
            )
    return image_count

def process_folders(base_path, labeled='labeled'):
    """Copy images whose labels pass the filters into ``<base_path>/<labeled>``.

    The output folder is deleted and recreated on each call. Every sub-folder of
    ``base_path`` is walked (files directly inside ``base_path`` are ignored).
    For each ``.jpg`` whose same-named ``.txt`` yields at least one line from
    ``is_valid_yolo_format``, the image is copied and a new label file holding only
    those lines is written. Both get the name
    ``<folder name>_<lower-cased file name>``.

    Args:
        base_path: Root directory of the dataset.
        labeled: Name of the output folder created inside ``base_path``. It used to
            be read from a module-level variable defined after this function; it is
            now an explicit argument so the function no longer depends on cell
            execution order.
    """
    labeled_folder = os.path.join(base_path, labeled)

    # Create the output folder, or empty it if it already exists.
    if os.path.exists(labeled_folder):
        shutil.rmtree(labeled_folder)
    os.makedirs(labeled_folder)

    processed_images = 0

    for root, dirs, files in os.walk(base_path):
        # Prune the output folder so the walk never lists the files copied into it.
        dirs[:] = [d for d in dirs if os.path.join(root, d) != labeled_folder]
        if root == base_path or root == labeled_folder:
            continue

        folder_name = os.path.basename(root)  # name of the folder being processed
        jpg_files = [f for f in files if f.lower().endswith('.jpg')]
        # A set makes each label lookup O(1) instead of scanning a list per image.
        txt_files = {f for f in files if f.lower().endswith('.txt')}

        for jpg_file in jpg_files:
            processed_images += 1
            txt_file = jpg_file[:-4] + '.txt'
            if txt_file not in txt_files:
                continue

            txt_path = os.path.join(root, txt_file)
            is_valid, valid_lines = is_valid_yolo_format(txt_path)
            if not is_valid:
                continue

            jpg_path = os.path.join(root, jpg_file)

            # New file names: folder name + original (lower-cased) file name.
            new_jpg_name = f"{folder_name}_{jpg_file.lower()}"
            new_txt_name = f"{folder_name}_{txt_file.lower()}"

            # Destination paths.
            new_jpg_path = os.path.join(labeled_folder, new_jpg_name)
            new_txt_path = os.path.join(labeled_folder, new_txt_name)

            # Copy the image.
            shutil.copy2(jpg_path, new_jpg_path)

            # Write a label file that holds only the lines that passed the filter.
            with open(new_txt_path, 'w') as new_txt_file:
                new_txt_file.write("\n".join(valid_lines) + "\n")

    print("\nProcessing complete.")
    print(f"Total images processed: {processed_images}")
    # Each copied image has exactly one label file next to it, hence the // 2.
    print(f"Images with valid YOLO labels copied: {len(os.listdir(labeled_folder))//2}")

# Run the filter/copy step.
base_path = '/data2/bear/'
labeled = 'labeled'
process_folders(base_path, labeled)

# Folder to scan. os.path.join does not depend on base_path ending with a slash.
folder_path = os.path.join(base_path, labeled)
# File that receives the list of image paths.
output_file = "labeled.txt"

# Write the path of every .jpg below folder_path, one per line.
with open(output_file, 'w') as f:
    # Visit every file in the folder and its sub-folders.
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            # Keep only files with the .jpg extension.
            if file.endswith(".jpg"):
                # Build the full file path.
                file_path = os.path.join(root, file)
                # Record it in the output list.
                f.write(file_path + "\n")

print(f"모든 .jpg 파일 경로가 {output_file}에 저장되었습니다.")


Processing complete.
Total images processed: 72270
Images with valid YOLO labels copied: 47584


# no filter

In [2]:
import os
import shutil

def count_total_images(base_path):
    """Count the '.jpg' files (case-insensitive) found in sub-folders of base_path.

    Files located directly inside ``base_path`` itself are not counted.
    """
    return sum(
        sum(1 for name in filenames if name.lower().endswith('.jpg'))
        for current_dir, _subdirs, filenames in os.walk(base_path)
        if current_dir != base_path
    )

def process_folders(base_path):
    """Copy every image that has a same-named label file into ``<base_path>/labeled``.

    The ``labeled`` folder is deleted and recreated on each call. Every sub-folder
    of ``base_path`` is walked (files directly inside ``base_path`` are ignored).
    Each ``.jpg`` with a matching ``.txt`` is copied together with that file, both
    renamed to ``<folder name>_<lower-cased file name>``. The label content is not
    checked.

    Args:
        base_path: Root directory of the dataset.
    """
    labeled_folder = os.path.join(base_path, 'labeled')

    # Create the 'labeled' folder, or empty it if it already exists.
    if os.path.exists(labeled_folder):
        shutil.rmtree(labeled_folder)
    os.makedirs(labeled_folder)

    processed_images = 0

    for root, dirs, files in os.walk(base_path):
        # Prune the output folder so the walk never lists the files copied into it.
        dirs[:] = [d for d in dirs if os.path.join(root, d) != labeled_folder]
        if root == base_path or root == labeled_folder:
            continue

        folder_name = os.path.basename(root)  # name of the folder being processed
        jpg_files = [f for f in files if f.lower().endswith('.jpg')]
        # A set makes each label lookup O(1) instead of scanning a list per image.
        txt_files = {f for f in files if f.lower().endswith('.txt')}

        for jpg_file in jpg_files:
            processed_images += 1
            txt_file = jpg_file[:-4] + '.txt'
            if txt_file not in txt_files:
                continue

            jpg_path = os.path.join(root, jpg_file)
            txt_path = os.path.join(root, txt_file)

            # New file names: folder name + original (lower-cased) file name.
            new_jpg_name = f"{folder_name}_{jpg_file.lower()}"
            new_txt_name = f"{folder_name}_{txt_file.lower()}"

            # Destination paths.
            new_jpg_path = os.path.join(labeled_folder, new_jpg_name)
            new_txt_path = os.path.join(labeled_folder, new_txt_name)

            # Copy the image.
            shutil.copy2(jpg_path, new_jpg_path)
            # Copy the label file.
            shutil.copy2(txt_path, new_txt_path)

    print("\nProcessing complete.")
    print(f"Total images processed: {processed_images}")
    # Each copied image has exactly one label file next to it, hence the // 2.
    print(f"Images copied: {len(os.listdir(labeled_folder))//2}")

# Run: rebuild the 'labeled' folder under the dataset root (no label filtering).
base_path = '/data2/bear/'
process_folders(base_path)



Processing complete.
Total images processed: 119531
Images copied: 94008


##### find folder/ -type f -name "*.jpg" > labeled.txt

# split

In [4]:
#for shuffle
import random
import math

# Paths used for an earlier dataset, kept for reference:
# path = '/home/data2/fire/2023-10-31/fire.txt'
# out_path = '/home/data2/fire/2023-10-31/fire_split.txt'

# Input list (one image path per line) and the shuffled copy written from it.
path = '/home/data2/bear/labeled.txt'
out_path = '/home/data2/bear/bear_split.txt'

# Output lists for the training and validation subsets.
train_path = '/home/fishduke/Desktop/darknet/data/bear_csp_train.txt'
valid_path = '/home/fishduke/Desktop/darknet/data/bear_csp_valid.txt'

VALID_FRACTION = 0.1  # share of the lines that goes to the validation list
SHUFFLE_SEED = 42     # fixed seed so that re-running the cell gives the same split

# Read all entries. Blank lines are dropped and every entry is forced to end with
# a newline, so a last line without one cannot be glued to another path once the
# order is shuffled.
with open(path, 'r') as txt:
    lines = [
        line if line.endswith('\n') else line + '\n'
        for line in txt
        if line.strip()
    ]

# A private Random instance keeps the global random state untouched.
random.Random(SHUFFLE_SEED).shuffle(lines)

# Persist the shuffled order.
with open(out_path, 'w') as f:
    f.writelines(lines)

# The original cell rebinds `path` to the shuffled file; kept in case a later
# cell relies on it.
path = out_path

total_lines = len(lines)
# The first 10% (rounded up) of the shuffled lines become the validation set.
split_index = math.ceil(total_lines * VALID_FRACTION)

with open(valid_path, 'w') as f2:
    f2.writelines(lines[:split_index])  # validation data

with open(train_path, 'w') as f:
    f.writelines(lines[split_index:])  # training data
