## Split the General trash class into sub-classes and update annotations

In [1]:
import os
import json
import shutil

In [2]:
def _parse_image_filename(file_name):
    """Return ``(image_id, annotation_id)`` from ``<image_id>_<annotation_id>.<ext>``.

    Returns ``None`` when the name does not follow that pattern, so stray
    entries (e.g. ``.DS_Store``, ``.ipynb_checkpoints``) can be skipped
    instead of aborting the whole run.
    """
    parts = file_name.split('_')
    try:
        return int(parts[0]), int(parts[1].split('.')[0])
    except (IndexError, ValueError):
        return None


def _ensure_category(data, category_id_map, name):
    """Return the id of category *name*, appending it to ``data['categories']`` if absent."""
    if name not in category_id_map:
        # Next free id: one past the largest id currently in the file.
        new_id = max((category['id'] for category in data['categories']), default=0) + 1
        data['categories'].append({
            "id": new_id,
            "name": name,
            "supercategory": "General trash"
        })
        category_id_map[name] = new_id
    return category_id_map[name]


def process_folders(base_folder, folders, new_json_file, existing_categories,
                    merge_categories=None, source_json='../../dataset/train.json'):
    """Write a re-labelled copy of a COCO-style annotation file.

    ``base_folder`` is expected to contain one sub-folder per class, each
    holding files named ``<image_id>_<annotation_id>.<ext>``. For every
    sub-folder that is recognised, the matching annotations in the copied
    JSON get that sub-folder's category id.

    Args:
        base_folder: Directory whose sub-folders group the classified crops.
        folders: Sub-folder names that become new categories of their own
            (supercategory ``"General trash"``).
        new_json_file: Path of the JSON file to create or overwrite.
        existing_categories: Mapping of sub-folder name -> category id for
            categories that must keep their given id. Takes precedence over
            ``folders`` and ``merge_categories``.
        merge_categories: Optional mapping of sub-folder name -> merged
            category name. Every sub-folder is assigned to the category named
            by its own value, so several merge targets are supported.
        source_json: Annotation file to start from. Defaults to the path the
            original script used.

    Notes:
        * Sub-folders are visited in sorted order so new category ids do not
          depend on the arbitrary order returned by ``os.listdir``.
        * A name in ``folders`` that is already a category in the JSON reuses
          that category instead of adding a duplicate entry.
        * Sub-folders matching none of the three inputs are ignored, as are
          files whose names cannot be parsed.
    """
    # os.makedirs('') raises, so only create the parent when the path has one.
    parent_dir = os.path.dirname(new_json_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Work on a copy so the source annotation file is never modified.
    shutil.copyfile(source_json, new_json_file)

    with open(new_json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    category_id_map = {category['name']: category['id'] for category in data['categories']}

    # Create merged categories first so they receive the lowest new ids,
    # in order of first appearance in the mapping.
    merge_categories = merge_categories or {}
    for merged_name in dict.fromkeys(merge_categories.values()):
        _ensure_category(data, category_id_map, merged_name)

    # Index annotations once instead of scanning the full list per image file.
    # setdefault keeps the first annotation for a duplicated (image_id, id) key.
    annotation_index = {}
    for annotation in data.get('annotations', []):
        key = (annotation.get('image_id'), annotation.get('id'))
        annotation_index.setdefault(key, annotation)

    for folder in sorted(os.listdir(base_folder)):
        folder_path = os.path.join(base_folder, folder)
        if not os.path.isdir(folder_path):
            continue

        if folder in existing_categories:
            category_id = existing_categories[folder]
        elif folder in folders:
            category_id = _ensure_category(data, category_id_map, folder)
        elif folder in merge_categories:
            category_id = _ensure_category(data, category_id_map, merge_categories[folder])
        else:
            continue

        # Re-label every annotation referenced by a file in this sub-folder.
        for file_name in os.listdir(folder_path):
            parsed = _parse_image_filename(file_name)
            if parsed is None:
                continue
            annotation = annotation_index.get(parsed)
            if annotation is not None:
                annotation['category_id'] = category_id

    with open(new_json_file, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=4)

# Categories of the original dataset; each name keeps its position as its id.
existing_categories = {
    name: category_id
    for category_id, name in enumerate([
        "General trash",
        "Paper",
        "Paper pack",
        "Metal",
        "Glass",
        "Plastic",
        "Styrofoam",
        "Plastic bag",
        "Battery",
        "Clothing",
    ])
}

# Basic configuration
base_folder = '../../general_images_classified_from_original_label'
all_folders = ['binder', 'box_tape', 'business_card', 'cigarette_packet', 'coffee_bean_bag', 'etc', 'leaflet', 'mask', 'paper_piece', 'straw', 'wastepaper']
# Fifth '_'-separated token of base_folder; it tags the output folder and file names.
_label_tag = base_folder.split('_')[4]
save_folder = _label_tag + '_general_trash_json'


def _output_path(suffix):
    """Build the output JSON path for the class split identified by *suffix*."""
    return f'../../{save_folder}/train_{_label_tag}_class_{suffix}.json'


# One run per split: (output file suffix, folders promoted to their own category).
# Further splits can be added to this table following the same pattern,
# e.g. ('12_2', ['box_tape', 'straw']).
_split_runs = [
    ('11_1', ['box_tape']),
    ('11_2', ['wastepaper']),
    ('11_3', ['cigarette_packet']),
    ('11_4', ['straw']),
    ('11_5', ['binder']),
    ('12_1', ['box_tape', 'wastepaper']),
    ('13_1', ['box_tape', 'wastepaper', 'cigarette_packet']),
    ('14_1', ['box_tape', 'wastepaper', 'cigarette_packet', 'straw']),
    ('15_1', ['box_tape', 'wastepaper', 'cigarette_packet', 'straw', 'binder']),
]
for _suffix, _new_folders in _split_runs:
    process_folders(base_folder, _new_folders, _output_path(_suffix), existing_categories)

# Group several folders into one shared category.
merged_categories = {'paper_piece': 'paper_piece', 'leaflet': 'paper_piece', 'business_card': 'paper_piece'}
process_folders(
    base_folder,
    ['box_tape', 'wastepaper', 'cigarette_packet', 'straw', 'binder'],
    _output_path('16_1'),
    existing_categories,
    merge_categories=merged_categories,
)

# Promote every folder except 'etc' to its own category.
folders_except_etc = [name for name in all_folders if name != 'etc']
process_folders(base_folder, folders_except_etc, _output_path('20'), existing_categories)


## Merge categories

In [4]:
def merge_categories(json_file, categories_to_merge, final_category_name):
    """Merge several categories of a COCO-style JSON file into one, in place.

    The merged category keeps the smallest id among the matched categories
    and is renamed to ``final_category_name``. Annotations that pointed at
    any matched category are re-pointed to that id, and the other matched
    categories are removed. Ids of unrelated categories are left untouched,
    so the id sequence may contain gaps afterwards.

    Args:
        json_file: Path of the JSON file to rewrite.
        categories_to_merge: Names of the categories to merge. Names that do
            not exist in the file are ignored.
        final_category_name: Name given to the merged category.

    Raises:
        ValueError: If none of ``categories_to_merge`` exists in the file.
            The file is left unmodified in that case.
    """
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Ids of the categories being merged; a set keeps the per-annotation test O(1).
    merged_ids = {cat['id'] for cat in data['categories'] if cat['name'] in categories_to_merge}
    if not merged_ids:
        raise ValueError(
            f"none of the categories {list(categories_to_merge)!r} exist in {json_file!r}"
        )

    # The surviving category is the one with the smallest id.
    final_category_id = min(merged_ids)

    for annotation in data['annotations']:
        if annotation['category_id'] in merged_ids:
            annotation['category_id'] = final_category_id

    # Drop the absorbed categories, then rename the survivor.
    data['categories'] = [
        cat for cat in data['categories']
        if cat['id'] not in merged_ids or cat['id'] == final_category_id
    ]
    for cat in data['categories']:
        if cat['id'] == final_category_id:
            cat['name'] = final_category_name

    with open(json_file, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=4)

# Annotation file to rewrite in place.
json_file = '../../dataset/train_class20.json'

# Names of the categories to merge.
categories_to_merge = ['box_tape', 'paper_piece']

# Name of the category that remains after the merge.
final_category_name = 'paper_piece'

merge_categories(
    json_file=json_file,
    categories_to_merge=categories_to_merge,
    final_category_name=final_category_name,
)