In [None]:
import os
import numpy as np
import pandas as pd
import copy


# Paths of the prediction CSV files to ensemble
file_paths = [
    '/path/to/csv',
    '/path/to/csv',
    '/path/to/csv',
    '/path/to/csv',
    '/path/to/csv',
    ]

# Directory the ensembled result is written to
SAVE_ROOT = '/data/ephemeral/home'

# DataFrames of the prediction files that were actually loaded
dfs = []

# Load every listed file that exists; missing files are reported and skipped
for file_path in file_paths:
    if os.path.exists(file_path):
        df = pd.read_csv(file_path)
        dfs.append(df)
    else:
        print(f"{file_path} 파일이 존재하지 않습니다.")

# A pixel is kept when strictly more than `threshold` files voted for it.
# The threshold is derived from the number of files actually loaded (not the
# number listed), so a skipped file cannot silently turn the majority vote
# into a stricter one.
threshold = len(dfs) // 2
print(f'{threshold} 초과를 앙상블합니다.')

In [None]:
def encode_mask_to_rle(mask):
    """Run-length encode a mask into a space-separated string.

    The mask is flattened, and every run of consecutive non-zero-padded
    value changes is reported as a ``start length`` pair, where ``start``
    is the 1-based index into the flattened mask.

    Args:
        mask: array-like object exposing ``flatten()`` (e.g. a numpy array)
            holding a binary mask.

    Returns:
        A string of the form ``"start1 length1 start2 length2 ..."``;
        an empty string when the mask contains no foreground run.
    """
    flat = mask.flatten()
    # Zero sentinels on both ends make a run that touches either border
    # still produce a matching rise/fall pair.
    padded = np.concatenate([[0], flat, [0]])
    # 1-based positions at which the value changes, alternating between
    # run starts and the position just past each run's end.
    runs = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn every end position into the length of its run.
    runs[1::2] -= runs[::2]
    return ' '.join(map(str, runs))

def decode_rle_to_mask(rle, height, width):
    """Decode a run-length string into a ``(height, width)`` uint8 mask.

    Args:
        rle: whitespace-separated string of ``start length`` pairs, with
            ``start`` being a 1-based index into the flattened mask.
        height: number of rows of the resulting mask.
        width: number of columns of the resulting mask.

    Returns:
        A ``numpy.uint8`` array of shape ``(height, width)`` holding 1 inside
        every encoded run and 0 elsewhere.
    """
    values = np.asarray(rle.split(), dtype=int)
    # Even positions are run starts (converted to 0-based), odd positions
    # are the run lengths.
    starts = values[0::2] - 1
    lengths = values[1::2]
    ends = starts + lengths

    flat = np.zeros(height * width, dtype=np.uint8)
    for begin, stop in zip(starts, ends):
        flat[begin:stop] = 1

    return flat.reshape(height, width)

In [None]:
# Root directory that is walked to discover the PNG images
IMAGE_ROOT = "/data/ephemeral/home/data/test/DCM"

# Class labels; every image gets one vote-count mask per label
CLASSES = [
    *(f'finger-{number}' for number in range(1, 20)),
    'Trapezium', 'Trapezoid', 'Capitate', 'Hamate', 'Scaphoid',
    'Lunate', 'Triquetrum', 'Pisiform', 'Radius', 'Ulna',
]

# Paths (relative to IMAGE_ROOT) of every file with a .png extension,
# matched case-insensitively
pngs = set()
for dirpath, _subdirs, filenames in os.walk(IMAGE_ROOT):
    for filename in filenames:
        _stem, extension = os.path.splitext(filename)
        if extension.lower() == ".png":
            pngs.add(os.path.relpath(os.path.join(dirpath, filename), start=IMAGE_ROOT))

# Size of every mask
height, width = 2048, 2048

# Template: one all-zero uint8 vote counter per class
class_dict = {bone: np.zeros((height, width), dtype=np.uint8) for bone in CLASSES}

# One independent copy of the template per image.
# NOTE(review): png[6:] drops the first six characters of the relative path,
# presumably a five-character folder name plus the path separator, so the key
# is the bare file name; confirm against the dataset layout.
ensemble = {png[6:]: copy.deepcopy(class_dict) for png in pngs}

In [None]:
# Add every decoded prediction mask onto the per-image, per-class vote counter
for fold, fold_df in enumerate(dfs):
    for index, row in fold_df.iterrows():
        rle = row['rle']
        if pd.isna(rle):
            # No RLE stored for this row: report it and add nothing
            print(f'{fold}fold의 {index}번에 문제 발생!')
            print(row)
            continue
        ensemble[row['image_name']][row['class']] += decode_rle_to_mask(rle, height, width)

In [None]:
# Keep the pixels whose vote count exceeds `threshold`, then replace each
# vote-count array in `ensemble` with the RLE string of the resulting mask
for png in pngs:
    votes_by_class = ensemble[png[6:]]
    for bone in CLASSES:
        kept = votes_by_class[bone] > threshold
        votes_by_class[bone] = encode_mask_to_rle(np.where(kept, 1, 0))

# The encoding step is slow (about 10 minutes on the test set).

In [None]:
# Column values of the output table, one entry per (image, class) pair
image_name = []
classes = []
rles = []

# `pngs` is a set, whose iteration order for strings is arbitrary and can
# differ between interpreter runs; sorting makes the row order of the CSV
# reproducible.
for png in sorted(pngs):
    name = png[6:]
    rle_by_class = ensemble[name]
    for bone in CLASSES:
        image_name.append(name)
        classes.append(bone)
        rles.append(rle_by_class[bone])

df = pd.DataFrame({
    "image_name": image_name,
    "class": classes,
    "rle": rles,
})

# Write the ensembled predictions without the DataFrame index column
df.to_csv(os.path.join(SAVE_ROOT, "output.csv"), index=False)