## Ensemble1 - Voting 방식
- ensemble_voting 폴더에 앙상블할 모델의 CSV 파일들을 위치시키면 됩니다.
- 각 픽셀에서 전체 모델의 과반수(절반 초과)가 True로 예측한 경우에만 True로 앙상블합니다.
- ex) 2개 모델 앙상블 시 - 2개 모델이 모두 예측한 경우에만 TRUE
- ex) 3개 모델 앙상블 시 - 2개 이상의 모델이 예측한 경우에만 TRUE

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Collect the names of the submission files to ensemble, skipping hidden
# entries (names that start with a dot).
submission_files = [
    entry
    for entry in os.listdir('./ensemble_voting')
    if not entry.startswith('.')
]

In [None]:
# Bare expression: shows the collected file names as the notebook cell output.
submission_files

In [None]:
def decode_rle_to_mask(rle, height, width):
    """Decode a run-length-encoded string into a binary mask.

    Args:
        rle: Space-separated "start length start length ..." string, where
            each start is a 1-based index into the flattened (row-major)
            mask. ``None`` or NaN is treated as an empty mask.
        height: Number of rows of the output mask.
        width: Number of columns of the output mask.

    Returns:
        A ``(height, width)`` ``np.uint8`` array with 1 inside every run and
        0 elsewhere.
    """
    img = np.zeros(height * width, dtype=np.uint8)

    # pandas reads an empty CSV cell as NaN (a float), which has no .split();
    # an absent prediction simply means "no foreground pixels".
    if rle is None or (isinstance(rle, float) and np.isnan(rle)):
        return img.reshape(height, width)

    tokens = rle.split()
    # Even positions hold the 1-based starts, odd positions the run lengths.
    starts = np.asarray(tokens[0::2], dtype=int) - 1
    lengths = np.asarray(tokens[1::2], dtype=int)
    ends = starts + lengths
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(height, width)
    
def encode_mask_to_rle(mask):
    """Encode a binary mask as a run-length string.

    The mask is flattened in row-major order and every run of foreground
    pixels is written as "start length", with 1-based starts, joined by
    single spaces. A mask without foreground yields an empty string.
    """
    flat = mask.reshape(-1)
    # Zero padding on both ends makes every run produce one rising and one
    # falling transition, even when it touches the border of the mask.
    padded = np.concatenate([[0], flat, [0]])
    transitions = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    run_starts = transitions[0::2]
    run_lengths = transitions[1::2] - run_starts
    interleaved = np.column_stack((run_starts, run_lengths)).ravel()
    return ' '.join(map(str, interleaved))

In [None]:
# Voting threshold: half the number of submissions, rounded down.
num_submissions = len(submission_files)
vote = num_submissions // 2
print(vote)

In [None]:
# Majority-vote ensemble over rows 0..999 of every submission CSV; the merged
# predictions are written to "ensemble_voting1000.csv".
image_name = []
classes = []
rles = []
# A pixel is kept only when strictly more than `vote` models predict it.
vote = len(submission_files)//2

if not submission_files:
    raise ValueError("no submission files found in ./ensemble_voting")

# Read each submission once up front: the files do not change between rows, so
# re-reading them inside the loop only repeats the same disk I/O and parsing.
submission_dfs = [
    pd.read_csv('./ensemble_voting/'+submission)
    for submission in submission_files
]
# Row metadata (image_name / class) is taken from the last submission.
# Assumes every CSV lists the same rows in the same order -- TODO confirm.
df = submission_dfs[-1]

for idx in range(0, 1000):
    print(idx)
    # Per-pixel vote counter; uint8 is enough for fewer than 256 submissions.
    sum_result = np.zeros((2048, 2048), dtype=np.uint8)

    for submission_df in submission_dfs:
        sum_result += decode_rle_to_mask(submission_df.loc[idx, 'rle'], height=2048, width=2048)

    # np.where(condition, value_if_true, value_if_false)
    result = np.where(sum_result > vote, 1, 0)
    rle = encode_mask_to_rle(result)

    image_name.append(df.loc[idx, 'image_name'])
    classes.append(df.loc[idx, 'class'])
    rles.append(rle)

result_df = pd.DataFrame({
    "image_name": image_name,
    "class": classes,
    "rle": rles,
})
result_df.to_csv("ensemble_voting1000.csv", index=False)

## 예시 이미지 출력

In [None]:
# Build the ensembled masks for rows img_id*29 .. (img_id+1)*29 - 1 so they
# can be visualised together.
# Presumably each image occupies 29 consecutive rows, one per class -- confirm.
vote = len(submission_files)//2
all_classes_result = []
img_id = 0

# Read each submission once up front instead of once per row: the files do not
# change during the loop, so re-reading them only repeats the same disk I/O.
submission_dfs = [
    pd.read_csv('./ensemble_voting/'+submission)
    for submission in submission_files
]

for idx in range(img_id*29, (img_id+1)*29):
    print(idx)
    # Per-pixel vote counter; uint8 is enough for fewer than 256 submissions.
    sum_result = np.zeros((2048, 2048), dtype=np.uint8)

    for submission_df in submission_dfs:
        sum_result += decode_rle_to_mask(submission_df.loc[idx, 'rle'], height=2048, width=2048)

    # Keep a pixel only when strictly more than `vote` models predict it.
    # np.where(condition, value_if_true, value_if_false)
    result = np.where(sum_result > vote, 1, 0)
    all_classes_result.append(result)

# Stack the per-row masks along a new leading axis.
all_classes_result = np.stack(all_classes_result, 0)

In [None]:
# Root directory that is searched for test images. It has to be assigned
# before the walk below, which reads it.
IMAGE_ROOT = "/opt/ml/input/data/test/DCM"

# Paths (relative to IMAGE_ROOT) of every .png file found under IMAGE_ROOT,
# de-duplicated through a set and returned as a sorted list.
pngs = sorted({
    os.path.relpath(os.path.join(root, fname), start=IMAGE_ROOT)
    for root, _dirs, files in os.walk(IMAGE_ROOT)
    for fname in files
    if os.path.splitext(fname)[1].lower() == ".png"
})
# 29 colour triples; label2rgb paints the mask at index i with PALETTE[i].
PALETTE = [
    (220, 20, 60), (119, 11, 32), (0, 0, 142), (0, 0, 230), (106, 0, 228),
    (0, 60, 100), (0, 80, 100), (0, 0, 70), (0, 0, 192), (250, 170, 30),
    (100, 170, 30), (220, 220, 0), (175, 116, 175), (250, 0, 30), (165, 42, 42),
    (255, 77, 255), (0, 226, 252), (182, 182, 255), (0, 82, 0), (120, 166, 157),
    (110, 76, 0), (174, 57, 255), (199, 100, 0), (72, 0, 118), (255, 179, 240),
    (0, 125, 92), (209, 0, 151), (188, 208, 182), (0, 220, 176),
]
def label2rgb(label):
    """Render a stack of per-class binary masks as one colour image.

    ``label`` is indexed as (class, ...spatial dims). Pixels equal to 1 in
    mask ``i`` are painted with ``PALETTE[i]``; where masks overlap, the
    higher class index wins because it is painted last.

    Returns a ``np.uint8`` array shaped like one mask plus a trailing axis
    of size 3.
    """
    canvas_shape = (*label.shape[1:], 3)
    canvas = np.zeros(canvas_shape, dtype=np.uint8)

    for class_idx, class_mask in enumerate(label):
        foreground = class_mask == 1
        canvas[foreground] = PALETTE[class_idx]

    return canvas
import cv2
# Show the test image (left) next to the colour-coded ensemble masks (right).
# NOTE(review): this always displays pngs[0]; if img_id in the mask-building
# cell is changed, this index presumably has to change with it -- confirm.
fig, ax = plt.subplots(1, 2, figsize=(12, 6))
image_path = os.path.join(IMAGE_ROOT, pngs[0])
image = cv2.imread(image_path)
# cv2.imread reports an unreadable or missing file by returning None instead
# of raising, so fail here with a clear message.
if image is None:
    raise FileNotFoundError(f"could not read image: {image_path}")
# OpenCV loads colour images in BGR channel order; matplotlib expects RGB.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = image / 255.
ax[0].imshow(image)
ax[1].imshow(label2rgb(all_classes_result))
plt.show()