In [None]:
import fiftyone as fo
import pandas as pd
import numpy as np
import cv2, json, os, glob, re
from tqdm import tqdm

# Every data location is derived from BASE_DIR so that relocating the data
# only requires changing this single constant.
BASE_DIR = "/data/ephemeral/home/data"
TRAIN_IMG_ROOT = os.path.join(BASE_DIR, "train", "DCM")
TRAIN_LBL_ROOT = os.path.join(BASE_DIR, "train", "outputs_json")
TEST_IMG_ROOT = os.path.join(BASE_DIR, "test", "DCM")
META_PATH = os.path.join(BASE_DIR, "meta_data.xlsx")

# Name passed to fo.Dataset(...) when the dataset is (re)created below.
DATASET_NAME = "Hand Bone Image Segmentation"

In [None]:
def get_cleaned_meta(path):
    """Load the metadata sheet and normalise the columns used downstream.

    Args:
        path: Location of the Excel file, forwarded to ``pandas.read_excel``.
            An already-loaded ``DataFrame`` is accepted as well; it is
            cleaned on a copy so the caller's frame is left untouched.

    Returns:
        A new ``DataFrame`` in which

        * columns whose label starts with ``Unnamed`` are removed,
        * ``성별`` keeps only its first run of Hangul characters
          (rows without any Hangul become missing),
        * ``ID`` is an integer column, with unparsable values mapped to 0,
        * ``키(신장)`` and ``체중(몸무게)`` are float columns, with
          unparsable values mapped to NaN.

        Each conversion is applied only if the column is present.
    """
    df = path.copy() if isinstance(path, pd.DataFrame) else pd.read_excel(path)

    # 1. Drop the 'Unnamed' columns. Labels are compared through str() so a
    #    non-string header (e.g. a numeric one) cannot break the filter, and
    #    the result is copied so the assignments below never write into a
    #    slice of the original frame.
    keep = [not str(col).startswith('Unnamed') for col in df.columns]
    df = df.loc[:, keep].copy()

    # 2. Gender: strip stray/special characters by keeping only Hangul.
    if '성별' in df.columns:
        df['성별'] = df['성별'].astype(str).str.extract(r'([가-힣]+)', expand=False)

    # 3. ID: convert to int. Values that are not numeric fall back to 0.
    if 'ID' in df.columns:
        df['ID'] = pd.to_numeric(df['ID'], errors='coerce').fillna(0).astype(int)

    # 4. Height / weight: convert to float (decimals are preserved).
    #    Non-numeric entries become NaN.
    for column in ('키(신장)', '체중(몸무게)'):
        if column in df.columns:
            df[column] = pd.to_numeric(df[column], errors='coerce').astype(float)

    return df

# Load and clean the metadata sheet once; the later cells look rows up in
# this module-level frame by ID.
df_meta = get_cleaned_meta(META_PATH)
print(f"Metadata loaded: {len(df_meta)} rows")
# NOTE(review): `display` is not imported in this file; presumably the
# IPython/Jupyter built-in, so this line only works inside a notebook.
display(df_meta.head())

In [None]:
# Recreate the dataset from scratch so that re-running this cell never
# collides with a previously created dataset of the same name.
if DATASET_NAME in fo.list_datasets():
    fo.delete_dataset(DATASET_NAME)

dataset = fo.Dataset(DATASET_NAME)
samples = []

# Metadata column -> sample field name.
META_FIELDS = {
    '성별': "Gender",
    '나이': "Age",
    '체중(몸무게)': "Weight",
    '키(신장)': "Height",
}

# Index the metadata by ID once instead of scanning the frame per sample.
# setdefault keeps the FIRST row of a duplicated ID.
meta_by_id = {}
if 'ID' in df_meta.columns:
    for record in df_meta.to_dict("records"):
        meta_by_id.setdefault(record['ID'], record)


def _folder_id(path):
    """Return the int formed by the digits of *path*'s parent folder name.

    Returns None when the folder name contains no digit, instead of letting
    ``int('')`` raise.
    """
    folder_name = os.path.basename(os.path.dirname(path))
    digits = ''.join(filter(str.isdigit, folder_name))
    return int(digits) if digits else None


def _build_sample(filepath, tag):
    """Create a tagged sample for *filepath* and attach ID and metadata."""
    sample = fo.Sample(filepath=filepath, tags=[tag])
    f_id = _folder_id(filepath)
    if f_id is None:
        return sample

    sample["ID"] = f_id
    record = meta_by_id.get(f_id)
    if record is not None:
        for column, field in META_FIELDS.items():
            value = record.get(column)
            # Missing cells (NaN / absent column) are left unset rather than
            # stored, so a NaN never ends up in a text field such as Gender.
            if not pd.isna(value):
                sample[field] = value
    return sample


# --- 1. Add the train samples ---
json_paths = glob.glob(os.path.join(TRAIN_LBL_ROOT, "**/*.json"), recursive=True)
missing_images = 0
for j_path in tqdm(json_paths, desc="Adding Train Samples"):
    # Map the label file to its image: same relative location under the
    # image root, with a .png extension (outputs_json -> DCM).
    rel_stem = os.path.splitext(os.path.relpath(j_path, TRAIN_LBL_ROOT))[0]
    img_path = os.path.join(TRAIN_IMG_ROOT, rel_stem + ".png")
    if not os.path.exists(img_path):
        missing_images += 1
        continue

    samples.append(_build_sample(img_path, "train"))

if missing_images:
    print(f"Skipped {missing_images} label files without a matching image.")

# --- 2. Add the test samples (no labels, but metadata is still linked) ---
test_pngs = glob.glob(os.path.join(TEST_IMG_ROOT, "**/*.png"), recursive=True)
for t_path in tqdm(test_pngs, desc="Adding Test Samples"):
    samples.append(_build_sample(t_path, "test"))

dataset.add_samples(samples)
dataset.persistent = True
print(f"Created dataset '{DATASET_NAME}' with {len(dataset)} samples.")

In [None]:
with dataset.save_context() as context:
    # 1. Attach the ground-truth polygons to every train sample.
    for sample in tqdm(dataset.match_tags("train"), desc="Updating GT"):
        # The label file mirrors the image path (DCM -> outputs_json).
        json_path = sample.filepath.replace('.png', '.json').replace('DCM', 'outputs_json')
        if not os.path.exists(json_path):
            continue

        # The image is read only for its size, which is needed to normalise
        # the pixel coordinates. cv2.imread returns None on failure.
        img = cv2.imread(sample.filepath)
        if img is None:
            print(f"Could not read image, skipping: {sample.filepath}")
            continue
        h, w = img.shape[:2]

        with open(json_path, 'r', encoding='utf-8') as f:
            ann_data = json.load(f)

        polylines = []
        for ann in ann_data.get('annotations', []):
            pts = ann.get('points') or []
            if not pts:
                # An annotation without points has no shape to draw.
                continue
            # Pixel (x, y) -> relative coordinates in [0, 1].
            norm_pts = [[(p[0] / w, p[1] / h) for p in pts]]
            polylines.append(
                fo.Polyline(
                    label=ann.get('label'),
                    points=norm_pts,
                    closed=True,
                    filled=True,
                )
            )

        sample["ground_truth"] = fo.Polylines(polylines=polylines)
        context.save(sample)

print("Update Complete!")

In [None]:
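# To inspect prediction results, set CSV_PATH below to the path of your submission CSV and run this cell (the code is already active; nothing needs to be uncommented).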

import pandas as pd
import numpy as np
import cv2
import os
import fiftyone as fo
from tqdm import tqdm

# 1. Load the prediction CSV. If the file is missing, only a warning is
#    printed and everything in the else-branch is skipped.
CSV_PATH = "/data/ephemeral/home/csb/ss/test_submission_CSB_011_segb3_4del.csv" # 🍎 path of the CSV file to visualise
if not os.path.exists(CSV_PATH):
    print(f"⚠️ {CSV_PATH} 파일을 찾을 수 없습니다.")
else:
    pred_df = pd.read_csv(CSV_PATH)
    print(f"Loaded predictions: {len(pred_df)} rows")

    # 2. RLE decoding function (revised: also handles the string 'nan')
    def decode_rle_to_mask(rle, height, width):
        """Decode a run-length-encoded string into a binary mask.

        The encoding is a whitespace-separated sequence of ``start length``
        pairs, where ``start`` is a 1-based index into the flattened image
        (the flat buffer is reshaped to ``(height, width)`` at the end).

        Args:
            rle: The encoded string. A missing value (NaN/None), an empty
                string or the text ``nan`` (any case, surrounding whitespace
                ignored) yields an all-zero mask.
            height: Number of mask rows.
            width: Number of mask columns.

        Returns:
            A ``uint8`` array of shape ``(height, width)`` holding 1 inside
            the encoded runs and 0 elsewhere.

        Raises:
            ValueError: If the tokens do not form complete pairs or are not
                integers.
        """
        # Real missing values (float NaN, None, pd.NA) mean "no prediction".
        if not isinstance(rle, str) and pd.isna(rle):
            return np.zeros((height, width), dtype=np.uint8)

        tokens = str(rle).split()
        if not tokens or (len(tokens) == 1 and tokens[0].lower() == 'nan'):
            return np.zeros((height, width), dtype=np.uint8)

        if len(tokens) % 2:
            raise ValueError(
                f"Malformed RLE: expected 'start length' pairs, got {len(tokens)} tokens"
            )

        # Even positions are starts (converted to 0-based), odd are lengths.
        starts = np.asarray(tokens[0::2], dtype=int) - 1
        lengths = np.asarray(tokens[1::2], dtype=int)

        flat = np.zeros(height * width, dtype=np.uint8)
        for lo, hi in zip(starts, starts + lengths):
            flat[lo:hi] = 1

        return flat.reshape(height, width)

    # 3. Mask -> FiftyOne Polyline conversion function
    def mask_to_polylines(mask, label, img_w, img_h):
        """Turn the contours of a binary mask into FiftyOne polylines.

        Contours are extracted with ``cv2.findContours`` using
        ``cv2.RETR_EXTERNAL`` and ``cv2.CHAIN_APPROX_SIMPLE``. Contours with
        fewer than three points are dropped.

        Args:
            mask: Mask handed to ``cv2.findContours``.
            label: Label assigned to every produced polyline.
            img_w: Divisor applied to the x coordinates.
            img_h: Divisor applied to the y coordinates.

        Returns:
            A list of closed, filled ``fo.Polyline`` objects, one per kept
            contour, with coordinates divided by the given image size.
        """
        found, _hierarchy = cv2.findContours(
            mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        # Per-column divisor: x by the width, y by the height.
        size = np.array([img_w, img_h], dtype=float)

        result = []
        for cnt in found:
            if len(cnt) >= 3:
                # Flatten the contour to an (N, 2) float array, then normalise.
                xy = cnt.reshape(-1, 2).astype(float) / size
                result.append(
                    fo.Polyline(
                        label=label,
                        points=[xy.tolist()],
                        closed=True,
                        filled=True,
                    )
                )
        return result

    # 4. Attach the decoded predictions to the test samples.
    # Mask size used when a sample carries no image metadata.
    DEFAULT_MASK_SIZE = 2048

    # The CSV identifies images by file name only, hence the basename key.
    sample_map = {os.path.basename(s.filepath): s for s in dataset.match_tags("test")}
    print("Adding predictions to dataset...")
    grouped = pred_df.groupby("image_name")
    unmatched_images = 0

    with dataset.save_context() as context:
        for image_name, group in tqdm(grouped, total=len(grouped)):
            sample = sample_map.get(image_name)
            if sample is None:
                unmatched_images += 1
                continue

            # Prefer the real image size when metadata has been computed for
            # the sample; otherwise fall back to the default size.
            meta = sample.metadata
            img_h = getattr(meta, "height", None) or DEFAULT_MASK_SIZE
            img_w = getattr(meta, "width", None) or DEFAULT_MASK_SIZE

            all_polylines = []
            for rle, label in zip(group['rle'], group['class']):
                # decode_rle_to_mask returns an all-zero mask for missing RLE.
                mask = decode_rle_to_mask(rle, img_h, img_w)

                # Nothing to draw for an empty mask.
                if not mask.any():
                    continue

                all_polylines.extend(mask_to_polylines(mask, label, img_w, img_h))

            if all_polylines:
                sample["predictions"] = fo.Polylines(polylines=all_polylines)
                context.save(sample)

    if unmatched_images:
        print(f"Skipped {unmatched_images} CSV images that are not in the dataset.")
    print("✅ Prediction update complete!")


In [None]:
# Start the FiftyOne App for the dataset on port 5151 and keep the session
# handle in `session`.
# NOTE(review): auto=False presumably stops the App from being opened
# automatically in the notebook output - confirm against the FiftyOne docs.
session = fo.launch_app(dataset, port=5151, auto=False)
# The string below is a usage note (in Korean): to view the App in a browser,
# add port 5151 under "Ports" (next to the terminal) in VS Code, then open
# localhost. It is a bare string expression, not a comment.
'''
브라우저 창으로 보려면 VS Code에서 터미널 옆 Ports에 5151 추가 후 localhost 접속하면 됩니다.
'''