In [None]:
import fiftyone as fo
import pandas as pd
import numpy as np
import cv2, json, os, glob, re
from tqdm import tqdm

# Root of the data directory. Every other path below is derived from it, so
# relocating the data only requires editing this one line.
BASE_DIR = "/data/ephemeral/home/data"
TRAIN_IMG_ROOT = f"{BASE_DIR}/train/DCM"           # training images
TRAIN_LBL_ROOT = f"{BASE_DIR}/train/outputs_json"  # training annotations (JSON)
TEST_IMG_ROOT = f"{BASE_DIR}/test/DCM"             # test images (no annotations)
META_PATH = f"{BASE_DIR}/meta_data.xlsx"           # per-subject metadata sheet

# Name under which the FiftyOne dataset is registered.
DATASET_NAME = "Hand Bone Image Segmentation"

In [None]:
def get_cleaned_meta(path):
    """Load the metadata spreadsheet and normalise its columns.

    Args:
        path: Location of the Excel file; passed unchanged to ``pd.read_excel``.

    Returns:
        A new DataFrame in which
        * columns whose label starts with ``Unnamed`` are dropped,
        * ``성별`` keeps only its first run of Hangul characters (NaN if none),
        * ``ID`` is an integer column (values that are not numeric become 0),
        * ``키(신장)`` and ``체중(몸무게)`` are float columns (values that are
          not numeric become NaN).
        Any of these columns that is absent from the sheet is simply skipped.
    """
    df = pd.read_excel(path)

    # 1. Drop the 'Unnamed' columns. Labels are cast to str first so that a
    #    numeric header cell cannot break the string test, and the result is
    #    copied so the assignments below write to an independent frame rather
    #    than to a slice of the frame that was read.
    is_unnamed = df.columns.astype(str).str.startswith('Unnamed')
    df = df.loc[:, ~is_unnamed].copy()

    # 2. Gender: strip stray non-Hangul characters around the value.
    if '성별' in df.columns:
        df['성별'] = (
            df['성별'].astype(str).str.extract(r'([가-힣]+)', expand=False).str.strip()
        )

    # 3. ID: convert to int. Values that cannot be parsed are mapped to 0.
    if 'ID' in df.columns:
        df['ID'] = pd.to_numeric(df['ID'], errors='coerce').fillna(0).astype(int)

    # 4. Height / weight: convert to float (decimals are kept).
    #    Non-numeric values become NaN (missing).
    for column in ('키(신장)', '체중(몸무게)'):
        if column in df.columns:
            df[column] = pd.to_numeric(df[column], errors='coerce').astype(float)

    return df

# Load the cleaned metadata once; the dataset-building cell looks rows up in
# df_meta by their 'ID' column.
df_meta = get_cleaned_meta(META_PATH)
print(f"Metadata loaded: {len(df_meta)} rows")
# Preview the first rows to eyeball the cleaned columns.
display(df_meta.head())

In [None]:
# Recreate the dataset from scratch so that re-running this cell does not
# fail on (or append to) a dataset left over from a previous run.
if DATASET_NAME in fo.list_datasets():
    fo.delete_dataset(DATASET_NAME)

dataset = fo.Dataset(DATASET_NAME)
samples = []

# Sample field name -> metadata column it is copied from.
_META_FIELDS = {
    "Gender": '성별',
    "Age": '나이',
    "Weight": '체중(몸무게)',
    "Height": '키(신장)',
}

# Index the metadata by ID once instead of filtering the whole DataFrame for
# every image. setdefault keeps the first row of a duplicated ID, which is the
# row a per-sample `df_meta[df_meta['ID'] == id].iloc[0]` lookup would pick.
_meta_by_id = {}
for _record in df_meta.to_dict("records"):
    _meta_by_id.setdefault(_record['ID'], _record)


def _subject_id(path):
    """Return the integer formed by the digits of the parent folder name of
    ``path`` (e.g. ``ID001`` -> 1), or None when the name contains no digit."""
    folder_name = os.path.basename(os.path.dirname(path))
    digits = ''.join(filter(str.isdigit, folder_name))
    return int(digits) if digits else None


def _make_sample(filepath, tag):
    """Build a sample tagged ``tag`` and attach its ID plus any metadata row
    found for that ID (fields: ID, Gender, Age, Weight, Height)."""
    sample = fo.Sample(filepath=filepath, tags=[tag])

    subject_id = _subject_id(filepath)
    if subject_id is None:
        # No ID can be derived from the folder name: keep the image, skip metadata.
        return sample
    sample["ID"] = subject_id

    record = _meta_by_id.get(subject_id, {})
    for field_name, column in _META_FIELDS.items():
        value = record.get(column)
        # Missing cells are left unset instead of being stored as NaN, so a
        # field never mixes value types across samples (e.g. a float NaN in
        # the otherwise textual Gender field).
        if value is None or pd.isna(value):
            continue
        # Unwrap numpy scalars into plain Python numbers.
        sample[field_name] = value.item() if hasattr(value, "item") else value
    return sample


# --- 1. Add the train data ---
# Sorted so that the sample order is reproducible between runs.
json_paths = sorted(glob.glob(os.path.join(TRAIN_LBL_ROOT, "**/*.json"), recursive=True))
missing_images = 0
for j_path in tqdm(json_paths, desc="Adding Train Samples"):
    # Mirror the annotation's path relative to the label root under the image
    # root (outputs_json/<folder>/<name>.json -> DCM/<folder>/<name>.png).
    rel_stem = os.path.splitext(os.path.relpath(j_path, TRAIN_LBL_ROOT))[0]
    img_path = os.path.join(TRAIN_IMG_ROOT, rel_stem + ".png")
    if not os.path.exists(img_path):
        missing_images += 1
        continue

    samples.append(_make_sample(img_path, "train"))

# --- 2. Add the test data (no labels, but metadata is still attached) ---
# Assumes the test folder follows the same ID001/image.png layout.
test_pngs = sorted(glob.glob(os.path.join(TEST_IMG_ROOT, "**/*.png"), recursive=True))
for t_path in tqdm(test_pngs, desc="Adding Test Samples"):
    samples.append(_make_sample(t_path, "test"))

dataset.add_samples(samples)
dataset.persistent = True
print(f"Created dataset '{DATASET_NAME}' with {len(dataset)} samples.")
if missing_images:
    # Report what the loop above skipped so the gap is not silent.
    print(f"Skipped {missing_images} annotation file(s) without a matching image.")

In [None]:
def _gt_polylines(json_path, width, height):
    """Read one annotation file and return its polygons as ``fo.Polyline``
    objects whose points are divided by the image width / height."""
    # JSON is read as UTF-8 explicitly rather than with the platform default.
    with open(json_path, 'r', encoding='utf-8') as f:
        ann_data = json.load(f)

    polylines = []
    for ann in ann_data.get('annotations', []):
        pts = ann.get('points', [])
        if not pts:
            # An annotation without points has no shape to draw.
            continue
        norm_pts = [[(p[0] / width, p[1] / height) for p in pts]]
        polylines.append(
            fo.Polyline(label=ann.get('label'), points=norm_pts, closed=True, filled=True)
        )
    return polylines


unreadable_images = 0
with dataset.save_context() as context:
    # 1. Update the ground truth of the train samples
    for sample in tqdm(dataset.match_tags("train"), desc="Updating GT"):
        # Mirror the image's path relative to the image root under the label
        # root (DCM/<folder>/<name>.png -> outputs_json/<folder>/<name>.json).
        rel_stem = os.path.splitext(os.path.relpath(sample.filepath, TRAIN_IMG_ROOT))[0]
        json_path = os.path.join(TRAIN_LBL_ROOT, rel_stem + ".json")

        if os.path.exists(json_path):
            # Image size is needed to normalise the annotation coordinates.
            img = cv2.imread(sample.filepath)
            if img is None:
                # cv2.imread returns None when it cannot decode the file;
                # leave this sample without ground truth instead of crashing.
                unreadable_images += 1
            else:
                h, w = img.shape[:2]
                sample["ground_truth"] = fo.Polylines(
                    polylines=_gt_polylines(json_path, w, h)
                )

        # 2. [For later] prediction update logic
        # If model outputs live in a separate folder, match them here and attach:
        # sample["predictions"] = fo.Polylines(polylines=...)

        context.save(sample)

print("Update Complete!")
if unreadable_images:
    print(f"Skipped {unreadable_images} image(s) that could not be read.")

In [None]:
# Open the FiftyOne App on the dataset built above, served on port 5151.
# The returned session object is kept in `session`.
session = fo.launch_app(dataset, port=5151)