In [1]:
import sys, os
# Blank out argv to avoid argparse conflicts inside the notebook kernel.
sys.argv = ['']

# Project root = parent of the current working directory; adjust if needed.
proj_root = os.path.abspath(os.pardir)

# Put the root at the front of the import path exactly once.
already_importable = proj_root in sys.path
if not already_importable:
    sys.path.insert(0, proj_root)

print("▶️ 프로젝트 루트:", proj_root)
print("Python executable:", sys.executable)

▶️ 프로젝트 루트: C:\Users\USER\Documents\GitHub\medication_object_detection_project_team2\Project
Python executable: C:\Users\USER\AppData\Local\pypoetry\Cache\virtualenvs\meditation-detection-project-ihVzpsAT-py3.11\Scripts\python.exe


In [2]:
from src.config import get_config, get_device
# Resolve the compute device and report it.
device = get_device()
print("▶ Device       : {}".format(device))

# Load the project configuration and report it.
cfg = get_config()
print("▶ cfg          : {}".format(cfg))

▶ Device       : cuda
▶ cfg          : Namespace(device='cuda', num_epochs=100, num_classes=44199, batch_size=16, lr=0.001, lrf=0.01, lr_scheduler='StepLR', optimizer='SGD', num_workers=0, weight_decay=0.0005, confidence_threshold=0.5, momentum=0.9, tune=False, iterations=300, tune_epochs=30, hyp_path=None, base_dir=WindowsPath('C:/Users/USER/Documents/GitHub/medication_object_detection_project_team2'), data_dir=WindowsPath('C:/Users/USER/Documents/GitHub/medication_object_detection_project_team2/data/ai03-level1-project'), train_image_dir=WindowsPath('C:/Users/USER/Documents/GitHub/medication_object_detection_project_team2/data/ai03-level1-project/train_images'), test_image_dir=WindowsPath('C:/Users/USER/Documents/GitHub/medication_object_detection_project_team2/data/ai03-level1-project/test_images'), annotation_dir=WindowsPath('C:/Users/USER/Documents/GitHub/medication_object_detection_project_team2/data/ai03-level1-project/train_annotations'), output_dir=WindowsPath('C:/Users/USER/D

In [3]:
import pandas as pd
import random
from pathlib import Path

# 1) Load the source annotations.
df = pd.read_csv('excluded_annotations.csv')

# 2) Build the image list and the class -> images mapping.
all_images = df['images_file_name'].unique().tolist()
class_to_images = {
    cls: set(group['images_file_name'])
    for cls, group in df.groupby('categories_id')
}

# 3) Put one image of every class into the validation set.
#    Candidates are sorted before drawing: iteration order of a set of strings
#    changes between interpreter sessions (hash randomization), so seeding the
#    RNG alone would not make the split reproducible.
random.seed(42)
val_images = set()
for cls, imgs in class_to_images.items():
    val_images.add(random.choice(sorted(imgs)))

# 4) Top up the validation set to the desired ratio (20% of all images).
val_ratio     = 0.2
n_total       = len(all_images)
n_desired_val = int(n_total * val_ratio)
remaining     = sorted(set(all_images) - val_images)  # sorted for the same reason as above
n_additional  = max(0, n_desired_val - len(val_images))
if n_additional > 0:
    val_images.update(random.sample(remaining, n_additional))

# 5) Final train/val image sets (disjoint by construction).
train_images = set(all_images) - val_images

# 6) Filter the annotation rows by image membership (must precede the check below).
train_df = df[df['images_file_name'].isin(train_images)]
val_df   = df[df['images_file_name'].isin(val_images)]

# 7) Check that every class appears in both splits.
#    A class whose images all landed in validation shows up here as missing
#    from training; the problem is only reported, not fixed.
all_classes   = df['categories_id'].unique()
train_classes = set(train_df['categories_id'].unique())
val_classes   = set(val_df['categories_id'].unique())
bad = []
for cls in all_classes:
    if cls not in train_classes:
        bad.append(f"{cls}번 클래스가 학습 세트에 없음")
    if cls not in val_classes:
        bad.append(f"{cls}번 클래스가 검증 세트에 없음")

if not bad:
    print("✅ 모든 클래스가 학습/검증 세트에 최소 1개씩 포함되어 있습니다.")
else:
    print("❌ 문제가 있는 클래스:\n" + "\n".join(bad))

# 8) Save both splits as CSV.
train_df.to_csv('train_annotations.csv', index=False)
val_df.to_csv('val_annotations.csv',   index=False)

print(f"▶ Train images: {len(train_images)}, rows: {len(train_df)}")
print(f"▶  Val images: {len(val_images)}, rows: {len(val_df)}")


✅ 모든 클래스가 학습/검증 세트에 최소 1개씩 포함되어 있습니다.
▶ Train images: 504, rows: 1900
▶  Val images: 125, rows: 471


In [6]:
# ─────────── 1) 라이브러리 임포트 ───────────
import os
import shutil
from pathlib import Path
import pandas as pd
import yaml
from PIL import Image  # 사이즈 읽기용

# ─────────── 2) Paths ───────────
img_dir      = Path(cfg.train_image_dir)   # folder holding the original PNGs
train_csv    = 'train_annotations.csv'
val_csv      = 'val_annotations.csv'
mapping_csv  = 'category_id_name_mapping.csv'

# ─────────── 3) Load CSVs and prepare the id mapping ───────────
train_df, val_df, mapping_df = (
    pd.read_csv(csv_path) for csv_path in (train_csv, val_csv, mapping_csv)
)
orig_ids = mapping_df['categories_id'].tolist()
names    = mapping_df['categories_name'].tolist()
# Original category id -> its row position in the mapping file.
id2idx   = dict(zip(orig_ids, range(len(orig_ids))))

# ─────────── 4) Image lists (first-occurrence order, no duplicates) ───────────
train_imgs = train_df['images_file_name'].drop_duplicates().tolist()
val_imgs   = val_df['images_file_name'].drop_duplicates().tolist()

# ─────────── 5) Create output directories ───────────
for out_dir in ('images/train', 'images/val', 'labels/train', 'labels/val'):
    Path(out_dir).mkdir(parents=True, exist_ok=True)

def copy_image_and_write_label(df_split, img_list, split):
    """Copy the images of one split and write a label text file per image.

    For every name in ``img_list`` the source image is copied from the
    module-level ``img_dir`` to ``images/<split>/`` and a file
    ``labels/<split>/<stem>.txt`` is written with one line per annotation:
    ``<class_index> <x_center> <y_center> <width> <height>``, where the box
    values are divided by the source image's width/height.

    Args:
        df_split: DataFrame with the columns ``images_file_name``,
            ``categories_id`` and ``annotations_bbox_x/y/w/h``. The center is
            computed as ``x + w/2`` and ``y + h/2``, i.e. x/y are treated as
            the box's minimum corner.
        img_list: Iterable of image file names to process. A name with no rows
            in ``df_split`` still gets an (empty) label file.
        split: Sub-directory name under ``images/`` and ``labels/``.

    Raises:
        KeyError: If a ``categories_id`` is missing from the module-level
            ``id2idx`` mapping.
    """
    out_img_dir = Path('images') / split
    out_lbl_dir = Path('labels') / split
    # Create the label directory here so the function does not depend on it
    # having been created elsewhere.
    out_lbl_dir.mkdir(parents=True, exist_ok=True)

    bbox_cols = ['annotations_bbox_x', 'annotations_bbox_y',
                 'annotations_bbox_w', 'annotations_bbox_h']
    # Group once up front instead of re-filtering the whole frame per image.
    recs_by_image = {
        name: group
        for name, group in df_split.groupby('images_file_name', sort=False)
    }

    for img_name in img_list:
        src_path = img_dir / img_name
        dst_path = out_img_dir / img_name  # extension kept as-is (all assumed to be PNG)

        # Copy the original file; copy2 also preserves timestamps/metadata.
        dst_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src_path, dst_path)

        # Read the source resolution; the context manager closes the file
        # handle that Image.open would otherwise leave open.
        with Image.open(src_path) as im:
            w, h = im.size

        # Build all label lines first so a failed class lookup does not leave
        # a half-written label file behind.
        lines = []
        recs = recs_by_image.get(img_name)
        if recs is not None:
            boxes = recs[bbox_cols].to_numpy().astype(float)
            for cat_id, (x, y, bw, bh) in zip(recs['categories_id'], boxes):
                cls = id2idx[int(cat_id)]
                xc = (x + bw/2) / w
                yc = (y + bh/2) / h
                nw = bw / w
                nh = bh / h
                lines.append(f"{cls} {xc:.6f} {yc:.6f} {nw:.6f} {nh:.6f}\n")

        lbl_path = out_lbl_dir / f"{Path(img_name).stem}.txt"
        with open(lbl_path, 'w', encoding='utf-8') as f:
            f.writelines(lines)

# ─────────── 6) Copy images + generate labels ───────────
for split_df, split_imgs, split_name in ((train_df, train_imgs, 'train'),
                                         (val_df,   val_imgs,   'val')):
    copy_image_and_write_label(split_df, split_imgs, split_name)

# ─────────── 7) Write train.txt & val.txt (pointing at the copied files) ───────────
# NOTE(review): entries are relative paths; how the training tool resolves
# them (cwd vs. dataset root) is not visible here — confirm.
with open('train.txt', 'w', encoding='utf-8') as f:
    f.writelines(f"images/train/{nm}\n" for nm in train_imgs)

with open('val.txt', 'w', encoding='utf-8') as f:
    f.writelines(f"images/val/{nm}\n" for nm in val_imgs)

# ─────────── 8) Write data.yaml ───────────
data = {
    'path': '.',
    'train': 'train.txt',
    'val':   'val.txt',
    'nc':    len(names),
    'names': names
}
# sort_keys=False keeps the key order above; allow_unicode writes non-ASCII names verbatim.
with open('data.yaml', 'w', encoding='utf-8') as f:
    yaml.dump(data, f, sort_keys=False, allow_unicode=True)

print("✅ 완료: images/train|val(원본 PNG 복사), labels/train|val(.txt), data.yaml")


✅ 완료: images/train|val(원본 PNG 복사), labels/train|val(.txt), data.yaml
