<a href="https://colab.research.google.com/github/dansojo/Medical_CV/blob/main/data_preprocessing_%EB%8F%99%EB%AC%BC_%EA%B7%BC%EA%B3%A8%EA%B2%A9%EA%B3%84.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 필요한 라이브러리 임포트
import os
import json
import numpy as np
import cv2
from PIL import Image
import torchvision.transforms as transforms

In [None]:
# Base path configuration
root_dir = '/content/drive/MyDrive/Medical_CV/3'  # directory that contains the train, val and test folders
output_dir = os.path.join(root_dir, '전처리_데이터')  # directory the preprocessed data is written to
os.makedirs(output_dir, exist_ok=True)  # create the output directory up front; no error if it already exists

# Class mapping: disease code found in the annotations -> integer class label
# Mu03 (rib fracture)
# Mu05 (patellar luxation)
# Mu06 (anterior/cranial cruciate ligament rupture)
# Mu07 (intervertebral disc disease)
disease_to_label = {'Mu03': 0, 'Mu05': 1, 'Mu06': 2, 'Mu07': 3}

In [None]:
# Image preprocessing: grayscale load -> Gaussian blur -> resize -> normalised array
def preprocess_image(image_path, *, target_size=(224, 224), blur_kernel=(5, 5)):
    """Load one image as grayscale, smooth it, resize it and normalise it.

    Args:
        image_path: Path of the image file to read.
        target_size: ``(height, width)`` handed to ``transforms.Resize``.
            Defaults to the 224x224 input size used so far.
        blur_kernel: Gaussian kernel size ``(width, height)`` handed to
            ``cv2.GaussianBlur``; OpenCV requires odd, positive values.

    Returns:
        The preprocessed image as a NumPy array, or ``None`` when OpenCV
        could not read ``image_path``. Per torchvision's ``ToTensor`` /
        ``Normalize`` semantics the array is single-channel, channel-first
        ``(1, height, width)`` float data scaled from [0, 1] to [-1, 1].
    """
    # Read as a single-channel (grayscale) image.
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        print(f"Could not load image at path: {image_path}")
        return None

    # sigmaX=0 does not mean "no blur": it tells OpenCV to derive the
    # standard deviation from the kernel size.
    smoothed_image = cv2.GaussianBlur(image, blur_kernel, 0)

    # Resize and convert to a tensor, then map [0, 1] -> [-1, 1].
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(target_size),  # match the model's input size
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ])
    tensor_image = transform(smoothed_image)

    # Callers store the result with NumPy, so hand back an ndarray.
    return tensor_image.numpy()

In [None]:
# Preprocess every image of one split and save it together with its label
def process_and_save(split, root_dir, output_dir, disease_to_label):
    """Preprocess all images of one dataset split and save them with labels.

    For every file in ``<root_dir>/<split>/images`` the matching annotation
    ``<root_dir>/<split>/annotations/<stem>.json`` is read, the image is run
    through ``preprocess_image`` and the pair is written to
    ``<output_dir>/<split>/<stem>.npy``.

    Files are skipped (with a printed message) instead of aborting the whole
    run when the annotation is missing, the annotation is not valid JSON, or
    the image cannot be loaded.

    Args:
        split: Name of the split sub-directory, e.g. ``'train'``.
        root_dir: Directory containing the split sub-directories.
        output_dir: Directory under which ``<split>/`` output is created.
        disease_to_label: Mapping from the annotation's ``Disease-Name``
            value to an integer label. Names missing from the mapping are
            saved with label ``-1`` and a warning is printed.

    Returns:
        None. Results are written to disk.
    """
    image_dir = os.path.join(root_dir, split, 'images')
    annotation_dir = os.path.join(root_dir, split, 'annotations')
    output_split_dir = os.path.join(output_dir, split)
    os.makedirs(output_split_dir, exist_ok=True)

    # os.listdir order is arbitrary; sort so reruns visit files identically.
    for img_filename in sorted(os.listdir(image_dir)):
        stem = os.path.splitext(img_filename)[0]
        img_path = os.path.join(image_dir, img_filename)
        annotation_path = os.path.join(annotation_dir, stem + '.json')

        # Read the label first: it is cheap, and it avoids preprocessing an
        # image that cannot be labelled anyway.
        try:
            with open(annotation_path, 'r', encoding='utf-8') as f:
                annotation = json.load(f)
        except FileNotFoundError:
            print(f"No annotation found for {img_filename}, skipping: {annotation_path}")
            continue
        except json.JSONDecodeError as err:
            print(f"Invalid annotation JSON at {annotation_path}, skipping: {err}")
            continue

        # A missing 'metadata' section is treated like a missing disease name.
        disease_name = (annotation.get('metadata') or {}).get('Disease-Name')
        label = disease_to_label.get(disease_name, -1)  # -1 marks an unmapped label
        if disease_name not in disease_to_label:
            print(f"Unmapped disease name {disease_name!r} in {annotation_path}; saving label -1")

        # Run the image preprocessing.
        image_np = preprocess_image(img_path)
        if image_np is None:
            continue  # image could not be loaded; preprocess_image already reported it

        # Store image and label together. np.save pickles the dict into a
        # 0-d object array, so it must be read back with
        # np.load(path, allow_pickle=True).item().
        save_data = {'image': image_np, 'label': label}
        save_path = os.path.join(output_split_dir, stem + '.npy')
        np.save(save_path, save_data)
        print(f"Saved processed image and label to {save_path}")

In [None]:
# Run preprocessing and saving for each dataset split (train, val, test) in turn
for split in ('train', 'val', 'test'):
    process_and_save(
        split=split,
        root_dir=root_dir,
        output_dir=output_dir,
        disease_to_label=disease_to_label,
    )

print("Data preprocessing and saving completed.")

스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.
Saved processed image and label to /content/drive/MyDrive/Medical_CV/3/전처리_데이터/train/D_62_20161108_IM_0009_NOR_Mu05_20190127_0707.npy
Saved processed image and label to /content/drive/MyDrive/Medical_CV/3/전처리_데이터/train/D_62_20161002_IF_0016_NOR_Mu06_20211213_0420.npy
Saved processed image and label to /content/drive/MyDrive/Medical_CV/3/전처리_데이터/train/D_62_20160929_SF_0020_NOR_Mu06_20211213_0416.npy
Saved processed image and label to /content/drive/MyDrive/Medical_CV/3/전처리_데이터/train/D_62_20160929_SF_0020_NOR_Mu06_20190616_0001.npy
Saved processed image and label to /content/drive/MyDrive/Medical_CV/3/전처리_데이터/train/D_62_20161001_IF_0014_ABN_Mu05_20191214_0009.npy
Saved processed image and label to /content/drive/MyDrive/Medical_CV/3/전처리_데이터/train/D_62_20161004_IF_0013_NOR_Mu03_20200430_0003.npy
Saved processed image and label to /content/drive/MyDrive/Medical_CV/3/전처리_데이터/train/D_62_20161106_CM_0028_NOR_Mu06_20211216_7311.npy
Saved proces