<a href="https://colab.research.google.com/github/joongwha/DeepRL-Agents/blob/master/pancancer_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Google Colab에서 실행할 완전한 코드
# CT DICOM 데이터를 사용한 CNN 모델 구현

# 1. 필요한 라이브러리 설치 및 import
!pip install pydicom opencv-python scikit-image tensorflow

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import pydicom
import cv2
from skimage import exposure
from skimage.filters import gaussian

# 2. DICOM 처리 함수
def process_dicom_file(file_path, target_size=(64, 64),
                       window_center=40, window_width=400):
    """Load one CT DICOM slice and turn it into a normalized 3-channel image.

    Stored pixel values are first converted to Hounsfield units (HU) using
    the file's RescaleSlope / RescaleIntercept, then clipped to the requested
    HU window, scaled to [0, 1], resized, lightly Gaussian-smoothed,
    contrast-enhanced with adaptive histogram equalization and finally
    replicated across three channels.

    Args:
        file_path: Path (or file-like object) passed to ``pydicom.dcmread``.
        target_size: Output size passed to ``cv2.resize`` as ``dsize``,
            i.e. ``(width, height)``.
        window_center: Center of the HU window. The default 40/400 pair is a
            soft-tissue window; pass e.g. ``window_center=-600,
            window_width=1500`` for a typical lung window.
        window_width: Full width of the HU window; must be positive.

    Returns:
        A ``float32`` array with three identical channels on the last axis,
        or ``None`` if the file cannot be read or processed (the error is
        printed, matching the original best-effort behavior).

    Raises:
        ValueError: If ``window_width`` is not positive.
    """
    if window_width <= 0:
        raise ValueError(f"window_width must be positive, got {window_width}")

    try:
        dicom_data = pydicom.dcmread(file_path)

        # Work in float from the start: stored values are often (unsigned)
        # integers, and clipping those against a negative HU bound is fragile.
        image = dicom_data.pixel_array.astype(np.float32)

        # Stored values are not HU until the modality rescale is applied.
        # Files without these tags are treated as already being in HU.
        slope = float(getattr(dicom_data, 'RescaleSlope', 1.0))
        intercept = float(getattr(dicom_data, 'RescaleIntercept', 0.0))
        image = image * slope + intercept

        # HU windowing followed by scaling of the window to [0, 1].
        min_hu = window_center - window_width / 2.0
        max_hu = window_center + window_width / 2.0
        image = np.clip(image, min_hu, max_hu)
        image = (image - min_hu) / (max_hu - min_hu)

        # Preprocessing: resize, denoise, local contrast enhancement.
        image = cv2.resize(image, target_size)
        image = gaussian(image, sigma=0.5)
        image = exposure.equalize_adapthist(image)

        # Replicate the grayscale slice so it matches a 3-channel model input.
        return np.stack([image, image, image], axis=-1).astype(np.float32)
    except Exception as e:
        # Deliberate best-effort: report the problem and let the caller skip
        # this file instead of aborting a whole batch.
        print(f"DICOM 파일 처리 오류: {e}")
        return None

# 3. 샘플 데이터 생성 (실제 DICOM 데이터가 없을 때)
def create_ct_dataset(num_samples=600, img_size=(64, 64), seed=None):
    """Generate a synthetic, CT-like dataset for three classes.

    Every image is Gaussian background noise with two brighter circular
    "lung" regions. Class 1 (Pneumonia) adds scattered bright pixels inside
    the lungs; class 2 (Tumor) adds one bright disc near the image center.
    Classes are assigned round-robin (``i % 3``), so the dataset is balanced
    up to rounding.

    Args:
        num_samples: Number of images to generate.
        img_size: ``(height, width)`` of each image. The lung/tumor geometry
            uses fixed pixel radii and offsets chosen for 64x64 images.
        seed: Optional seed for a private ``RandomState``. With the default
            ``None`` the global ``np.random`` generator is used, exactly as
            before, so code relying on ``np.random.seed(...)`` is unaffected.

    Returns:
        ``(X, y, class_names)`` where ``X`` is a ``float32`` array of shape
        ``(num_samples, height, width, 3)`` with values in [0, 1], ``y`` is an
        integer array of class indices, and ``class_names`` is the list of
        label names indexed by class.
    """
    class_names = ['Normal', 'Pneumonia', 'Tumor']
    num_classes = len(class_names)
    height, width = img_size[0], img_size[1]

    # Legacy generator API on purpose: the module-level functions and
    # RandomState share the same methods and the same random stream layout.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # The grids and lung masks are identical for every sample: build once.
    cy, cx = height // 2, width // 2
    y_grid, x_grid = np.ogrid[:height, :width]
    lung1 = ((x_grid - cx + 15)**2 + (y_grid - cy)**2) < 12**2
    lung2 = ((x_grid - cx - 15)**2 + (y_grid - cy)**2) < 12**2
    lungs = lung1 | lung2

    # Pre-allocating keeps the 4-D shape even when num_samples == 0.
    X = np.empty((num_samples, height, width, 3), dtype=np.float32)
    y = np.arange(num_samples) % num_classes

    for i, class_idx in enumerate(y):
        img = rng.normal(0.1, 0.05, (height, width))
        img[lungs] += 0.2

        if class_idx == 1:  # Pneumonia: ~15% of lung pixels brightened
            mask = rng.random_sample((height, width)) > 0.85
            img[mask & lungs] += 0.4
        elif class_idx == 2:  # Tumor: bright disc, center offset in [-8, 8)
            tumor_y = cy + rng.randint(-8, 8)
            tumor_x = cx + rng.randint(-8, 8)
            tumor_mask = ((x_grid - tumor_x)**2 + (y_grid - tumor_y)**2) < 5**2
            img[tumor_mask] += 0.6

        # Broadcast the clipped grayscale image into all three channels.
        X[i] = np.clip(img, 0, 1)[..., np.newaxis]

    return X, y, class_names

# 4. CNN 모델 정의
def build_ct_cnn_model(input_shape=(64, 64, 3), num_classes=3):
    """Build the (uncompiled) CNN classifier.

    Architecture: three convolutional stages with 32, 64 and 128 filters
    (3x3 kernels, 'same' padding, ReLU), each ending in 2x2 max pooling and
    25% dropout, followed by a 256 -> 128 dense head with 50% dropout and a
    softmax output layer.

    Args:
        input_shape: Shape of one input image, ``(height, width, channels)``.
        num_classes: Number of output classes (units in the softmax layer).

    Returns:
        A ``Sequential`` model. The caller is responsible for ``compile()``.
    """
    model = Sequential([
        # Declare the input explicitly instead of passing ``input_shape=`` to
        # the first layer, which newer Keras versions deprecate and warn on.
        tf.keras.Input(shape=input_shape),

        # Stage 1: 32 filters
        Conv2D(32, (3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        Conv2D(32, (3, 3), activation='relu', padding='same'),
        MaxPooling2D((2, 2)),
        Dropout(0.25),

        # Stage 2: 64 filters
        Conv2D(64, (3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        Conv2D(64, (3, 3), activation='relu', padding='same'),
        MaxPooling2D((2, 2)),
        Dropout(0.25),

        # Stage 3: 128 filters (single convolution)
        Conv2D(128, (3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.25),

        # Classification head
        Flatten(),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    return model

# 5. 데이터 준비 및 모델 훈련
# Build the synthetic dataset and one-hot encode the labels. The class count
# comes from class_names so it cannot drift from the dataset definition.
X_data, y_data, class_names = create_ct_dataset(600, (64, 64))
num_classes = len(class_names)
y_cat = to_categorical(y_data, num_classes)

# 70/30 train/test split, then 70/30 train/validation split of the remainder
# (about 49% / 21% / 30% overall), both stratified by class.
X_train, X_test, y_train, y_test = train_test_split(
    X_data, y_cat, test_size=0.3, random_state=42, stratify=y_cat
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.3, random_state=42, stratify=y_train
)

# Tie the model's input and output sizes to the data actually generated.
model = build_ct_cnn_model(input_shape=X_data.shape[1:], num_classes=num_classes)
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

callbacks = [
    # Stop after 5 epochs without a val_accuracy improvement and roll back to
    # the best weights seen.
    EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True),
    # Halve the learning rate after 3 epochs without a val_loss improvement.
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=0.0001)
]

history = model.fit(
    X_train, y_train,
    batch_size=16,
    epochs=50,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
    verbose=1
)

# Evaluate on the held-out test split, which was created above but previously
# never used.
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"테스트 손실: {test_loss:.4f}, 테스트 정확도: {test_accuracy:.4f}")

y_true = np.argmax(y_test, axis=1)
y_pred = np.argmax(model.predict(X_test, verbose=0), axis=1)
class_labels = list(range(num_classes))
print(classification_report(
    y_true, y_pred, labels=class_labels, target_names=class_names, zero_division=0
))
print(confusion_matrix(y_true, y_pred, labels=class_labels))

# 6. 예측 함수
def predict_ct_scan(model, image, class_names):
    """Classify a single image with ``model`` and return the top prediction.

    Args:
        model: Object exposing ``predict(batch, verbose=0)`` that returns one
            row of class scores per input image (e.g. a Keras model).
        image: One image as an array-like of shape ``(H, W)``, ``(H, W, 1)``
            or ``(H, W, C)``. Single-channel input is replicated to three
            channels; other 3-D input is passed through unchanged.
        class_names: Sequence of labels indexed by class id.

    Returns:
        ``(label, confidence, scores)``: the name of the highest-scoring
        class, that class's score, and the full score vector for the image.

    Raises:
        ValueError: If ``image`` is neither 2-D nor 3-D (for example an
            already-batched 4-D array).
    """
    image = np.asarray(image)

    if image.ndim == 2:
        # Plain grayscale: replicate into three channels.
        image = np.stack([image, image, image], axis=-1)
    elif image.ndim == 3 and image.shape[-1] == 1:
        # Grayscale with an explicit channel axis: same treatment.
        image = np.repeat(image, 3, axis=-1)
    elif image.ndim != 3:
        raise ValueError(
            "image must have shape (H, W), (H, W, 1) or (H, W, C); "
            f"got {image.shape}"
        )

    # The model works on batches, so add a leading batch axis of size 1.
    batch = np.expand_dims(image, axis=0)

    predictions = model.predict(batch, verbose=0)
    scores = predictions[0]
    predicted_class = int(np.argmax(scores))
    confidence = scores[predicted_class]

    return class_names[predicted_class], confidence, scores

# 7. Save the trained model to disk
# NOTE(review): '.h5' is the legacy HDF5 format; newer Keras releases
# recommend the native '.keras' format. Confirm nothing depends on this file
# name before changing it.
model.save('ct_dicom_cnn_model.h5')

# 8. Usage example
# Load the saved model back from disk
from tensorflow.keras.models import load_model
loaded_model = load_model('ct_dicom_cnn_model.h5')

# Predict on a real DICOM file (uncomment and set the path to use)
# dicom_image = process_dicom_file('path/to/dicom/file.dcm')
# if dicom_image is not None:
#     result, confidence, probs = predict_ct_scan(loaded_model, dicom_image, class_names)
#     print(f'예측: {result}, 확신도: {confidence:.4f}')

print("모델 훈련 및 저장 완료!")