In [None]:
# 임시 데이터 전용 코드 테스트 모드: 성능 자동 진단 포함

from torchvision import transforms
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import torchvision.models as models
import albumentations as A
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
from lung_nodule_dataset import LungNoduleSliceDataset

# Device selection: use the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# Hyperparameters shared by the data loaders, the optimizer and the epoch loop.
batch_size = 16        # samples per mini-batch
learning_rate = 1e-4   # Adam step size
num_epochs = 3         # full passes over the training set

# Custom model
class LungCancerClassifier(nn.Module):
    """ResNet-18 backbone adapted to single-channel input with a sigmoid output.

    The forward pass returns one value per sample in the open interval (0, 1),
    shaped ``(batch, 1)``, so it can be fed directly to ``nn.BCELoss``.
    """

    def __init__(self):
        super().__init__()
        # `pretrained=True` is deprecated in torchvision (>= 0.13); the weights
        # enum is the supported spelling and is what the other experiment cell
        # in this file already uses. For ResNet-18, DEFAULT resolves to the same
        # ImageNet checkpoint that `pretrained=True` used to load.
        self.base_model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        # Replace the 3-channel stem with a 1-channel one. This layer is freshly
        # initialised: the pretrained first-layer filters are not reused.
        self.base_model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        # Swap the 1000-way ImageNet head for a single sigmoid-activated unit.
        num_features = self.base_model.fc.in_features
        self.base_model.fc = nn.Sequential(nn.Linear(num_features, 1), nn.Sigmoid())

    def forward(self, x):
        """Return the backbone's sigmoid output for the input batch ``x``."""
        return self.base_model(x)

# Load the nodule table and inspect the label balance.
csv_path = "/data2/lijin/lidc-prep/nodule_matched.csv"
df = pd.read_csv(csv_path)
# NOTE(review): this is only an integer cast of `malignancy`; it yields a 0/1
# label only if the CSV column is already binary — confirm against the
# preprocessing step that wrote the file.
df['binary_label'] = df['malignancy'].astype(int)
print("✅ 전체 라벨 분포:")
print(df['binary_label'].value_counts())

# Stratified 80/20 split with a fixed seed.
# NOTE(review): the split is per CSV row; if several rows belong to the same
# nodule/patient they can land on both sides — verify against the CSV schema.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['binary_label'],
    random_state=42,
)
print("✅ 분할 후 라벨 분포:")
print("Train:", train_df['binary_label'].value_counts())
print("Test:", test_df['binary_label'].value_counts())

# Augmentation: random flip / brightness-contrast / small rotation for
# training, an empty pipeline for evaluation.
a_train = A.Compose([
    A.HorizontalFlip(p=0.7),
    A.RandomBrightnessContrast(p=0.5),
    A.Rotate(limit=15, p=0.5),
])
a_test = A.Compose([])

# Datasets: both splits read from the same slice directory.
train_dataset = LungNoduleSliceDataset(
    df=train_df,
    image_root="/data2/lijin/lidc-prep/LIDC-IDRI-slices",
    transform=a_train,
)
test_dataset = LungNoduleSliceDataset(
    df=test_df,
    image_root="/data2/lijin/lidc-prep/LIDC-IDRI-slices",
    transform=a_test,
)

# Loaders: shuffle only the training data so evaluation order stays fixed.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

# Model, loss and optimizer. BCELoss expects probabilities, which the model's
# final Sigmoid layer provides.
model = LungCancerClassifier()
model = model.to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# Training + evaluation loop: one training pass and one test pass per epoch.
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    total_loss, correct, total = 0, 0, 0

    for images, labels in train_loader:
        images = images.to(device).float()
        # Add a trailing axis so the targets line up with the (batch, 1) output.
        labels = labels.to(device).float().unsqueeze(1)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Running statistics, using a 0.5 decision threshold for accuracy.
        total_loss += loss.item()
        total += labels.size(0)
        predicted = (outputs > 0.5).float()
        correct += (predicted == labels).sum().item()

    print(f"[Epoch {epoch+1}] Train Loss: {total_loss / len(train_loader):.4f}, Accuracy: {(correct/total)*100:.2f}%")

    # ---- evaluation pass ----
    model.eval()
    all_preds = []
    all_probs = []
    all_labels = []
    test_loss = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device).float()
            labels = labels.to(device).float().unsqueeze(1)

            outputs = model(images)
            loss = criterion(outputs, labels)
            test_loss += loss.item()

            # Collect flat per-sample labels, probabilities and hard predictions.
            probs = outputs.cpu().numpy()
            preds = (probs > 0.5).astype(int)
            all_labels.extend(labels.cpu().numpy().ravel())
            all_probs.extend(probs.ravel())
            all_preds.extend(preds.ravel())

    # Report metrics; a ValueError from the metric functions is reported
    # instead of stopping the run.
    try:
        f1 = f1_score(all_labels, all_preds)
        auc = roc_auc_score(all_labels, all_probs)
    except ValueError as e:
        print("⚠️  지표 계산 실패:", str(e))
    else:
        print(f"✅ [Epoch {epoch+1}] Test Loss: {test_loss / len(test_loader):.4f}, F1: {f1:.4f}, AUC: {auc:.4f}")

    # Quick diagnostic: how many of each label exist vs. how many were predicted.
    print("📊 라벨 분포:", np.unique(all_labels, return_counts=True))
    print("📈 예측 분포:", np.unique(all_preds, return_counts=True))

In [None]:
# 모델 실험 스크립트 (EfficientNet-B0 / DenseNet121 / ResNet34 / ResNet50)

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision.models as models
import albumentations as A
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
from lung_nodule_dataset import LungNoduleSliceDataset

# Hyperparameter setup: run on the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
batch_size = 16        # samples per mini-batch
learning_rate = 1e-4   # Adam step size
num_epochs = 3         # training epochs per model

# Model factory
def get_model(name):
    """Build a torchvision backbone adapted for this binary task.

    The network is loaded with torchvision's DEFAULT pretrained weights, its
    first convolution is replaced by a freshly initialised 1-input-channel
    convolution, and its classification head is replaced by
    ``Linear(in_features, 1)`` followed by ``Sigmoid``.

    Args:
        name: One of ``'resnet34'``, ``'resnet50'``, ``'densenet121'`` or
            ``'efficientnet_b0'``.

    Returns:
        The modified ``nn.Module``.

    Raises:
        ValueError: If ``name`` is not one of the supported architectures.
    """
    supported = ('resnet34', 'resnet50', 'densenet121', 'efficientnet_b0')
    if name not in supported:
        raise ValueError(f"Unsupported model: {name}")

    def stem(out_channels, kernel_size, padding):
        # Single-channel replacement for the network's first convolution.
        return nn.Conv2d(1, out_channels, kernel_size=kernel_size, stride=2, padding=padding, bias=False)

    def head(in_features):
        # One sigmoid-activated output unit.
        return nn.Sequential(nn.Linear(in_features, 1), nn.Sigmoid())

    if name in ('resnet34', 'resnet50'):
        # Both ResNets share the same stem and `fc` head layout.
        if name == 'resnet34':
            net = models.resnet34(weights=models.ResNet34_Weights.DEFAULT)
        else:
            net = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        net.conv1 = stem(64, 7, 3)
        net.fc = head(net.fc.in_features)
    elif name == 'densenet121':
        net = models.densenet121(weights=models.DenseNet121_Weights.DEFAULT)
        net.features.conv0 = stem(64, 7, 3)
        net.classifier = head(net.classifier.in_features)
    else:  # 'efficientnet_b0'
        net = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT)
        net.features[0][0] = stem(32, 3, 1)
        # The feature width is read from index 1 of the stock classifier; the
        # whole classifier is then replaced by the new head.
        net.classifier = head(net.classifier[1].in_features)

    return net

# Data loading and preprocessing
csv_path = "/data2/lijin/lidc-prep/nodule_matched.csv"
df = pd.read_csv(csv_path)
# NOTE(review): this is only an integer cast of `malignancy`; it yields a 0/1
# label only if the CSV column is already binary — confirm against the
# preprocessing step that wrote the file.
df['binary_label'] = df['malignancy'].astype(int)
# Stratified 80/20 split with a fixed seed.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['binary_label'],
    random_state=42,
)

# Augmentation for training; an empty pipeline for evaluation.
a_train = A.Compose([
    A.HorizontalFlip(p=0.7),
    A.RandomBrightnessContrast(p=0.5),
    A.Rotate(limit=15, p=0.5),
])
a_test = A.Compose([])

# Both splits read from the same slice directory.
train_dataset = LungNoduleSliceDataset(
    df=train_df,
    image_root="/data2/lijin/lidc-prep/LIDC-IDRI-slices",
    transform=a_train,
)
test_dataset = LungNoduleSliceDataset(
    df=test_df,
    image_root="/data2/lijin/lidc-prep/LIDC-IDRI-slices",
    transform=a_test,
)

# Shuffle only the training data so evaluation order stays fixed.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

# Per-model metric rows. Kept as a plain list while the sweep runs and turned
# into the `df_results` DataFrame once every model has been evaluated, so the
# name `df_results` never refers to two different types.
result_rows = []

# Architectures to compare; each is trained from the same starting recipe.
model_names = ['resnet34', 'resnet50', 'densenet121', 'efficientnet_b0']

for model_name in model_names:
    print(f"\n🚀 시작: {model_name}")
    model = get_model(model_name).to(device)
    # BCELoss expects probabilities, which the model's Sigmoid head provides.
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        correct = 0
        total = 0

        for images, labels in train_loader:
            images = images.to(device).float()
            # Add a trailing axis so targets match the (batch, 1) model output.
            labels = labels.to(device).float().unsqueeze(1)

            outputs = model(images)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Training accuracy at a 0.5 decision threshold.
            predicted = (outputs > 0.5).float()
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
            total_loss += loss.item()

        print(f"Epoch {epoch+1}: Train Loss: {total_loss / len(train_loader):.4f}, Accuracy: {(correct/total)*100:.2f}%")

    # Test evaluation, run once after the final epoch of this model.
    model.eval()
    all_preds, all_probs, all_labels = [], [], []
    test_loss = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device).float()
            labels = labels.to(device).float().unsqueeze(1)
            outputs = model(images)
            probs = outputs.cpu().numpy()
            preds = (probs > 0.5).astype(int)
            all_preds.extend(preds.flatten())
            all_probs.extend(probs.flatten())
            all_labels.extend(labels.cpu().numpy().flatten())
            test_loss += criterion(outputs, labels).item()

    f1 = f1_score(all_labels, all_preds)
    try:
        auc = roc_auc_score(all_labels, all_probs)
    except ValueError as e:
        # roc_auc_score raises ValueError when the labels contain a single
        # class. Record NaN and carry on so one undefined metric does not
        # abort the sweep and discard the models already evaluated.
        print(f"⚠️  {model_name}: 지표 계산 실패:", str(e))
        auc = float("nan")
    avg_test_loss = test_loss / len(test_loader)
    print(f"✅ {model_name}: Test Loss: {avg_test_loss:.4f}, F1: {f1:.4f}, AUC: {auc:.4f}")

    result_rows.append({"model": model_name, "f1": f1, "auc": auc, "test_loss": avg_test_loss})

# Print the comparison table, best F1 first.
print("\n📊 모델별 결과 비교:")
df_results = pd.DataFrame(result_rows)
print(df_results.sort_values(by="f1", ascending=False).reset_index(drop=True))