In [None]:
import os
import numpy as np
from glob import glob
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import torch.nn.functional as F
from tqdm import tqdm
from collections import defaultdict
import gc

# Hyperparameters and runtime configuration
# Prefer the second GPU when one exists; otherwise fall back to the first GPU
# or the CPU instead of requesting a device index that is not present.
if torch.cuda.device_count() > 1:
    device = torch.device("cuda:1")
elif torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

slice_root = "/data1/lidc-idri/slices"  # root directory searched for .npy slices
batch_size = 16
num_epoch = 1
learning_rate = 1e-4

# 레이블 추출
def labels_filename(fname):
    """Derive a binary label from the score encoded at the end of a file name.

    The score is the integer found after the last underscore once ".npy" has
    been removed (presumably the LIDC malignancy rating -- confirm).

    Returns:
        1 when the score is 4 or higher, 0 when it is below 3, and None when
        the score is exactly 3 or cannot be parsed.
    """
    try:
        score = int(fname.split("_")[-1].replace(".npy", ""))
    except (ValueError, AttributeError, TypeError):
        # Unparseable names are skipped rather than aborting the scan, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return None
    return None if score == 3 else int(score >= 4)
    
# 데이터셋 전처리
class LIDCDataset(Dataset):
    """Dataset that loads ``.npy`` slices from disk and pairs them with labels.

    Each array is cast to float32, clipped to [-1000, 400] (presumably a
    Hounsfield-unit window -- confirm), rescaled to [0, 1] and given a leading
    channel axis before the optional ``transform`` is applied.
    """

    def __init__(self, file_paths, labels, transform=None):
        self.file_paths = file_paths    # sequence of .npy paths
        self.labels = labels            # labels aligned with file_paths
        self.transform = transform      # optional callable applied to the tensor

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``; the label is a float tensor."""
        path, target = self.file_paths[index], self.labels[index]

        raw = np.load(path).astype(np.float32)
        scaled = (np.clip(raw, -1000, 400) + 1000) / 1400.0
        img_tensor = torch.tensor(np.expand_dims(scaled, axis=0))

        if self.transform:
            img_tensor = self.transform(img_tensor)

        return img_tensor, torch.tensor(target).float()


# Data augmentation
def _build_pipeline(*augmentations):
    """Wrap optional augmentations between the shared PIL conversion / crop and ToTensor."""
    return transforms.Compose([
        transforms.ToPILImage(),
        transforms.CenterCrop(180),
        *augmentations,
        transforms.ToTensor(),
    ])


# Named preprocessing pipelines; every entry shares the 180-pixel centre crop
# and differs only in the random augmentations inserted after it.
augmentation_configs = {
    'baseline': _build_pipeline(),

    'flip_rotate': _build_pipeline(
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
    ),

    'blur': _build_pipeline(
        transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0)),
    ),

    'total': _build_pipeline(
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0)),
    ),
}

# Model factory
def get_model(name):
    """Build a torchvision backbone for single-channel binary classification.

    The network is created with ``pretrained=True``; its first convolution is
    replaced by a freshly initialised 1-input-channel layer and its final
    linear layer by a single-output layer. The model is moved to ``device``.

    Args:
        name: one of "resnet18", "resnet34", "densenet121", "efficientnet_b0".

    Raises:
        ValueError: if ``name`` is not one of the supported architectures.
    """
    def _stem(out_channels, kernel_size, padding):
        # Stride-2, bias-free first convolution taking one input channel.
        return nn.Conv2d(1, out_channels, kernel_size=kernel_size, stride=2, padding=padding, bias=False)

    if name in ("resnet18", "resnet34"):
        builder = models.resnet18 if name == "resnet18" else models.resnet34
        model = builder(pretrained=True)
        model.conv1 = _stem(64, 7, 3)
        model.fc = nn.Linear(model.fc.in_features, 1)
    elif name == "densenet121":
        model = models.densenet121(pretrained=True)
        model.features.conv0 = _stem(64, 7, 3)
        model.classifier = nn.Linear(model.classifier.in_features, 1)
    elif name == "efficientnet_b0":
        model = models.efficientnet_b0(pretrained=True)
        model.features[0][0] = _stem(32, 3, 1)
        model.classifier[1] = nn.Linear(model.classifier[1].in_features, 1)
    else:
        raise ValueError("Unknown model name")

    return model.to(device)


# Collect labelled slice files and split them 70% / 15% / 15%
all_files = glob(os.path.join(slice_root, "LIDC-IDRI-*", "*.npy"))

# Keep only files whose name yields a usable binary label.
file_label_pairs = [(f, labels_filename(f)) for f in all_files]
file_label_pairs = [(f, l) for f, l in file_label_pairs if l is not None]
if not file_label_pairs:
    # zip(*[]) would otherwise fail with an opaque "not enough values to unpack".
    raise ValueError(f"No labelled .npy slices found under {slice_root!r}")
files, labels = zip(*file_label_pairs)

# NOTE(review): the split is per file, so files from the same LIDC-IDRI-*
# directory can end up in different subsets -- confirm this is acceptable.
train_files, temp_files, train_labels, temp_labels = train_test_split(files, labels, test_size=0.3, random_state=42)
val_files, test_files, val_labels, test_labels = train_test_split(temp_files, temp_labels, test_size=0.5, random_state=42)


# Train and evaluate every (model, augmentation) combination
results = defaultdict(dict)
model_names = ["resnet18", "resnet34", "densenet121", "efficientnet_b0"]

# The checkpoint directory is the same for every run, so create it once.
save_dir = os.path.join(os.path.dirname(os.getcwd()), "pth")
os.makedirs(save_dir, exist_ok=True)

# Validation and test data always use the augmentation-free pipeline so that
# scores are deterministic and comparable across augmentation settings.
eval_transform = augmentation_configs['baseline']

for model_name in model_names:
    for aug_name, transform in augmentation_configs.items():
        print(f"\n Running: {model_name} + {aug_name}")

        train_dataset = LIDCDataset(train_files, train_labels, transform)
        val_dataset = LIDCDataset(val_files, val_labels, eval_transform)
        test_dataset = LIDCDataset(test_files, test_labels, eval_transform)

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size)
        test_loader = DataLoader(test_dataset, batch_size=batch_size)

        model = get_model(model_name)

        # Loss and optimizer
        criterion = nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

        checkpoint_path = os.path.join(save_dir, f"best_aug_{model_name}_{aug_name}.pth")
        # Start below any reachable accuracy so the first epoch always writes a
        # checkpoint; otherwise the load below could fail or pick up a stale file.
        best_val_acc = float("-inf")

        for epoch in range(num_epoch):
            # --- Training ---
            model.train()

            correct = 0
            total = 0

            for images, targets in train_loader:
                images = images.to(device)
                targets = targets.unsqueeze(1).to(device)  # (B,) -> (B, 1) to match the logits

                outputs = model(images)
                loss = criterion(outputs, targets)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                predicted = (torch.sigmoid(outputs) > 0.5).long()
                correct += (predicted == targets.long()).sum().item()
                total += targets.size(0)

            train_acc = correct / total
            print(f"[{model_name} + {aug_name}] Epoch: {epoch+1}/{num_epoch} Train Acc: {train_acc * 100:.4f}%")

            # --- Validation ---
            model.eval()

            val_correct = 0
            val_total = 0

            with torch.no_grad():
                for images, targets in val_loader:
                    images = images.to(device)
                    targets = targets.to(device)

                    outputs = model(images)

                    # view(-1) keeps a 1-D result even for a batch of one,
                    # where squeeze() would collapse it to a 0-d tensor.
                    predicted = (torch.sigmoid(outputs) > 0.5).view(-1).long()
                    val_correct += (predicted == targets.long()).sum().item()
                    val_total += targets.size(0)

            val_acc = val_correct / val_total
            print(f"[{model_name} + {aug_name}] Epoch {epoch+1}/{num_epoch} Val Acc {val_acc * 100:.4f}%")

            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(model.state_dict(), checkpoint_path)

        # --- Test with the best checkpoint of this run ---
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
        model.eval()

        y_true, y_pred, y_probs = [], [], []

        with torch.no_grad():
            for images, targets in test_loader:
                images, targets = images.to(device), targets.to(device)

                outputs = model(images)

                probs = torch.sigmoid(outputs).view(-1)
                preds = (probs > 0.5).long()

                y_true.extend(targets.cpu().numpy())
                y_pred.extend(preds.cpu().numpy())
                y_probs.extend(probs.cpu().numpy())

        acc = (np.array(y_true) == np.array(y_pred)).mean()
        auc = roc_auc_score(y_true, y_probs)
        cm = confusion_matrix(y_true, y_pred)
        results[model_name][aug_name] = {"acc": acc, "auc": auc, "cm": cm}
        print(f"✅ Test Acc: {acc:.4f}, AUC: {auc:.4f}")


In [None]:
import os
import numpy as np
from glob import glob
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import torch.nn.functional as F
from tqdm import tqdm
import gc


# --- Configuration ---
# Use the default CUDA device when available, otherwise the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

SLICE_ROOT = "/data1/lidc-idri/slices"  # root directory searched for .npy slices
BATCH_SIZE, NUM_EPOCHS = 16, 100
LR = 1e-4

# --- 라벨 추출 함수 ---
def extract_label_from_filename(filename):
    """Derive a binary label from the score encoded at the end of a file name.

    The score is the integer found after the last underscore once ".npy" has
    been removed (presumably the LIDC malignancy rating -- confirm).

    Returns:
        1 when the score is 4 or higher, 0 when it is below 3, and None when
        the score is exactly 3 or cannot be parsed.
    """
    try:
        score = int(filename.split("_")[-1].replace(".npy", ""))
    except (ValueError, AttributeError, TypeError):
        # Unparseable names are skipped rather than aborting the scan, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return None
    if score == 3:
        return None
    return 1 if score >= 4 else 0

# --- Build the file list ---
all_files = glob(os.path.join(SLICE_ROOT, "LIDC-IDRI-*", "*.npy"))
# Keep only files whose name yields a usable binary label.
file_label_pairs = [(f, extract_label_from_filename(f)) for f in all_files]
file_label_pairs = [(f, l) for f, l in file_label_pairs if l is not None]
if not file_label_pairs:
    # zip(*[]) would otherwise fail with an opaque "not enough values to unpack".
    raise ValueError(f"No labelled .npy slices found under {SLICE_ROOT!r}")
files, labels = zip(*file_label_pairs)

# --- 3-way split (70% train / 15% val / 15% test) ---
# NOTE(review): the split is per file, so files from the same LIDC-IDRI-*
# directory can end up in different subsets -- confirm this is acceptable.
train_files, temp_files, train_labels, temp_labels = train_test_split(
    files, labels, test_size=0.3, random_state=42
)
val_files, test_files, val_labels, test_labels = train_test_split(
    temp_files, temp_labels, test_size=0.5, random_state=42
)

# --- Transforms ---
# Random augmentations used for training only.
_train_augmentations = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0)),
]

# Training pipeline: PIL conversion, 180-pixel centre crop, augmentations, tensor.
train_transform = transforms.Compose(
    [transforms.ToPILImage(), transforms.CenterCrop(180)]
    + _train_augmentations
    + [transforms.ToTensor()]
)

# Evaluation pipeline: the same conversion and crop without any randomness.
val_transform = transforms.Compose(
    [transforms.ToPILImage(), transforms.CenterCrop(180), transforms.ToTensor()]
)

# --- Dataset 정의 ---
class LIDCDataset(Dataset):
    """Dataset that loads ``.npy`` slices and serves them as 8-bit-quantised images.

    Each array is clipped to [-1000, 400] (presumably a Hounsfield-unit window
    -- confirm), rescaled to 0..255, cast to uint8 and squeezed. With a
    ``transform`` the uint8 array is handed to it; without one the array is
    rescaled to [0, 1] and returned as a float tensor with a leading channel axis.
    """

    def __init__(self, file_paths, labels, transform=None):
        self.file_paths = file_paths    # sequence of .npy paths
        self.labels = labels            # labels aligned with file_paths
        self.transform = transform      # optional callable applied to the uint8 array

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``; the label is a float tensor."""
        path, target = self.file_paths[index], self.labels[index]

        windowed = np.clip(np.load(path), -1000, 400)
        pixels = ((windowed + 1000) / 1400.0 * 255).astype(np.uint8).squeeze()

        if not self.transform:
            sample = torch.tensor(pixels / 255.0).unsqueeze(0).float()
        else:
            sample = self.transform(pixels)

        return sample, torch.tensor(target).float()

# --- Datasets and loaders ---
# Only the training set receives the augmenting transform.
train_dataset, val_dataset, test_dataset = (
    LIDCDataset(paths, targets, transform=pipeline)
    for paths, targets, pipeline in (
        (train_files, train_labels, train_transform),
        (val_files, val_labels, val_transform),
        (test_files, test_labels, val_transform),
    )
)

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
val_loader, test_loader = (
    DataLoader(subset, batch_size=BATCH_SIZE) for subset in (val_dataset, test_dataset)
)

# --- Model ---
# Pretrained ResNet-18 with a new 1-input-channel stem and a single-logit head.
model = models.resnet18(pretrained=True)
model.conv1 = nn.Conv2d(
    in_channels=1, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False
)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=1)
model = model.to(DEVICE)

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=LR)

# --- Checkpoint directory and best-score tracker ---
# Checkpoints go to a "pth" folder next to the current working directory.
save_dir = os.path.join(os.path.dirname(os.getcwd()), "pth")
os.makedirs(save_dir, exist_ok=True)
best_val_acc = 0.0

# --- Training loop ---
for epoch in range(NUM_EPOCHS):
    model.train()

    epoch_loss = 0.0
    correct = 0
    total = 0

    for images, targets in tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}"):
        images = images.to(DEVICE)
        targets = targets.unsqueeze(1).to(DEVICE)  # (B,) -> (B, 1) to match the logits

        outputs = model(images)
        loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        predicted = torch.sigmoid(outputs) >= 0.5
        total += targets.size(0)
        correct += (predicted == targets.long()).sum().item()

        epoch_loss += loss.item()

    # Computed once per epoch; max(..., 1) keeps an empty loader from crashing.
    train_acc = correct / max(total, 1)
    print(f"[Epoch {epoch+1}] Train Loss: {epoch_loss/max(len(train_loader), 1):.4f} Accuracy : {train_acc * 100:.2f}%")

    gc.collect()
    torch.cuda.empty_cache()

    # --- Validation ---
    model.eval()
    correct = total = 0

    with torch.no_grad():
        for images, targets in val_loader:
            images = images.to(DEVICE)
            targets = targets.to(DEVICE)

            outputs = model(images)
            # view(-1) keeps a 1-D result even for a batch of one, where
            # squeeze() would collapse it to a 0-d tensor.
            preds = (torch.sigmoid(outputs) > 0.5).view(-1).long()

            correct += (preds == targets.long()).sum().item()
            total += targets.size(0)

    val_acc = correct / total
    print(f"Validation Accuracy: {val_acc * 100:.2f}%")

    # Keep only the weights with the best validation accuracy so far.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), os.path.join(save_dir, "model_aug_resnet18.pth"))
        print("✅ Best model saved!")


# --- Test (final evaluation with the best checkpoint) ---
print("\n\U0001F4CA Test Set Evaluation (Best Model \uAE30\uc900):")
model.load_state_dict(
    torch.load(os.path.join(save_dir, "model_aug_resnet18.pth"), map_location=DEVICE)
)
model.eval()

correct = total = 0
y_true = []
y_pred = []
y_probs = []

with torch.no_grad():
    for images, targets in test_loader:
        images = images.to(DEVICE)
        # Keep targets 1-D: comparing (B, 1) targets with (B,) predictions
        # broadcasts to (B, B) and inflates the "correct" count.
        targets = targets.float().to(DEVICE)

        outputs = model(images)
        probs = torch.sigmoid(outputs).view(-1)  # stays 1-D even for a batch of one
        preds = (probs > 0.5).long()

        y_true.extend(targets.cpu().numpy())
        y_pred.extend(preds.cpu().numpy())
        y_probs.extend(probs.cpu().numpy())

        correct += (preds == targets.long()).sum().item()
        total += targets.size(0)

test_acc = correct / total
print(f"\u2705 Test Accuracy: {test_acc * 100:.2f}%")
print(classification_report(y_true, y_pred, digits=4))

# AUC
try:
    auc_score = roc_auc_score(y_true, y_probs)
    print(f"AUC: {auc_score:.4f}")
except ValueError:
    # roc_auc_score raises ValueError when y_true contains a single class.
    print("AUC \uacc4\uc0b0 \uc2e4\ud328: \uc591/\uc74c \ud074\ub798\uc2a4\uac00 \ubaa8\ub450 \uc788\uc5b4\uc57c \ud568.")

# Confusion matrix
cm = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:")
print(cm)


Epoch 1/100: 100%|██████████| 234/234 [00:28<00:00,  8.13it/s]


[Epoch 1] Train Loss: 0.6589 Accuracy : 62.67%
Validation Accuracy: 67.12%
✅ Best model saved!


Epoch 2/100: 100%|██████████| 234/234 [00:29<00:00,  7.81it/s]


[Epoch 2] Train Loss: 0.6071 Accuracy : 67.87%
Validation Accuracy: 66.00%


Epoch 3/100: 100%|██████████| 234/234 [00:25<00:00,  9.03it/s]


[Epoch 3] Train Loss: 0.5733 Accuracy : 69.53%
Validation Accuracy: 68.00%
✅ Best model saved!


Epoch 4/100: 100%|██████████| 234/234 [00:23<00:00, 10.03it/s]


[Epoch 4] Train Loss: 0.5222 Accuracy : 73.85%
Validation Accuracy: 70.25%
✅ Best model saved!


Epoch 5/100: 100%|██████████| 234/234 [00:29<00:00,  8.00it/s]


[Epoch 5] Train Loss: 0.4889 Accuracy : 75.32%
Validation Accuracy: 72.75%
✅ Best model saved!


Epoch 6/100: 100%|██████████| 234/234 [00:22<00:00, 10.47it/s]


[Epoch 6] Train Loss: 0.4446 Accuracy : 79.13%
Validation Accuracy: 71.88%


Epoch 7/100: 100%|██████████| 234/234 [00:28<00:00,  8.11it/s]


[Epoch 7] Train Loss: 0.4120 Accuracy : 80.44%
Validation Accuracy: 78.12%
✅ Best model saved!


Epoch 8/100: 100%|██████████| 234/234 [00:26<00:00,  8.79it/s]


[Epoch 8] Train Loss: 0.3731 Accuracy : 83.52%
Validation Accuracy: 76.50%


Epoch 9/100: 100%|██████████| 234/234 [00:24<00:00,  9.49it/s]


[Epoch 9] Train Loss: 0.3327 Accuracy : 85.40%
Validation Accuracy: 79.75%
✅ Best model saved!


Epoch 10/100: 100%|██████████| 234/234 [00:27<00:00,  8.44it/s]


[Epoch 10] Train Loss: 0.2981 Accuracy : 87.92%
Validation Accuracy: 78.25%


Epoch 11/100: 100%|██████████| 234/234 [00:26<00:00,  8.67it/s]


[Epoch 11] Train Loss: 0.2715 Accuracy : 88.80%
Validation Accuracy: 79.50%


Epoch 12/100: 100%|██████████| 234/234 [00:28<00:00,  8.17it/s]


[Epoch 12] Train Loss: 0.2412 Accuracy : 90.09%
Validation Accuracy: 83.50%
✅ Best model saved!


Epoch 13/100: 100%|██████████| 234/234 [00:23<00:00,  9.78it/s]


[Epoch 13] Train Loss: 0.2415 Accuracy : 89.90%
Validation Accuracy: 80.75%


Epoch 14/100: 100%|██████████| 234/234 [00:34<00:00,  6.84it/s]


[Epoch 14] Train Loss: 0.2148 Accuracy : 91.45%
Validation Accuracy: 83.25%


Epoch 15/100: 100%|██████████| 234/234 [00:30<00:00,  7.78it/s]


[Epoch 15] Train Loss: 0.1942 Accuracy : 91.99%
Validation Accuracy: 83.75%
✅ Best model saved!


Epoch 16/100: 100%|██████████| 234/234 [00:28<00:00,  8.10it/s]


[Epoch 16] Train Loss: 0.1962 Accuracy : 92.26%
Validation Accuracy: 81.50%


Epoch 17/100: 100%|██████████| 234/234 [00:27<00:00,  8.62it/s]


[Epoch 17] Train Loss: 0.1528 Accuracy : 94.00%
Validation Accuracy: 82.00%


Epoch 18/100: 100%|██████████| 234/234 [00:26<00:00,  8.68it/s]


[Epoch 18] Train Loss: 0.1568 Accuracy : 93.54%
Validation Accuracy: 82.12%


Epoch 19/100: 100%|██████████| 234/234 [00:30<00:00,  7.62it/s]


[Epoch 19] Train Loss: 0.1591 Accuracy : 93.54%
Validation Accuracy: 84.25%
✅ Best model saved!


Epoch 20/100: 100%|██████████| 234/234 [00:25<00:00,  9.05it/s]


[Epoch 20] Train Loss: 0.1494 Accuracy : 95.02%
Validation Accuracy: 83.12%


Epoch 21/100: 100%|██████████| 234/234 [00:27<00:00,  8.65it/s]


[Epoch 21] Train Loss: 0.1296 Accuracy : 95.18%
Validation Accuracy: 83.38%


Epoch 22/100: 100%|██████████| 234/234 [00:27<00:00,  8.66it/s]


[Epoch 22] Train Loss: 0.1404 Accuracy : 94.56%
Validation Accuracy: 81.88%


Epoch 23/100: 100%|██████████| 234/234 [00:26<00:00,  8.93it/s]


[Epoch 23] Train Loss: 0.1091 Accuracy : 95.90%
Validation Accuracy: 83.62%


Epoch 24/100: 100%|██████████| 234/234 [00:27<00:00,  8.44it/s]


[Epoch 24] Train Loss: 0.1203 Accuracy : 95.36%
Validation Accuracy: 84.62%
✅ Best model saved!


Epoch 25/100: 100%|██████████| 234/234 [00:23<00:00, 10.07it/s]


[Epoch 25] Train Loss: 0.1293 Accuracy : 95.15%
Validation Accuracy: 83.25%


Epoch 26/100: 100%|██████████| 234/234 [00:26<00:00,  8.84it/s]


[Epoch 26] Train Loss: 0.1109 Accuracy : 96.11%
Validation Accuracy: 83.88%


Epoch 27/100: 100%|██████████| 234/234 [00:24<00:00,  9.39it/s]


[Epoch 27] Train Loss: 0.1018 Accuracy : 95.95%
Validation Accuracy: 85.50%
✅ Best model saved!


Epoch 28/100: 100%|██████████| 234/234 [00:32<00:00,  7.31it/s]


[Epoch 28] Train Loss: 0.1040 Accuracy : 95.95%
Validation Accuracy: 85.00%


Epoch 29/100: 100%|██████████| 234/234 [00:28<00:00,  8.09it/s]


[Epoch 29] Train Loss: 0.0895 Accuracy : 96.92%
Validation Accuracy: 86.25%
✅ Best model saved!


Epoch 30/100: 100%|██████████| 234/234 [00:28<00:00,  8.25it/s]


[Epoch 30] Train Loss: 0.0925 Accuracy : 96.46%
Validation Accuracy: 85.38%


Epoch 31/100: 100%|██████████| 234/234 [00:25<00:00,  9.27it/s]


[Epoch 31] Train Loss: 0.1033 Accuracy : 96.20%
Validation Accuracy: 86.50%
✅ Best model saved!


Epoch 32/100: 100%|██████████| 234/234 [00:26<00:00,  8.96it/s]


[Epoch 32] Train Loss: 0.0939 Accuracy : 96.60%
Validation Accuracy: 87.38%
✅ Best model saved!


Epoch 33/100: 100%|██████████| 234/234 [00:27<00:00,  8.61it/s]


[Epoch 33] Train Loss: 0.0725 Accuracy : 97.43%
Validation Accuracy: 83.62%


Epoch 34/100: 100%|██████████| 234/234 [00:25<00:00,  9.10it/s]


[Epoch 34] Train Loss: 0.0953 Accuracy : 96.49%
Validation Accuracy: 86.88%


Epoch 35/100: 100%|██████████| 234/234 [00:25<00:00,  9.06it/s]


[Epoch 35] Train Loss: 0.0795 Accuracy : 97.16%
Validation Accuracy: 85.00%


Epoch 36/100: 100%|██████████| 234/234 [00:42<00:00,  5.56it/s]


[Epoch 36] Train Loss: 0.0720 Accuracy : 97.32%
Validation Accuracy: 87.62%
✅ Best model saved!


Epoch 37/100: 100%|██████████| 234/234 [00:46<00:00,  5.04it/s]


[Epoch 37] Train Loss: 0.0639 Accuracy : 97.72%
Validation Accuracy: 87.88%
✅ Best model saved!


Epoch 38/100: 100%|██████████| 234/234 [00:34<00:00,  6.79it/s]


[Epoch 38] Train Loss: 0.0612 Accuracy : 97.56%
Validation Accuracy: 86.75%


Epoch 39/100: 100%|██████████| 234/234 [00:41<00:00,  5.69it/s]


[Epoch 39] Train Loss: 0.0783 Accuracy : 97.27%
Validation Accuracy: 83.62%


Epoch 40/100: 100%|██████████| 234/234 [00:39<00:00,  5.96it/s]


[Epoch 40] Train Loss: 0.0889 Accuracy : 96.54%
Validation Accuracy: 86.25%


Epoch 41/100: 100%|██████████| 234/234 [00:32<00:00,  7.13it/s]


[Epoch 41] Train Loss: 0.0562 Accuracy : 98.07%
Validation Accuracy: 84.00%


Epoch 42/100: 100%|██████████| 234/234 [00:34<00:00,  6.86it/s]


[Epoch 42] Train Loss: 0.0738 Accuracy : 97.19%
Validation Accuracy: 85.75%


Epoch 43/100: 100%|██████████| 234/234 [00:25<00:00,  9.03it/s]


[Epoch 43] Train Loss: 0.0679 Accuracy : 97.56%
Validation Accuracy: 85.25%


Epoch 44/100: 100%|██████████| 234/234 [00:27<00:00,  8.39it/s]


[Epoch 44] Train Loss: 0.0732 Accuracy : 97.51%
Validation Accuracy: 86.88%


Epoch 45/100: 100%|██████████| 234/234 [00:26<00:00,  8.86it/s]


[Epoch 45] Train Loss: 0.0739 Accuracy : 97.59%
Validation Accuracy: 85.62%


Epoch 46/100: 100%|██████████| 234/234 [00:27<00:00,  8.37it/s]


[Epoch 46] Train Loss: 0.0615 Accuracy : 97.99%
Validation Accuracy: 85.75%


Epoch 47/100: 100%|██████████| 234/234 [00:27<00:00,  8.63it/s]


[Epoch 47] Train Loss: 0.0639 Accuracy : 97.43%
Validation Accuracy: 87.88%


Epoch 48/100: 100%|██████████| 234/234 [00:26<00:00,  8.67it/s]


[Epoch 48] Train Loss: 0.0570 Accuracy : 97.94%
Validation Accuracy: 86.00%


Epoch 49/100: 100%|██████████| 234/234 [00:24<00:00,  9.44it/s]


[Epoch 49] Train Loss: 0.0701 Accuracy : 97.56%
Validation Accuracy: 86.12%


Epoch 50/100: 100%|██████████| 234/234 [00:26<00:00,  8.69it/s]


[Epoch 50] Train Loss: 0.0604 Accuracy : 98.10%
Validation Accuracy: 79.00%


Epoch 51/100: 100%|██████████| 234/234 [00:27<00:00,  8.47it/s]


[Epoch 51] Train Loss: 0.0517 Accuracy : 98.23%
Validation Accuracy: 85.88%


Epoch 52/100: 100%|██████████| 234/234 [00:30<00:00,  7.69it/s]


[Epoch 52] Train Loss: 0.0412 Accuracy : 98.61%
Validation Accuracy: 87.62%


Epoch 53/100: 100%|██████████| 234/234 [00:27<00:00,  8.58it/s]


[Epoch 53] Train Loss: 0.0519 Accuracy : 97.99%
Validation Accuracy: 87.50%


Epoch 54/100: 100%|██████████| 234/234 [00:30<00:00,  7.72it/s]


[Epoch 54] Train Loss: 0.0708 Accuracy : 97.53%
Validation Accuracy: 83.62%


Epoch 55/100: 100%|██████████| 234/234 [00:26<00:00,  8.96it/s]


[Epoch 55] Train Loss: 0.0564 Accuracy : 97.70%
Validation Accuracy: 84.50%


Epoch 56/100: 100%|██████████| 234/234 [00:25<00:00,  9.05it/s]


[Epoch 56] Train Loss: 0.0534 Accuracy : 98.12%
Validation Accuracy: 86.25%


Epoch 57/100: 100%|██████████| 234/234 [00:28<00:00,  8.22it/s]


[Epoch 57] Train Loss: 0.0426 Accuracy : 98.58%
Validation Accuracy: 88.00%
✅ Best model saved!


Epoch 58/100: 100%|██████████| 234/234 [00:28<00:00,  8.15it/s]


[Epoch 58] Train Loss: 0.0463 Accuracy : 98.37%
Validation Accuracy: 86.25%


Epoch 59/100: 100%|██████████| 234/234 [00:28<00:00,  8.22it/s]


[Epoch 59] Train Loss: 0.0575 Accuracy : 97.99%
Validation Accuracy: 87.12%


Epoch 60/100: 100%|██████████| 234/234 [00:27<00:00,  8.39it/s]


[Epoch 60] Train Loss: 0.0597 Accuracy : 97.96%
Validation Accuracy: 87.00%


Epoch 61/100: 100%|██████████| 234/234 [00:27<00:00,  8.44it/s]


[Epoch 61] Train Loss: 0.0468 Accuracy : 98.31%
Validation Accuracy: 87.62%


Epoch 62/100: 100%|██████████| 234/234 [00:26<00:00,  8.77it/s]


[Epoch 62] Train Loss: 0.0512 Accuracy : 98.04%
Validation Accuracy: 87.00%


Epoch 63/100: 100%|██████████| 234/234 [00:27<00:00,  8.38it/s]


[Epoch 63] Train Loss: 0.0423 Accuracy : 98.58%
Validation Accuracy: 88.12%
✅ Best model saved!


Epoch 64/100: 100%|██████████| 234/234 [00:26<00:00,  8.89it/s]


[Epoch 64] Train Loss: 0.0578 Accuracy : 98.10%
Validation Accuracy: 87.75%


Epoch 65/100: 100%|██████████| 234/234 [00:29<00:00,  8.06it/s]


[Epoch 65] Train Loss: 0.0657 Accuracy : 97.35%
Validation Accuracy: 87.75%


Epoch 66/100: 100%|██████████| 234/234 [00:28<00:00,  8.35it/s]


[Epoch 66] Train Loss: 0.0457 Accuracy : 98.47%
Validation Accuracy: 85.38%


Epoch 67/100: 100%|██████████| 234/234 [00:26<00:00,  8.67it/s]


[Epoch 67] Train Loss: 0.0365 Accuracy : 98.69%
Validation Accuracy: 87.38%


Epoch 68/100: 100%|██████████| 234/234 [00:27<00:00,  8.44it/s]


[Epoch 68] Train Loss: 0.0357 Accuracy : 98.63%
Validation Accuracy: 86.88%


Epoch 69/100: 100%|██████████| 234/234 [00:25<00:00,  9.27it/s]


[Epoch 69] Train Loss: 0.0558 Accuracy : 98.18%
Validation Accuracy: 87.25%


Epoch 70/100: 100%|██████████| 234/234 [00:29<00:00,  7.90it/s]


[Epoch 70] Train Loss: 0.0493 Accuracy : 98.12%
Validation Accuracy: 86.75%


Epoch 71/100: 100%|██████████| 234/234 [00:26<00:00,  8.93it/s]


[Epoch 71] Train Loss: 0.0406 Accuracy : 98.45%
Validation Accuracy: 86.88%


Epoch 72/100: 100%|██████████| 234/234 [00:23<00:00, 10.01it/s]


[Epoch 72] Train Loss: 0.0547 Accuracy : 98.10%
Validation Accuracy: 86.62%


Epoch 73/100: 100%|██████████| 234/234 [00:29<00:00,  7.97it/s]


[Epoch 73] Train Loss: 0.0451 Accuracy : 98.58%
Validation Accuracy: 87.75%


Epoch 74/100: 100%|██████████| 234/234 [00:27<00:00,  8.39it/s]


[Epoch 74] Train Loss: 0.0291 Accuracy : 98.77%
Validation Accuracy: 87.62%


Epoch 75/100: 100%|██████████| 234/234 [00:27<00:00,  8.41it/s]


[Epoch 75] Train Loss: 0.0227 Accuracy : 99.22%
Validation Accuracy: 88.00%


Epoch 76/100: 100%|██████████| 234/234 [00:21<00:00, 10.85it/s]


[Epoch 76] Train Loss: 0.0393 Accuracy : 98.61%
Validation Accuracy: 86.62%


Epoch 77/100: 100%|██████████| 234/234 [00:25<00:00,  9.23it/s]


[Epoch 77] Train Loss: 0.0437 Accuracy : 98.26%
Validation Accuracy: 87.75%


Epoch 78/100: 100%|██████████| 234/234 [00:25<00:00,  9.12it/s]


[Epoch 78] Train Loss: 0.0343 Accuracy : 98.74%
Validation Accuracy: 86.88%


Epoch 79/100: 100%|██████████| 234/234 [00:24<00:00,  9.45it/s]


[Epoch 79] Train Loss: 0.0391 Accuracy : 98.85%
Validation Accuracy: 85.00%


Epoch 80/100: 100%|██████████| 234/234 [00:32<00:00,  7.19it/s]


[Epoch 80] Train Loss: 0.0683 Accuracy : 97.56%
Validation Accuracy: 87.88%


Epoch 81/100: 100%|██████████| 234/234 [00:31<00:00,  7.32it/s]


[Epoch 81] Train Loss: 0.0448 Accuracy : 98.71%
Validation Accuracy: 86.38%


Epoch 82/100: 100%|██████████| 234/234 [00:39<00:00,  5.99it/s]


[Epoch 82] Train Loss: 0.0324 Accuracy : 98.90%
Validation Accuracy: 87.25%


Epoch 83/100: 100%|██████████| 234/234 [00:37<00:00,  6.24it/s]


[Epoch 83] Train Loss: 0.0240 Accuracy : 99.04%
Validation Accuracy: 86.88%


Epoch 84/100: 100%|██████████| 234/234 [00:40<00:00,  5.76it/s]


[Epoch 84] Train Loss: 0.0237 Accuracy : 99.17%
Validation Accuracy: 87.50%


Epoch 85/100: 100%|██████████| 234/234 [00:41<00:00,  5.68it/s]


[Epoch 85] Train Loss: 0.0323 Accuracy : 98.82%
Validation Accuracy: 86.50%


Epoch 86/100: 100%|██████████| 234/234 [00:32<00:00,  7.15it/s]


[Epoch 86] Train Loss: 0.0238 Accuracy : 99.25%
Validation Accuracy: 87.00%


Epoch 87/100: 100%|██████████| 234/234 [00:24<00:00,  9.36it/s]


[Epoch 87] Train Loss: 0.0392 Accuracy : 98.63%
Validation Accuracy: 86.62%


Epoch 88/100: 100%|██████████| 234/234 [00:25<00:00,  9.18it/s]


[Epoch 88] Train Loss: 0.0414 Accuracy : 98.71%
Validation Accuracy: 86.00%


Epoch 89/100: 100%|██████████| 234/234 [00:24<00:00,  9.44it/s]


[Epoch 89] Train Loss: 0.0353 Accuracy : 98.63%
Validation Accuracy: 86.38%


Epoch 90/100: 100%|██████████| 234/234 [00:29<00:00,  7.99it/s]


[Epoch 90] Train Loss: 0.0361 Accuracy : 98.85%
Validation Accuracy: 88.38%
✅ Best model saved!


Epoch 91/100: 100%|██████████| 234/234 [00:24<00:00,  9.42it/s]


[Epoch 91] Train Loss: 0.0158 Accuracy : 99.38%
Validation Accuracy: 87.38%


Epoch 92/100: 100%|██████████| 234/234 [00:28<00:00,  8.21it/s]


[Epoch 92] Train Loss: 0.0340 Accuracy : 98.87%
Validation Accuracy: 85.50%


Epoch 93/100: 100%|██████████| 234/234 [00:27<00:00,  8.41it/s]


[Epoch 93] Train Loss: 0.0374 Accuracy : 98.87%
Validation Accuracy: 87.75%


Epoch 94/100: 100%|██████████| 234/234 [00:29<00:00,  8.01it/s]


[Epoch 94] Train Loss: 0.0207 Accuracy : 99.33%
Validation Accuracy: 86.00%


Epoch 95/100: 100%|██████████| 234/234 [00:27<00:00,  8.49it/s]


[Epoch 95] Train Loss: 0.0390 Accuracy : 98.58%
Validation Accuracy: 87.75%


Epoch 96/100: 100%|██████████| 234/234 [00:25<00:00,  9.22it/s]


[Epoch 96] Train Loss: 0.0232 Accuracy : 99.09%
Validation Accuracy: 86.12%


Epoch 97/100: 100%|██████████| 234/234 [00:24<00:00,  9.44it/s]


[Epoch 97] Train Loss: 0.0448 Accuracy : 98.37%
Validation Accuracy: 86.00%


Epoch 98/100: 100%|██████████| 234/234 [00:24<00:00,  9.64it/s]


[Epoch 98] Train Loss: 0.0227 Accuracy : 99.28%
Validation Accuracy: 86.50%


Epoch 99/100: 100%|██████████| 234/234 [00:28<00:00,  8.22it/s]


[Epoch 99] Train Loss: 0.0280 Accuracy : 99.04%
Validation Accuracy: 86.62%


Epoch 100/100: 100%|██████████| 234/234 [00:28<00:00,  8.11it/s]


[Epoch 100] Train Loss: 0.0320 Accuracy : 99.01%
Validation Accuracy: 86.75%

📊 Test Set Evaluation (Best Model 기준):
✅ Test Accuracy: 9.27%
              precision    recall  f1-score   support

         0.0     0.8172    0.7964    0.8066       275
         1.0     0.8947    0.9067    0.9007       525

    accuracy                         0.8688       800
   macro avg     0.8560    0.8515    0.8536       800
weighted avg     0.8681    0.8688    0.8683       800

AUC: 0.9176
Confusion Matrix:
[[219  56]
 [ 49 476]]


In [None]:
import os
import numpy as np
from glob import glob
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import torch.nn.functional as F
from tqdm import tqdm
import gc


# --- Configuration ---
# Use the default CUDA device when available, otherwise the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

SLICE_ROOT = "/data1/lidc-idri/slices"  # root directory searched for .npy slices
BATCH_SIZE, NUM_EPOCHS = 16, 100
LR = 1e-4

# --- 라벨 추출 함수 ---
def extract_label_from_filename(filename):
    """Derive a binary label from the score encoded at the end of a file name.

    The score is the integer found after the last underscore once ".npy" has
    been removed (presumably the LIDC malignancy rating -- confirm).

    Returns:
        1 when the score is 4 or higher, 0 when it is below 3, and None when
        the score is exactly 3 or cannot be parsed.
    """
    try:
        score = int(filename.split("_")[-1].replace(".npy", ""))
    except (ValueError, AttributeError, TypeError):
        # Unparseable names are skipped rather than aborting the scan, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return None
    if score == 3:
        return None
    return 1 if score >= 4 else 0

# --- Build the file list ---
all_files = glob(os.path.join(SLICE_ROOT, "LIDC-IDRI-*", "*.npy"))
# Keep only files whose name yields a usable binary label.
file_label_pairs = [(f, extract_label_from_filename(f)) for f in all_files]
file_label_pairs = [(f, l) for f, l in file_label_pairs if l is not None]
if not file_label_pairs:
    # zip(*[]) would otherwise fail with an opaque "not enough values to unpack".
    raise ValueError(f"No labelled .npy slices found under {SLICE_ROOT!r}")
files, labels = zip(*file_label_pairs)

# --- 3-way split (70% train / 15% val / 15% test) ---
# NOTE(review): the split is per file, so files from the same LIDC-IDRI-*
# directory can end up in different subsets -- confirm this is acceptable.
train_files, temp_files, train_labels, temp_labels = train_test_split(
    files, labels, test_size=0.3, random_state=42
)
val_files, test_files, val_labels, test_labels = train_test_split(
    temp_files, temp_labels, test_size=0.5, random_state=42
)

# --- Transforms ---
# One augmentation-free pipeline shared by training, validation and test:
# PIL conversion, resize of every input to 224x224, then tensor conversion.
train_transform = val_transform = common_transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ]
)

# --- Dataset 정의 ---
class LIDCDataset(Dataset):
    """Dataset that loads ``.npy`` slices and serves them as 3-channel images.

    Each array is cast to float32, clipped to [-1000, 400] (presumably a
    Hounsfield-unit window -- confirm), rescaled to [0, 1] and replicated into
    three identical channels along the last axis.
    """

    def __init__(self, file_paths, labels, transform=None):
        self.file_paths = file_paths    # sequence of .npy paths
        self.labels = labels            # labels aligned with file_paths
        self.transform = transform      # optional callable applied to the HxWx3 uint8 array

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``; the label is a float tensor."""
        file_path = self.file_paths[index]
        label = self.labels[index]

        img = np.load(file_path).astype(np.float32)
        img = np.clip(img, -1000, 400)
        img = (img + 1000) / 1400.0  # normalize to 0~1
        img = np.stack([img] * 3, axis=-1)  # [H, W, 3] for RGB PIL input

        if self.transform:
            # PIL-based pipelines need 8-bit RGB data: older torchvision
            # releases reject a float HxWx3 array in ToPILImage, so quantise
            # explicitly instead of relying on library-side conversion.
            img_tensor = self.transform((img * 255).astype(np.uint8))
        else:
            img_tensor = torch.tensor(img).permute(2, 0, 1).float()

        return img_tensor, torch.tensor(label).float()

# --- Datasets and loaders ---
train_dataset = LIDCDataset(train_files, train_labels, transform=train_transform)
val_dataset = LIDCDataset(val_files, val_labels, transform=val_transform)
test_dataset = LIDCDataset(test_files, test_labels, transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

# --- Model ---
# The dataset in this cell stacks every slice into three identical channels,
# so the pretrained 3-channel stem is kept as is. Swapping conv1 for a
# 1-channel layer would make the forward pass reject those 3-channel inputs.
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 1)  # single logit for BCEWithLogitsLoss
model = model.to(DEVICE)

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

# --- Checkpoint directory and best-score tracker ---
save_dir = os.path.join(os.path.dirname(os.getcwd()), "pth")
os.makedirs(save_dir, exist_ok=True)
best_val_acc = 0.0

# --- Training loop ---
for epoch in range(NUM_EPOCHS):
    model.train()

    epoch_loss = 0.0
    correct = 0
    total = 0

    for images, targets in tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}"):
        images = images.to(DEVICE)
        targets = targets.unsqueeze(1).to(DEVICE)  # (B,) -> (B, 1) to match the logits

        outputs = model(images)
        loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        predicted = torch.sigmoid(outputs) >= 0.5
        total += targets.size(0)
        correct += (predicted == targets.long()).sum().item()

        epoch_loss += loss.item()

    # Computed once per epoch; max(..., 1) keeps an empty loader from crashing.
    train_acc = correct / max(total, 1)
    print(f"[Epoch {epoch+1}] Train Loss: {epoch_loss/max(len(train_loader), 1):.4f} Accuracy : {train_acc * 100:.2f}%")

    gc.collect()
    torch.cuda.empty_cache()

    # --- Validation ---
    model.eval()
    correct = total = 0

    with torch.no_grad():
        for images, targets in val_loader:
            images = images.to(DEVICE)
            targets = targets.to(DEVICE)

            outputs = model(images)
            # view(-1) keeps a 1-D result even for a batch of one, where
            # squeeze() would collapse it to a 0-d tensor.
            preds = (torch.sigmoid(outputs) > 0.5).view(-1).long()

            correct += (preds == targets.long()).sum().item()
            total += targets.size(0)

    val_acc = correct / total
    print(f"Validation Accuracy: {val_acc * 100:.2f}%")

    # Keep only the weights with the best validation accuracy so far.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), os.path.join(save_dir, "best_model_resnet18.pth"))
        print("✅ Best model saved!")


# --- Test (final evaluation with the best checkpoint) ---
print("\n\U0001F4CA Test Set Evaluation (Best Model \uAE30\uc900):")
model.load_state_dict(
    torch.load(os.path.join(save_dir, "best_model_resnet18.pth"), map_location=DEVICE)
)
model.eval()

correct = total = 0
y_true = []
y_pred = []
y_probs = []

with torch.no_grad():
    for images, targets in test_loader:
        images = images.to(DEVICE)
        # Keep targets 1-D: comparing (B, 1) targets with (B,) predictions
        # broadcasts to (B, B) and inflates the "correct" count.
        targets = targets.float().to(DEVICE)

        outputs = model(images)
        probs = torch.sigmoid(outputs).view(-1)  # stays 1-D even for a batch of one
        preds = (probs > 0.5).long()

        y_true.extend(targets.cpu().numpy())
        y_pred.extend(preds.cpu().numpy())
        y_probs.extend(probs.cpu().numpy())

        correct += (preds == targets.long()).sum().item()
        total += targets.size(0)

test_acc = correct / total
# test_acc is a fraction; scale it to match the "%" suffix.
print(f"\u2705 Test Accuracy: {test_acc * 100:.2f}%")
print(classification_report(y_true, y_pred, digits=4))

# AUC
try:
    auc_score = roc_auc_score(y_true, y_probs)
    print(f"AUC: {auc_score:.4f}")
except ValueError:
    # roc_auc_score raises ValueError when y_true contains a single class.
    print("AUC \uacc4\uc0b0 \uc2e4\ud328: \uc591/\uc74c \ud074\ub798\uc2a4\uac00 \ubaa8\ub450 \uc788\uc5b4\uc57c \ud568.")

# Confusion matrix
cm = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:")
print(cm)
