In [None]:
# 앙상블

# 전체 앙상블 예측 및 결과 저장 코드

import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision.models import (
    resnet34, densenet121, efficientnet_b0,
    ResNet34_Weights, DenseNet121_Weights, EfficientNet_B0_Weights
)
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd
from glob import glob

# Configuration
# Root directory that is globbed for "LIDC-IDRI-*" folders of .npy slices.
SLICE_ROOT = "/data1/lidc-idri/slices"
# Batch size shared by both validation loaders.
BATCH_SIZE = 16
# Use the GPU when one is visible to torch, otherwise run on the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Label extraction
def extract_label_from_filename(filename):
    """Derive a binary label from the integer score that ends a slice filename.

    The score is the text between the last underscore and the ``.npy``
    extension, e.g. ``slice_012_4.npy`` carries the score 4.

    Args:
        filename: File name or full path of a slice file.

    Returns:
        1 when the score is 4 or higher, 0 when it is below 3, and None when
        the score is exactly 3 or cannot be parsed as an integer.
    """
    try:
        score = int(filename.split("_")[-1].replace(".npy", ""))
    except (AttributeError, ValueError):
        # Not a string, or the trailing token is not an integer: treat the
        # file as unlabeled. Only parsing errors are swallowed, so
        # KeyboardInterrupt / SystemExit still propagate.
        return None
    if score == 3:
        # A score of 3 is excluded from both classes.
        return None
    return 1 if score >= 4 else 0

# Dataset definitions
class LIDC1chDataset(Dataset):
    """Serve individual ``.npy`` slices as single-channel 224x224 tensors.

    Each item is ``(image, label, path)``: the min-max scaled slice resized
    to ``(1, 224, 224)``, the label as a float tensor, and the source path.
    NOTE(review): the resize adds one channel and one batch axis, so the
    stored arrays are presumably 2-D (H, W) - confirm against the slicer.
    """

    def __init__(self, file_paths, labels):
        self.file_paths = file_paths
        self.labels = labels

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        path = self.file_paths[idx]
        arr = np.load(path).astype(np.float32)
        # Min-max scale; the epsilon keeps a constant slice from dividing
        # by zero.
        arr = (arr - arr.min()) / (arr.max() - arr.min() + 1e-8)
        # Add the channel axis in NumPy, then a temporary batch axis because
        # F.interpolate expects batched input.
        batched = torch.tensor(arr[np.newaxis])[None]
        resized = F.interpolate(batched, size=(224, 224), mode='bilinear')[0]
        target = torch.tensor(self.labels[idx]).float()
        return resized, target, path

class LIDC3chDataset(Dataset):
    """Serve each slice stacked with its two neighbours as a 3-channel tensor.

    File names are parsed as ``slice_<number>_<suffix>``; the channels are the
    slices numbered ``number - 1``, ``number`` and ``number + 1`` that share
    the same folder and suffix. A neighbour whose file does not exist is
    replaced by the centre slice. Each item is ``(image, label, center_path)``
    with the image resized to ``(3, 224, 224)``.
    """

    def __init__(self, file_paths, labels):
        self.file_paths = file_paths
        self.labels = labels

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        center_path = self.file_paths[idx]
        label = self.labels[idx]
        directory, fname = os.path.split(center_path)
        tokens = fname.split("_")
        slice_num = int(tokens[1])
        suffix = tokens[-1]

        channels = []
        for offset in (-1, 0, 1):
            candidate = os.path.join(directory, f"slice_{slice_num + offset:03d}_{suffix}")
            # Fall back to the centre slice when the neighbour is missing
            # (e.g. at the first or last slice of a series).
            source = candidate if os.path.exists(candidate) else center_path
            arr = np.load(source).astype(np.float32)
            # Each channel is min-max scaled independently.
            arr = (arr - arr.min()) / (arr.max() - arr.min() + 1e-8)
            channels.append(arr)

        # Temporary batch axis because F.interpolate expects batched input.
        volume = torch.tensor(np.stack(channels, axis=0))[None]
        resized = F.interpolate(volume, size=(224, 224), mode='bilinear')[0]
        return resized, torch.tensor(label).float(), center_path

# Data loading
all_files = glob(os.path.join(SLICE_ROOT, "LIDC-IDRI-*", "*.npy"))
# Pair every file with its label and drop the files that have none.
file_label_pairs = [
    pair
    for pair in ((path, extract_label_from_filename(path)) for path in all_files)
    if pair[1] is not None
]
files, labels = zip(*file_label_pairs)
# Only the held-out 20% is evaluated; the training portions are discarded.
# NOTE(review): glob() order is filesystem-dependent, so this split only
# matches the one used for training if the file order is the same - confirm.
_, val_files, _, val_labels = train_test_split(
    files, labels, test_size=0.2, random_state=42
)

# Neither loader shuffles, so both walk the validation files in the same order.
val_loader_1ch = DataLoader(
    LIDC1chDataset(val_files, val_labels),
    batch_size=BATCH_SIZE,
)
val_loader_3ch = DataLoader(
    LIDC3chDataset(val_files, val_labels),
    batch_size=BATCH_SIZE,
)

# Model loading
def load_models():
    """Build the four ensemble members and restore their saved weights.

    Three models take 1-channel input (ResNet-34, DenseNet-121,
    EfficientNet-B0) and one ResNet-34 takes 3-channel input. Every model has
    its first convolution and its final linear layer replaced so that it
    accepts the expected channel count and emits a single logit.

    The architectures are created with ``weights=None``: ``load_state_dict``
    is strict by default, so the checkpoint overwrites every parameter and
    buffer, and downloading ImageNet weights first would be wasted work.

    Returns:
        Tuple ``(resnet34_1ch, densenet121, efficientnet_b0, resnet34_3ch)``
        of models moved to ``DEVICE`` and switched to eval mode.

    Raises:
        FileNotFoundError: If a checkpoint file is missing from the working
            directory.
        RuntimeError: If a checkpoint does not match its architecture.
    """

    def _restore(model, checkpoint_path):
        # map_location lets checkpoints saved on a GPU load on a CPU-only
        # host, matching the CPU fallback used when DEVICE is chosen.
        state = torch.load(checkpoint_path, map_location=DEVICE)
        model.load_state_dict(state)
        return model.to(DEVICE).eval()

    m1 = resnet34(weights=None)
    m1.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
    m1.fc = nn.Linear(m1.fc.in_features, 1)
    m1 = _restore(m1, "best_model_resnet34.pth")

    m2 = densenet121(weights=None)
    m2.features.conv0 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
    m2.classifier = nn.Linear(m2.classifier.in_features, 1)
    m2 = _restore(m2, "best_model_densenet121.pth")

    m3 = efficientnet_b0(weights=None)
    m3.features[0][0] = nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1, bias=False)
    m3.classifier[1] = nn.Linear(m3.classifier[1].in_features, 1)
    m3 = _restore(m3, "best_model_efficientnet_b0.pth")

    m4 = resnet34(weights=None)
    m4.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    m4.fc = nn.Linear(m4.fc.in_features, 1)
    m4 = _restore(m4, "best_model_resnet34_3ch.pth")
    return m1, m2, m3, m4

resnet34_1ch, densenet, effnet, resnet34_3ch = load_models()

# Prediction and result collection
results = []
with torch.no_grad():
    # Both loaders iterate the same validation files in the same order (no
    # shuffling), so zipping them pairs the 1-channel and 3-channel views of
    # the same samples.
    for (data1, data3) in zip(val_loader_1ch, val_loader_3ch):
        x1, labels, paths = data1
        x3, _, _ = data3
        x1, x3, labels = x1.to(DEVICE), x3.to(DEVICE), labels.to(DEVICE)

        # squeeze(1) removes only the logit axis: (B, 1) -> (B,). A bare
        # squeeze() would also collapse the batch axis when the final batch
        # holds a single sample, and the per-sample indexing below would fail.
        prob1 = torch.sigmoid(resnet34_1ch(x1).squeeze(1))
        prob2 = torch.sigmoid(densenet(x1).squeeze(1))
        prob3 = torch.sigmoid(effnet(x1).squeeze(1))
        prob4 = torch.sigmoid(resnet34_3ch(x3).squeeze(1))

        # Unweighted mean of the four probabilities, thresholded at 0.5.
        ensemble_prob = (prob1 + prob2 + prob3 + prob4) / 4
        ensemble_pred = (ensemble_prob > 0.5).long()

        for i, path in enumerate(paths):
            results.append({
                "file": os.path.basename(path),
                "label": int(labels[i].item()),
                "ensemble_pred": int(ensemble_pred[i].item()),
                "resnet34_1ch_prob": float(prob1[i]),
                "densenet121_prob": float(prob2[i]),
                "efficientnet_b0_prob": float(prob3[i]),
                "resnet34_3ch_prob": float(prob4[i]),
                "ensemble_prob": float(ensemble_prob[i])
            })

# Save to CSV
# NOTE(review): the file name says "weighted", but the ensemble above is a
# plain average - confirm the intended name.
df = pd.DataFrame(results)
df.to_csv("weighted_ensemble_predictions_expanded.csv", index=False)

# (Optional) print a performance summary
print("\n[Classification Report: Ensemble with expanded output]")
print(classification_report([r['label'] for r in results], [r['ensemble_pred'] for r in results], digits=4))

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import os

# 1. Dummy model definition
class SimpleModel(nn.Module):
    """Minimal conv -> ReLU -> global average pool -> linear network.

    Stands in for the real ensemble members in this smoke test: it accepts an
    image batch with ``in_channels`` channels and emits one logit per sample.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, 8, kernel_size=3, padding=1)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(8, 1)

    def forward(self, x):
        pooled = self.pool(F.relu(self.conv(x)))
        # The pooled map is 1x1 spatially; index away both spatial axes so
        # the 8 channel means feed the linear layer.
        return self.fc(pooled[..., 0, 0])

# 2. Dummy dataset
class DummyDataset(Dataset):
    """Six random in-memory samples shaped like the real datasets' items.

    Each item is ``(image, label, path)``: a random ``(channels, 224, 224)``
    tensor, the label as a float tensor, and a synthetic file name of the
    form ``slice_<index>_<label>.npy``.
    """

    def __init__(self, channels):
        self.data = []
        for _ in range(6):
            self.data.append(torch.rand(channels, 224, 224))
        self.labels = [1, 0, 1, 1, 0, 0]
        self.paths = []
        for index, label in enumerate(self.labels):
            self.paths.append(f"slice_{index:03d}_{label}.npy")

    def __getitem__(self, idx):
        target = torch.tensor(self.labels[idx]).float()
        return self.data[idx], target, self.paths[idx]

    def __len__(self):
        return len(self.data)

# 3. Device selection: GPU when torch can see one, otherwise CPU
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# 4. Model setup: three 1-channel stand-ins and one 3-channel stand-in,
#    created in the same order as the real ensemble members
resnet34_1ch, densenet, effnet = (
    SimpleModel(1).to(DEVICE).eval() for _ in range(3)
)
resnet34_3ch = SimpleModel(3).to(DEVICE).eval()

# 5. Data loaders: one per input channel count, both unshuffled
val_loader_1ch, val_loader_3ch = (
    DataLoader(DummyDataset(channels), batch_size=2) for channels in (1, 3)
)

# 6. Prediction and result collection
results = []
with torch.no_grad():
    # Neither loader shuffles, so zipping them pairs batches by position.
    for (data1, data3) in zip(val_loader_1ch, val_loader_3ch):
        x1, labels, paths = data1
        x3, _, _ = data3
        x1, x3, labels = x1.to(DEVICE), x3.to(DEVICE), labels.to(DEVICE)

        # squeeze(1) removes only the logit axis: (B, 1) -> (B,). A bare
        # squeeze() would also collapse the batch axis for a batch of one
        # sample and break the per-sample indexing below.
        p1 = torch.sigmoid(resnet34_1ch(x1).squeeze(1))
        p2 = torch.sigmoid(densenet(x1).squeeze(1))
        p3 = torch.sigmoid(effnet(x1).squeeze(1))
        p4 = torch.sigmoid(resnet34_3ch(x3).squeeze(1))

        # Unweighted mean of the four probabilities, thresholded at 0.5.
        p_ens = (p1 + p2 + p3 + p4) / 4
        pred_ens = (p_ens > 0.5).long()

        for i, path in enumerate(paths):
            results.append({
                "file": path,
                "label": int(labels[i]),
                "ensemble_pred": int(pred_ens[i]),
                "resnet34_1ch_prob": float(p1[i]),
                "densenet121_prob": float(p2[i]),
                "efficientnet_b0_prob": float(p3[i]),
                "resnet34_3ch_prob": float(p4[i]),
                "ensemble_prob": float(p_ens[i])
            })

# 7. Save to CSV
df = pd.DataFrame(results)
df.to_csv("test_weighted_ensemble_predictions.csv", index=False)
print("✅ 저장 완료: test_weighted_ensemble_predictions.csv")