In [None]:
# ---------------------------------------------------------
# ✅ 패키지
# ---------------------------------------------------------
import os, numpy as np, matplotlib.pyplot as plt
from PIL import Image
from glob import glob
from tqdm import tqdm
import torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
import segmentation_models_pytorch as smp
import warnings
warnings.filterwarnings("ignore")

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"✅ Using device: {device}")

# ---------------------------------------------------------
# ✅ 데이터셋
# ---------------------------------------------------------
class CityscapesDataset(Dataset):
    """Image / label-ID mask pairs for semantic segmentation.

    Images are discovered as ``<image_dir>/<subdir>/*.png`` (sorted). The mask
    path for each image is derived from the image path by replacing
    ``"images"`` with ``"gtFine"`` and ``"_leftImg8bit"`` with
    ``"_gtFine_labelIds"``.

    Args:
        image_dir: Directory whose immediate sub-directories contain the PNGs.
        resize: Two values forwarded positionally to ``A.Resize``.
        mode: ``'train'`` adds random flip / brightness-contrast /
            shift-scale-rotate augmentation; any other value applies only
            resize and normalization.
    """

    def __init__(self, image_dir, resize=(512, 512), mode='train'):
        self.image_paths = sorted(glob(os.path.join(image_dir, "*", "*.png")))
        self.mask_paths = [
            p.replace("images", "gtFine").replace("_leftImg8bit", "_gtFine_labelIds")
            for p in self.image_paths
        ]
        self.mode = mode

        # Random augmentation is only wanted for the training split.
        augmentations = []
        if mode == 'train':
            augmentations = [
                A.HorizontalFlip(p=0.5),
                A.RandomBrightnessContrast(p=0.3),
                A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=10, p=0.5),
            ]

        # Resize first, normalize and convert to tensors last, in both modes.
        self.transform = A.Compose([
            A.Resize(*resize),
            *augmentations,
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            ToTensorV2(),
        ])

    def __len__(self):
        """Return the number of images found under ``image_dir``."""
        return len(self.image_paths)

    @staticmethod
    def _to_long_mask(mask):
        """Return ``mask`` as an int64 ``torch.Tensor``.

        ``ToTensorV2`` is documented to convert the mask to a tensor as well,
        and ``torch.from_numpy`` raises ``TypeError`` when given a tensor, so
        both tensors and NumPy arrays are accepted here.
        """
        if not torch.is_tensor(mask):
            mask = torch.from_numpy(np.asarray(mask))
        return mask.long()

    def __getitem__(self, idx):
        """Load, transform and return ``(image, mask)`` for sample ``idx``.

        Returns:
            The transformed image as produced by ``self.transform`` and the
            transformed mask as an int64 tensor.
        """
        # Context managers close the underlying files as soon as the pixel
        # data has been copied into NumPy arrays.
        with Image.open(self.image_paths[idx]) as img:
            image = np.array(img.convert("RGB"))
        with Image.open(self.mask_paths[idx]) as label_img:
            mask = np.array(label_img).astype(np.int64)

        augmented = self.transform(image=image, mask=mask)
        return augmented['image'], self._to_long_mask(augmented['mask'])

# ---------------------------------------------------------
# ✅ 경로 및 데이터로더
# ---------------------------------------------------------
# Image roots for the two splits (machine-specific absolute paths).
train_image_dir = r"C:\Users\ghwns\HJ_git\CV-Projects\urban-scene-segmentation\Dataset\Cityspaces\images\train"
val_image_dir = r"C:\Users\ghwns\HJ_git\CV-Projects\urban-scene-segmentation\Dataset\Cityspaces\images\val"

# The training split is augmented; the validation split is only resized and normalized.
train_dataset = CityscapesDataset(image_dir=train_image_dir, mode='train')
val_dataset = CityscapesDataset(image_dir=val_image_dir, mode='val')

# Batches of two, loaded in the main process; only the training data is shuffled.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=2,
    shuffle=True,
    num_workers=0,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=2,
    shuffle=False,
    num_workers=0,
)

print(f"✅ Train: {len(train_dataset)} | Val: {len(val_dataset)}")

# ---------------------------------------------------------
# ✅ 모델, 손실함수, 옵티마이저
# ---------------------------------------------------------
# DeepLabV3+ with an ImageNet-initialised ResNet-50 encoder, RGB input and
# 34 output channels (the same class count compute_mIoU uses by default).
model = smp.DeepLabV3Plus(
    encoder_name="resnet50", encoder_weights="imagenet", in_channels=3, classes=34,
)
model = model.to(device)

# The two criteria that are blended by combo_loss below.
loss_dice = smp.losses.DiceLoss(mode='multiclass')
loss_ce = nn.CrossEntropyLoss()


def combo_loss(pred, target):
    """Return the equally weighted sum of cross-entropy and Dice loss.

    Args:
        pred: Model output passed unchanged to both criteria.
        target: Ground-truth mask passed unchanged to both criteria.
    """
    ce_term = loss_ce(pred, target)
    dice_term = loss_dice(pred, target)
    return 0.5 * ce_term + 0.5 * dice_term


# AdamW over all model parameters with a learning rate of 1e-4.
optimizer = optim.AdamW(params=model.parameters(), lr=1e-4)

# ---------------------------------------------------------
# ✅ 학습 루프
# ---------------------------------------------------------
num_epochs = 10

for epoch in range(num_epochs):
    # ---- training pass -------------------------------------------------
    model.train()
    train_loss = 0.0
    print(f"\n🔁 Epoch {epoch+1}/{num_epochs}")
    for images, masks in tqdm(train_loader, desc="🔧 Training", leave=False):
        images, masks = images.to(device), masks.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = combo_loss(outputs, masks)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    # Report the mean per-batch loss: the two loaders have different batch
    # counts, so raw sums would not be comparable between train and val.
    # max(..., 1) avoids a division by zero for an empty loader.
    train_loss /= max(len(train_loader), 1)

    # ---- validation pass (no gradients needed) -------------------------
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, masks in tqdm(val_loader, desc="🔍 Validating", leave=False):
            images, masks = images.to(device), masks.to(device)
            outputs = model(images)
            val_loss += combo_loss(outputs, masks).item()
    val_loss /= max(len(val_loader), 1)

    print(f"✅ Epoch [{epoch+1}/{num_epochs}] | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

# ---------------------------------------------------------
# ✅ mIoU 계산
# ---------------------------------------------------------
def fast_hist(pred, label, num_class):
    """Build a ``num_class x num_class`` confusion matrix from flat arrays.

    Rows are indexed by the ground-truth label and columns by the predicted
    class. Positions whose label lies outside ``[0, num_class)`` are skipped.

    Args:
        pred: 1-D integer array of predicted class indices.
        label: 1-D array of ground-truth class indices, same length as ``pred``.
        num_class: Number of classes.
    """
    valid = (label >= 0) & (label < num_class)
    # Encode every (label, pred) pair as one index into the flattened matrix.
    flat_index = num_class * label[valid].astype(int) + pred[valid]
    counts = np.bincount(flat_index, minlength=num_class ** 2)
    return counts.reshape(num_class, num_class)

def per_class_iou(hist):
    """Return the intersection-over-union of every class in ``hist``.

    For class ``i`` this is ``hist[i, i]`` divided by
    ``row_sum[i] + col_sum[i] - hist[i, i]``. Division warnings are
    suppressed, so a class with a zero denominator yields ``nan``.

    Args:
        hist: Square confusion matrix.
    """
    intersection = np.diag(hist)
    with np.errstate(divide='ignore', invalid='ignore'):
        union = hist.sum(1) + hist.sum(0) - intersection
        return intersection / union

def compute_mIoU(model, dataloader, num_classes=34):
    """Evaluate ``model`` on ``dataloader`` and return the mean IoU.

    The model is put in eval mode, a confusion matrix is accumulated over
    every sample with ``fast_hist``, and the per-class IoUs from
    ``per_class_iou`` are averaged with ``np.nanmean`` (``nan`` entries are
    ignored). The result is also printed.

    Args:
        model: Network whose output is arg-maxed over dimension 1.
        dataloader: Iterable yielding ``(images, masks)`` batches.
        num_classes: Size of the confusion matrix.

    Returns:
        The mean IoU over classes with a defined IoU.
    """
    model.eval()
    confusion = np.zeros((num_classes, num_classes))
    with torch.no_grad():
        for images, masks in tqdm(dataloader, desc="📏 Calculating mIoU"):
            images = images.to(device)
            masks = masks.to(device)
            logits = model(images)
            batch_preds = logits.argmax(dim=1).cpu().numpy()
            batch_targets = masks.cpu().numpy()
            # Accumulate one sample at a time on flattened label maps.
            for pred_map, target_map in zip(batch_preds, batch_targets):
                confusion += fast_hist(pred_map.flatten(), target_map.flatten(), num_classes)

    miou = np.nanmean(per_class_iou(confusion))
    print(f"📊 Final mIoU: {miou:.4f}")
    return miou

# ---------------------------------------------------------
# ✅ 실행
# ---------------------------------------------------------
# Evaluate the model on the validation split (default num_classes).
compute_mIoU(model=model, dataloader=val_loader)

✅ Using device: cpu
✅ Train: 2975 | Val: 500

🔁 Epoch 1/10


🔧 Training:   0%|                                                                            | 0/1488 [00:00<?, ?it/s]