**Data Loader**

In [None]:
from google.colab import drive
import os, sys, torch, numpy as np, glob
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd

# Mount Google Drive so that the data stored there is visible to the Colab runtime.
drive.mount('/content/drive')

# Project folder on Drive and the dataset directory inside it.
PATH = "/content/drive/MyDrive/Colab Notebooks/dane"
DATA_ROOT = os.path.join(PATH, "dataset")

# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Mapping from raw label ids found in the annotation PNGs to training class ids 1..5.
# NOTE(review): the keys look like Cityscapes labelIds
# (24=person, 26=car, 33=bicycle, 27=truck, 28=bus) — confirm against the dataset.
MAPPING = {
    24: 1,
    26: 2,
    33: 3,
    27: 4,
    28: 5,
}

class CityscapesDataset(Dataset):
    """Image / label-mask pairs read from ``<root>/<split>/images`` and ``<root>/<split>/annotations``.

    Every ``*.png`` found (recursively) under the images directory is one sample.
    Its mask is looked up at the same relative location under the annotations
    directory, first with the ``_leftImg8bit.png`` suffix swapped for
    ``_gtFine_labelIds.png`` and, if that file does not exist, under the
    unchanged file name.

    Args:
        root: Dataset root directory.
        split: Sub-directory of ``root`` to read (default ``'train'``).
        size: ``(height, width)`` that images and masks are resized to
            (default ``(256, 512)``).
    """

    IMG_SUFFIX = '_leftImg8bit.png'
    MASK_SUFFIX = '_gtFine_labelIds.png'

    def __init__(self, root, split='train', size=(256, 512)):
        self.size = tuple(size)
        self.img_root = os.path.join(root, split, 'images')
        self.mask_root = os.path.join(root, split, 'annotations')
        self.files = sorted(glob.glob(os.path.join(self.img_root, '**', '*.png'), recursive=True))
        self.transform = transforms.Compose([transforms.Resize(self.size), transforms.ToTensor()])

    def __len__(self):
        """Number of image files discovered for this split."""
        return len(self.files)

    def _mask_path(self, img_path):
        """Return the annotation file that belongs to ``img_path``.

        The path is rebuilt from the location relative to the images root, so
        only the images/annotations directory component is swapped. A blanket
        ``str.replace('images', ...)`` would also rewrite any other "images"
        substring in the dataset root or in a file name.
        """
        rel = os.path.relpath(img_path, self.img_root)
        if rel.endswith(self.IMG_SUFFIX):
            preferred = os.path.join(self.mask_root, rel[:-len(self.IMG_SUFFIX)] + self.MASK_SUFFIX)
            if os.path.exists(preferred):
                return preferred
        # Fallback: annotation stored under the same file name as the image.
        return os.path.join(self.mask_root, rel)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            ``(img, target)`` where ``img`` is the RGB image after
            ``Resize`` + ``ToTensor`` and ``target`` is an int64 tensor holding
            the remapped class id per pixel: ids listed in ``MAPPING`` become
            1..5, everything else becomes 0.
            (assumes the mask PNG is single-channel — TODO confirm)
        """
        path = self.files[idx]
        # Context managers close the underlying files right after decoding.
        with Image.open(path) as im:
            img = self.transform(im.convert("RGB"))

        height, width = self.size
        with Image.open(self._mask_path(path)) as mask_im:
            # PIL expects (width, height); NEAREST keeps label ids intact (no blending).
            mask_np = np.array(mask_im.resize((width, height), resample=Image.NEAREST))

        target = np.zeros_like(mask_np)
        for raw_id, class_id in MAPPING.items():
            target[mask_np == raw_id] = class_id
        return img, torch.from_numpy(target).long()

# Dataset size: build the default ('train') split and report how many images were found.
ds = CityscapesDataset(DATA_ROOT, split='train')
print(f"Dataset załadowany: {len(ds)} zdjęć.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Dataset załadowany: 586 zdjęć.


**Ważone mIoU, metryki oraz model z dwiema głowicami**

In [None]:
# Weighted mIoU
class IncrementalIoU:
    """Accumulates a 6x6 confusion matrix batch by batch and derives a weighted mIoU.

    Rows index the ground-truth class, columns the predicted class. Only pixels
    whose ground truth lies in 1..5 are counted; class 0 is left out.
    """

    def __init__(self):
        self.mat = np.zeros((6, 6))

    def update(self, p, t):
        """Add one batch of predictions ``p`` and targets ``t`` (integer arrays of equal shape)."""
        valid = (t > 0) & (t < 6)
        if not valid.any():
            return
        true_ids = t[valid].flatten()
        pred_ids = p[valid].flatten()
        # Encode each (true, predicted) pair as one index in 0..35 and count occurrences.
        pair_codes = 6 * true_ids + pred_ids
        counts = np.bincount(pair_codes, minlength=36)
        self.mat += counts.reshape(6, 6)

    def compute(self):
        """Return 0.4 * IoU(class 1) + 0.15 * sum of IoU(classes 2..5)."""
        hits = np.diag(self.mat)
        union = self.mat.sum(1) + self.mat.sum(0) - hits
        iou = hits / (union + 1e-7)  # epsilon avoids 0/0 for classes never seen
        return 0.4 * iou[1] + 0.15 * np.sum(iou[2:6])

# Dice loss
def dice_loss(p, t, num_classes=6, eps=1e-7):
    """Soft Dice loss over the foreground classes (class 0 is excluded).

    Args:
        p: Logits of shape (B, C, H, W); softmax is taken over dim 1.
        t: Integer (long) class-id targets of shape (B, H, W) with values
            in ``[0, num_classes)``.
        num_classes: Width of the one-hot encoding of ``t`` (default 6).
        eps: Small constant that keeps the denominator non-zero.

    Returns:
        Scalar tensor ``1 - 2*|P∩T| / (|P| + |T| + eps)`` computed jointly over
        all foreground channels and all pixels of the batch.

    Note:
        After dropping channel 0, ``p`` and the one-hot target are multiplied
        with ordinary broadcasting. A two-channel ``p`` (one foreground
        channel) therefore works with a binary target under the default
        ``num_classes``: only the target's class-1 channel is non-zero.
    """
    probs = torch.softmax(p, 1)[:, 1:]
    t_1h = torch.nn.functional.one_hot(t, num_classes).permute(0, 3, 1, 2).float()[:, 1:]
    intersection = (probs * t_1h).sum()
    return 1 - (2 * intersection) / (probs.sum() + t_1h.sum() + eps)

# Dual-head model
class DualHeadModel(nn.Module):
    """Pretrained torchvision segmentation network with its final layer replaced by two heads.

    The backbone and all classifier layers except the last one are reused from
    the pretrained model. Two 1x1 convolutions sit on top of the shared
    features: ``head_ped`` (2 output channels) and ``head_others`` (6 output
    channels).

    Args:
        arch: ``"deeplabv3"`` selects ``deeplabv3_resnet50``; any other value
            selects ``fcn_resnet50``.
    """

    def __init__(self, arch="deeplabv3"):
        super().__init__()
        use_deeplab = arch == "deeplabv3"
        if use_deeplab:
            base = models.segmentation.deeplabv3_resnet50(weights="DEFAULT")
        else:
            base = models.segmentation.fcn_resnet50(weights="DEFAULT")
        # Channel width fed to the heads: 256 for DeepLabV3, 512 for FCN.
        in_ch = 256 if use_deeplab else 512

        self.backbone = base.backbone
        # Keep every classifier layer except the last one.
        shared_layers = list(base.classifier.children())
        self.classifier = nn.Sequential(*shared_layers[:-1])
        self.head_ped = nn.Conv2d(in_ch, 2, 1)
        self.head_others = nn.Conv2d(in_ch, 6, 1)

    def forward(self, x):
        """Return ``{'ped': ..., 'others': ...}`` logits upsampled to the input's spatial size."""
        out_size = x.shape[-2:]
        feat = self.classifier(self.backbone(x)['out'])
        logits = {}
        for key, head in (('ped', self.head_ped), ('others', self.head_others)):
            logits[key] = F.interpolate(head(feat), size=out_size, mode='bilinear', align_corners=False)
        return logits

**Trening i Porównanie**

In [None]:
# Quick comparison: every configuration gets the same small training budget.
COMPARE_TRAIN_STEPS = 32    # optimisation steps per configuration
COMPARE_EVAL_BATCHES = 15   # batches used for the quick mIoU estimate
COMPARE_SEED = 0            # makes initialisation and batch order reproducible

print(f"Porównywanie modeli po {COMPARE_TRAIN_STEPS} kroków każdy")

configs = [
    ("DeepLabV3", "CrossEntropy"),
    ("DeepLabV3", "DiceLoss"),
    ("FCN", "CrossEntropy")
]

results = []
ds = CityscapesDataset(DATA_ROOT, split='train')

for arch, loss_name in configs:
    print(f"Trenowanie: {arch} + {loss_name}...")
    torch.manual_seed(COMPARE_SEED)
    model = DualHeadModel(arch.lower()).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=0.0001)

    if loss_name == "CrossEntropy":
        # The pedestrian head is a binary problem (class 1 vs. everything else), so its
        # negatives must NOT be ignored: with ignore_index=0 only positive pixels would
        # contribute, and a batch without pedestrians would give 0/0 = NaN.
        ped_criterion = nn.CrossEntropyLoss()
        criterion = nn.CrossEntropyLoss(ignore_index=0)
    else:
        ped_criterion = criterion = dice_loss

    model.train()
    # A dedicated, seeded generator gives every configuration the same batch order.
    loader = DataLoader(ds, batch_size=4, shuffle=True,
                        generator=torch.Generator().manual_seed(COMPARE_SEED))
    steps = 0

    for img, target in loader:
        img, target = img.to(DEVICE), target.to(DEVICE)
        optimizer.zero_grad()
        out = model(img)

        # Both losses train the pedestrian head on the binary "is class 1" mask.
        l_ped = ped_criterion(out['ped'], (target == 1).long())
        if loss_name == "CrossEntropy" and not (target > 0).any():
            # No labelled pixel in this batch: the ignore_index mean would be NaN.
            l_oth = out['others'].sum() * 0.0
        else:
            l_oth = criterion(out['others'], target)

        (0.7 * l_ped + 0.3 * l_oth).backward()
        optimizer.step()

        steps += 1
        if steps >= COMPARE_TRAIN_STEPS: break

    # Evaluation
    # NOTE(review): this scores batches drawn from the training loader, not a held-out split.
    model.eval()
    metric = IncrementalIoU()
    with torch.no_grad():
        for i, (img, target) in enumerate(loader):
            if i >= COMPARE_EVAL_BATCHES: break
            preds = model(img.to(DEVICE))['others'].argmax(1).cpu().numpy()
            metric.update(preds, target.numpy())

    score = metric.compute()
    print(f"Wynik mIoU: {score:.4f}")
    results.append({"Model": arch, "Loss": loss_name, "mIoU": score})

print("Podsumowanie modeli:")
print(pd.DataFrame(results))

Porównywanie modeli po 32 kroków każdy
Trenowanie: DeepLabV3 + CrossEntropy...
Wynik mIoU: 0.4691
Trenowanie: DeepLabV3 + DiceLoss...
Wynik mIoU: 0.1296
Trenowanie: FCN + CrossEntropy...
Wynik mIoU: 0.4558
Podsumowanie modeli:
       Model          Loss      mIoU
0  DeepLabV3  CrossEntropy  0.469139
1  DeepLabV3      DiceLoss  0.129631
2        FCN  CrossEntropy  0.455821


# Wnioski
Najwyższą wartość mIoU (0,469) osiągnął model DeepLabV3 z funkcją straty CrossEntropy. Architektura ta okazała się skuteczniejsza od modelu FCN, choć przy tej samej funkcji straty różnica między nimi była niewielka (0,469 wobec 0,456). Zastosowanie DiceLoss zamiast CrossEntropy drastycznie obniżyło jakość segmentacji (mIoU 0,130), co czyni tę konfigurację najmniej efektywną w zestawieniu. Wyniki uzyskano po zaledwie 32 krokach treningu i zmierzono na 15 batchach ze zbioru treningowego, dlatego należy je traktować jako wstępne.

**Trenowanie najlepszego modelu**

In [None]:
import time
import torch.optim as optim

NUM_EPOCHS = 3
BEST_MIOU = 0.0
FINAL_EVAL_BATCHES = 50   # batches scored after each epoch
FINAL_SEED = 0            # makes initialisation and batch order reproducible

print(f"Rozpoczynam trening (DeepLabV3 + CE) na {NUM_EPOCHS} epok...")

torch.manual_seed(FINAL_SEED)
model = DualHeadModel("deeplabv3").to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=0.0001)
# Multi-class head: class 0 is excluded from the loss.
criterion = nn.CrossEntropyLoss(ignore_index=0)
# Pedestrian head: binary problem (class 1 vs. everything else). Its negatives must
# NOT be ignored; with ignore_index=0 only positive pixels would contribute, and a
# batch without pedestrians would give 0/0 = NaN and poison the epoch average.
ped_criterion = nn.CrossEntropyLoss()

ds_train = CityscapesDataset(DATA_ROOT, split='train')
loader = DataLoader(ds_train, batch_size=4, shuffle=True)

for epoch in range(NUM_EPOCHS):
    start_time = time.time()
    model.train()
    epoch_loss = 0.0

    for i, (img, target) in enumerate(loader):
        img, target = img.to(DEVICE), target.to(DEVICE)
        optimizer.zero_grad()

        out = model(img)

        l_ped = ped_criterion(out['ped'], (target == 1).long())
        if (target > 0).any():
            l_oth = criterion(out['others'], target)
        else:
            # No labelled pixel in this batch: the ignore_index mean would be NaN.
            l_oth = out['others'].sum() * 0.0

        loss = 0.7 * l_ped + 0.3 * l_oth
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # NOTE(review): mIoU is measured on batches from the training loader, so the
    # "best" checkpoint is selected on training data, not on a held-out split.
    model.eval()
    metric = IncrementalIoU()
    print(f" Epoka {epoch+1}/{NUM_EPOCHS} zakończona. Liczę mIoU...")

    with torch.no_grad():
        for i, (img, target) in enumerate(loader):
            if i >= FINAL_EVAL_BATCHES: break
            preds = model(img.to(DEVICE))['others'].argmax(1).cpu().numpy()
            metric.update(preds, target.numpy())

    current_miou = metric.compute()
    duration = (time.time() - start_time) / 60

    # max(..., 1) keeps the average defined even when the loader yields no batches.
    print(f"Epoka {epoch+1}: Loss={epoch_loss/max(len(loader), 1):.4f} | mIoU={current_miou:.4f} | Czas: {duration:.1f} min")

    if current_miou > BEST_MIOU:
        BEST_MIOU = current_miou
        torch.save(model.state_dict(), "best_model_cityscapes.pth")
        print(" Zapisano nowy najlepszy model!")

print("\n" + "="*50)
print(f"WYNIK KOŃCOWY: mIoU = {BEST_MIOU:.4f}")
print("="*50)

Rozpoczynam trening (DeepLabV3 + CE) na 3 epok...
 Epoka 1/3 zakończona. Liczę mIoU...
Epoka 1: Loss=nan | mIoU=0.6377 | Czas: 112.0 min
 Zapisano nowy najlepszy model!
 Epoka 2/3 zakończona. Liczę mIoU...
Epoka 2: Loss=0.0596 | mIoU=0.6914 | Czas: 112.4 min
 Zapisano nowy najlepszy model!
