# Mini-U-Net für synthetische Gebäudesegmentierung (PyTorch)

Bisher haben wir Punkte (Insekten, Kreise) mit einer **Geraden** klassifiziert.

Jetzt betrachten wir **Bilder**:
- Für jedes Pixel soll entschieden werden: *Gebäude oder Hintergrund?*

Dafür verwenden wir ein kleines **U-Net-ähnliches Netzwerk** in PyTorch, das auf künstlich erzeugten Rechteck-Bildern trainiert wird.

In [None]:
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

# Report the installed PyTorch build, then prefer the GPU when CUDA is usable
# and fall back to the CPU otherwise.
print('PyTorch-Version:', torch.__version__)
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Verwendetes Gerät:', device)


In [None]:
def generate_rectangle_image(img_size=64, num_rects_range=(1, 3)):
    """Create one noisy synthetic image together with its building mask.

    A random number of axis-aligned rectangles is drawn onto a black canvas.
    Each rectangle gets a uniform brightness in [0.6, 1.0); the mask marks the
    same pixels with 1.0. Gaussian noise (sigma 0.15) is then added to the
    image only and the result is clipped to [0, 1].

    Because ``np.random.randint`` excludes its upper bound, every rectangle is
    5 to 19 pixels long per side and never reaches the last row or column.

    Args:
        img_size: Side length of the square image in pixels; must be >= 11 so
            that a rectangle of the minimum size always fits.
        num_rects_range: Inclusive ``(min, max)`` number of rectangles.

    Returns:
        Tuple ``(image, mask)`` of two float32 arrays with shape
        ``(img_size, img_size)``.

    Raises:
        ValueError: If ``img_size`` is smaller than 11.
    """
    if img_size < 11:
        raise ValueError(f'img_size must be at least 11, got {img_size}')

    image = np.zeros((img_size, img_size), dtype=np.float32)
    mask = np.zeros((img_size, img_size), dtype=np.float32)

    # randint's upper bound is exclusive, hence the +1 to include the maximum.
    num_rects = np.random.randint(num_rects_range[0], num_rects_range[1] + 1)

    for _ in range(num_rects):
        x1 = np.random.randint(0, img_size - 10)
        y1 = np.random.randint(0, img_size - 10)
        x2 = np.random.randint(x1 + 5, min(x1 + 20, img_size))
        y2 = np.random.randint(y1 + 5, min(y1 + 20, img_size))

        val = np.random.uniform(0.6, 1.0)
        image[y1:y2, x1:x2] = val
        mask[y1:y2, x1:x2] = 1.0

    # np.random.normal yields float64; cast it so the sum is not promoted and
    # the image keeps the float32 dtype it was allocated with (same as mask).
    noise = np.random.normal(0, 0.15, size=(img_size, img_size)).astype(np.float32)
    image = np.clip(image + noise, 0.0, 1.0).astype(np.float32, copy=False)

    return image, mask

def create_dataset(num_samples=400, img_size=64):
    """Generate ``num_samples`` synthetic image/mask pairs.

    Args:
        num_samples: Number of pairs to generate.
        img_size: Side length forwarded to ``generate_rectangle_image``.

    Returns:
        Tuple ``(images, masks)`` of float32 arrays; the first axis indexes
        the samples.
    """
    pairs = [generate_rectangle_image(img_size=img_size) for _ in range(num_samples)]
    images = [pair[0] for pair in pairs]
    masks = [pair[1] for pair in pairs]
    return np.array(images, dtype=np.float32), np.array(masks, dtype=np.float32)

# Build the synthetic data set once; later cells read these module-level names.
img_size = 64
num_samples = 800
X, Y = create_dataset(num_samples=num_samples, img_size=img_size)

# The first 80 % of the samples are used for training, the rest for testing.
split = int(0.8 * num_samples)
X_train, Y_train = X[:split], Y[:split]
X_test, Y_test = X[split:], Y[split:]
print('Trainingsdaten:', X_train.shape, Y_train.shape)
print('Testdaten:', X_test.shape, Y_test.shape)


In [None]:
def show_random_example():
    """Plot one randomly chosen training image next to its mask.

    Reads the module-level ``X_train`` / ``Y_train`` arrays and opens a
    Matplotlib figure with two panels (image left, mask right).
    """
    idx = np.random.randint(0, X_train.shape[0])
    panels = (
        (X_train[idx], 'Bild'),
        (Y_train[idx], 'Maske (Gebäude)'),
    )

    plt.figure(figsize=(6, 3))
    # Subplot positions are 1-based in Matplotlib.
    for position, (data, title) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.imshow(data, cmap='gray')
        plt.title(title)
        plt.axis('off')

    plt.tight_layout()
    plt.show()

# Display three randomly drawn training samples.
for _ in range(3):
    show_random_example()


In [None]:
class RectanglesDataset(Dataset):
    """Dataset that serves image/mask pairs as single-channel tensors.

    ``X`` and ``Y`` are stored as given and indexed along their first axis;
    they must be NumPy arrays because ``torch.from_numpy`` is used.
    """

    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def __len__(self):
        """Return the number of samples (size of the first axis of ``X``)."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return ``(image, mask)`` for sample ``idx`` with a leading channel axis."""
        # from_numpy shares memory with the source array; unsqueeze(0) adds
        # the channel dimension in front.
        return tuple(
            torch.from_numpy(source[idx]).unsqueeze(0)
            for source in (self.X, self.Y)
        )

# Wrap the train/test arrays so that a DataLoader can index them.
train_dataset = RectanglesDataset(X_train, Y_train)
test_dataset = RectanglesDataset(X_test, Y_test)

# Batches of 8 samples; only the training loader shuffles its samples.
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)
# Bare expression: as the last line of a notebook cell it displays both sizes.
len(train_dataset), len(test_dataset)


In [None]:
class ConvBlock(nn.Module):
    """Two 3x3 convolutions, each followed by an in-place ReLU.

    ``padding=1`` keeps the spatial size unchanged; only the channel count
    goes from ``in_channels`` to ``out_channels``.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        layers = []
        # The first conv maps in -> out channels, the second keeps out -> out.
        for source_channels in (in_channels, out_channels):
            layers.append(
                nn.Conv2d(source_channels, out_channels, kernel_size=3, padding=1)
            )
            layers.append(nn.ReLU(inplace=True))
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Apply both conv/ReLU stages to ``x`` and return the feature map."""
        features = self.block(x)
        return features

class SimpleUNet(nn.Module):
    """Small U-Net-style network with two down- and two up-sampling stages.

    The encoder widens the channels 16 -> 32 -> 64 while halving the
    resolution twice; the decoder upsamples with transposed convolutions and
    concatenates the matching encoder output (skip connection) before each
    decoder block. The final 1x1 convolution returns raw logits, no sigmoid.

    The concatenations only line up when the input height and width are
    divisible by 4, because of the two 2x poolings.
    """

    def __init__(self, in_channels=1, out_channels=1):
        super().__init__()
        narrow, medium, wide = 16, 32, 64

        # Encoder
        self.enc1 = ConvBlock(in_channels, narrow)
        self.pool1 = nn.MaxPool2d(2)
        self.enc2 = ConvBlock(narrow, medium)
        self.pool2 = nn.MaxPool2d(2)

        # Bottleneck at a quarter of the input resolution
        self.bottleneck = ConvBlock(medium, wide)

        # Decoder: each block sees upsampled + skip channels, i.e. twice the
        # width that the transposed convolution emits.
        self.up2 = nn.ConvTranspose2d(wide, medium, kernel_size=2, stride=2)
        self.dec2 = ConvBlock(2 * medium, medium)
        self.up1 = nn.ConvTranspose2d(medium, narrow, kernel_size=2, stride=2)
        self.dec1 = ConvBlock(2 * narrow, narrow)

        # Per-pixel projection to the output channels
        self.final_conv = nn.Conv2d(narrow, out_channels, kernel_size=1)

    def forward(self, x):
        """Return per-pixel logits with the same spatial size as ``x``."""
        skip_full = self.enc1(x)
        skip_half = self.enc2(self.pool1(skip_full))
        deepest = self.bottleneck(self.pool2(skip_half))

        # Upsample, then join with the encoder features along the channel axis.
        merged_half = torch.cat([self.up2(deepest), skip_half], dim=1)
        decoded_half = self.dec2(merged_half)

        merged_full = torch.cat([self.up1(decoded_half), skip_full], dim=1)
        decoded_full = self.dec1(merged_full)

        return self.final_conv(decoded_full)

# Instantiate the single-channel-in / single-channel-out network on the
# selected device and print its layer structure.
model = SimpleUNet(in_channels=1, out_channels=1).to(device)
print(model)


In [None]:
# The model emits raw logits, so the sigmoid is part of the loss function.
criterion = nn.BCEWithLogitsLoss()
# The previous lr=1e-100 is far below float32 resolution: every Adam step
# rounded to zero and the weights never changed. 1e-3 (the value that had been
# commented out, and Adam's documented default) lets the model actually learn.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

num_epochs = 10
train_losses = []  # mean training loss per epoch
val_losses = []  # mean loss on the test split per epoch

for epoch in range(num_epochs):
    # --- Training pass ---
    model.train()
    running_loss = 0.0
    for imgs, masks in train_loader:
        imgs = imgs.to(device)
        masks = masks.to(device)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()
        # The loss is a batch mean; weight it by the batch size so that a
        # smaller final batch does not skew the epoch average.
        running_loss += loss.item() * imgs.size(0)
    epoch_loss = running_loss / len(train_loader.dataset)
    train_losses.append(epoch_loss)

    # --- Evaluation pass on the test split (no gradient tracking) ---
    model.eval()
    val_running_loss = 0.0
    with torch.no_grad():
        for imgs, masks in test_loader:
            imgs = imgs.to(device)
            masks = masks.to(device)
            outputs = model(imgs)
            loss = criterion(outputs, masks)
            val_running_loss += loss.item() * imgs.size(0)
    val_loss = val_running_loss / len(test_loader.dataset)
    val_losses.append(val_loss)
    print(f'Epoche {epoch+1}/{num_epochs} - Train Loss: {epoch_loss:.4f}, Val Loss: {val_loss:.4f}')


In [None]:
def iou_score(pred, target, threshold=0.5, eps=1e-6):
    """Compute the intersection-over-union of two thresholded masks.

    Both inputs are binarised with ``> threshold`` before comparison.

    Args:
        pred: Array-like of predicted scores (e.g. sigmoid probabilities).
        target: Array-like ground-truth mask with the same shape as ``pred``.
        threshold: Values strictly above this count as foreground.
        eps: Small constant added to the union in the denominator.

    Returns:
        IoU as a Python float in [0, 1]. If neither mask contains any
        foreground the masks agree perfectly and 1.0 is returned (the
        previous formula produced 0.0 for that case).
    """
    pred_bin = np.asarray(pred) > threshold
    target_bin = np.asarray(target) > threshold

    # Count on boolean arrays: exact integers instead of float32 sums.
    intersection = np.count_nonzero(pred_bin & target_bin)
    union = np.count_nonzero(pred_bin | target_bin)

    if union == 0:
        # Nothing predicted and nothing to find: a correct empty prediction.
        return 1.0
    return float(intersection / (union + eps))


def show_overlay_example(index=0, threshold=0.5):
    """Visualise the model prediction for one test sample.

    Uses the module-level ``model``, ``device``, ``X_test`` and ``Y_test``.
    Four panels are drawn: the input image, the ground-truth mask, the
    thresholded prediction, and the ground truth with the prediction outline
    in red together with the IoU.

    Args:
        index: Position of the sample in the test arrays.
        threshold: Probability above which a pixel counts as building.
    """
    model.eval()

    img = X_test[index]
    true_mask = Y_test[index]

    # Add batch and channel axes in front of the 2-D image before inference.
    batch = torch.from_numpy(img)[None, None].to(device)
    with torch.no_grad():
        probabilities = torch.sigmoid(model(batch))
        pred = probabilities.cpu().numpy()[0, 0]

    iou = iou_score(pred, true_mask, threshold=threshold)
    pred_binary = (pred > threshold).astype(np.float32)

    fig, axes = plt.subplots(1, 4, figsize=(16, 4))

    # The first three panels are plain grayscale images.
    plain_panels = (
        (img, 'Bild'),
        (true_mask, 'Ground Truth'),
        (pred_binary, 'Prediction (binär)'),
    )
    for ax, (data, title) in zip(axes, plain_panels):
        ax.imshow(data, cmap='gray')
        ax.set_title(title)
        ax.axis('off')

    # Last panel: ground truth with the prediction boundary drawn on top.
    overlay_ax = axes[3]
    overlay_ax.imshow(true_mask, cmap='gray')
    overlay_ax.contour(pred_binary, levels=[0.5], colors='red', linewidths=1)
    overlay_ax.set_title(f'Overlay (IoU = {iou:.2f})')
    overlay_ax.axis('off')

    plt.tight_layout()
    plt.show()


# Show the predictions for the first three test samples.
for i in range(3):
    show_overlay_example(index=i)
