In [2]:
import numpy as np
import torch
from PIL import Image
from torch import Tensor
# Target size handed to transforms.Resize for both RGB images and masks.
# torchvision interprets a 2-sequence as (height, width).
IMAGE_SIZE = (252, 378)
def load_mask(mask_path):
    """Read a segmentation mask from disk and return it with 0/1 labels.

    Inputs:
        mask_path (str): location of the mask image, expected to look like
        "/PATH/TO/LOAD/DIR/XXXX_mask.png".
    Outputs:
        mask (np.array): uint8 array. When the stored image contains values
        above 1 it is integer-divided by 255 (so 255 becomes 1); otherwise
        the pixel values are returned untouched.
    """
    pixels = np.asarray(Image.open(mask_path)).astype(np.uint8)
    # Masks that are already 0/1 need no rescaling.
    if not pixels.max() > 1:
        return pixels
    # Map the 0/255 encoding onto 0/1 (floor division keeps dtype uint8).
    return np.floor_divide(pixels, 255)
def compute_iou(pred_mask, gt_mask, eps=1e-6):
    """Computes the IoU (intersection over union) between two binary masks.

    Inputs:
        pred_mask (np.array): predicted mask, shape (image_height, image_width).
            Any dtype is accepted (int, bool, float); non-zero means foreground.
        gt_mask (np.array): ground-truth mask, same conventions as pred_mask.
        eps (float): epsilon added to numerator and denominator so that two
            empty masks give 1.0 instead of 0/0.
    Outputs:
        iou_score (float): (|pred AND gt| + eps) / (|pred OR gt| + eps).
    """
    # Compare on truthiness instead of integer bitwise operators: `&` / `|`
    # raise TypeError on float arrays and operate bit-by-bit on values other
    # than 0/1. For 0/1 integer masks the result is unchanged.
    pred = np.asarray(pred_mask).astype(bool)
    gt = np.asarray(gt_mask).astype(bool)

    intersection = np.logical_and(pred, gt).sum(dtype=float)  # zero if either mask is empty
    union = np.logical_or(pred, gt).sum(dtype=float)  # zero only if both masks are empty

    # Smoothed division; the result is a single scalar, so no further
    # reduction is required.
    iou_score = (intersection + eps) / (union + eps)
    return iou_score

In [2]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset
from torchvision import transforms
class ETHMugsDataset(Dataset):
    """Torch dataset for ETH Mugs.

    Each item is an ``(image, mask)`` pair: the RGB image resized to
    ``IMAGE_SIZE`` as a float tensor, and (outside test mode) the matching
    segmentation mask as a float tensor containing only 0.0 and 1.0.
    """
    def __init__(self, root_dir, mode="train"):
        """
        This dataset class loads the ETH Mugs dataset.
        Args:
            root_dir (str): Path to the root directory of the dataset. Images
                are read from ``<root_dir>/rgb`` and masks from
                ``<root_dir>/masks``.
            mode (str): Mode of the dataset. It can be "train", "val" or "test".
                The mode only decides whether masks are loaded ("test" has
                none); it does not select a different subset of files.
        """
        self.mode = mode
        self.root_dir = root_dir
        self.rgb_dir = os.path.join(self.root_dir, "rgb")
        self.mask_dir = os.path.join(self.root_dir, "masks") if mode != "test" else None
        # os.listdir yields entries in arbitrary order; sort so that a given
        # index always refers to the same image across runs and machines.
        self.image_paths = sorted(
            os.path.join(self.rgb_dir, fname)
            for fname in os.listdir(self.rgb_dir)
            if fname.endswith('.jpg')
        )
        self.transform = transforms.Compose([
            transforms.Resize(IMAGE_SIZE),
            transforms.ToTensor()
        ])
        self.mask_transform = transforms.Compose([
            transforms.Resize(IMAGE_SIZE),
            transforms.ToTensor()
        ])
        print("[INFO] Dataset mode:", mode)
        print(f"[INFO] Number of images in the ETHMugsDataset: {len(self.image_paths)}")
    def __len__(self):
        """Number of RGB images found in the dataset directory."""
        return len(self.image_paths)
    def __getitem__(self, idx: int):
        """Load the image (and mask, unless in test mode) at position ``idx``.

        Returns:
            (image, mask): ``image`` is the transformed RGB image. ``mask`` is
            a float tensor of 0.0/1.0 values, or an empty tensor in test mode.
        Raises:
            FileNotFoundError: if the mask belonging to the image is missing.
        """
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert("RGB")
        image = self.transform(image)
        if self.mode != "test":
            # The mask shares the image's stem: XXXX_rgb.jpg -> XXXX_mask.png
            base_filename = os.path.basename(image_path).replace('_rgb.jpg', '_mask.png')
            mask_path = os.path.join(self.mask_dir, base_filename)
            if not os.path.exists(mask_path):
                raise FileNotFoundError(f"Mask file {mask_path} not found.")
            mask = load_mask(mask_path)
            # ToTensor divides uint8 pixels by 255, so a 0/1 mask would come
            # out as 0 / 0.0039 and never provide a positive target. Encode
            # foreground as 255 so it maps back to 1.0 after the transform.
            mask = Image.fromarray((mask > 0).astype(np.uint8) * 255)
            mask = self.mask_transform(mask)
            # Resize interpolates along object borders; snap back to {0, 1}.
            mask = (mask > 0.5).float()
        else:
            mask = torch.tensor([])  # Empty tensor for test mode
        return image, mask

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
class SimpleCNN(nn.Module):
    """Three-layer fully-convolutional network for per-pixel prediction.

    Two 3x3 convolutions (padding 1, 64 channels, ReLU) followed by a 1x1
    convolution. Spatial size is preserved and the output is left as raw
    scores (no final activation).
    """
    def __init__(self, in_channels=3, out_channels=1):
        """
        Args:
            in_channels (int): channels of the input image.
            out_channels (int): channels of the produced score map.
        """
        super().__init__()
        hidden_channels = 64
        self.conv1 = nn.Conv2d(in_channels, hidden_channels, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(hidden_channels, hidden_channels, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(hidden_channels, out_channels, kernel_size=1)
    def forward(self, x):
        """Map a (N, in_channels, H, W) batch to (N, out_channels, H, W) scores."""
        features = self.conv1(x).relu()
        features = self.conv2(features).relu()
        return self.conv3(features)
# Single definition of the dataset location (previously repeated per loader).
DATA_ROOT = '/Users/michaelaernst/Documents/Fabian/ETH/ML Projekte/student_template2/project2/datasets/train_images_378_252'


def _predict_binary_masks(net, batch):
    """Run `net` on `batch` without gradients and threshold sigmoid(logits) at 0.5."""
    with torch.no_grad():
        return (torch.sigmoid(net(batch)) > 0.5).float()


# Initialize dataset and dataloader
# NOTE(review): both datasets read DATA_ROOT and ETHMugsDataset does not pick
# a subset per mode, so the "validation" IoU below is measured on the training
# images. Point the validation dataset at held-out data for a real estimate.
train_dataset = ETHMugsDataset(root_dir=DATA_ROOT, mode='train')
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_dataset = ETHMugsDataset(root_dir=DATA_ROOT, mode='val')
val_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False)
# Initialize model, loss function, and optimizer
model = SimpleCNN()
# BCEWithLogitsLoss consumes raw logits; sigmoid is applied only at evaluation.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# Move model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
# Training loop
num_epochs = 2
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, masks in train_dataloader:
        images = images.to(device)
        masks = masks.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, masks)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_dataloader):.4f}")
    # Validation phase
    model.eval()
    total_iou = 0.0
    num_val_samples = 0
    for val_images, val_masks in val_dataloader:
        val_images = val_images.to(device)
        val_masks = val_masks.to(device)
        val_outputs = _predict_binary_masks(model, val_images)
        val_outputs_np = val_outputs.cpu().numpy().astype(int)
        val_masks_np = val_masks.cpu().numpy().astype(int)
        for val_output_np, val_mask_np in zip(val_outputs_np, val_masks_np):
            total_iou += compute_iou(val_output_np, val_mask_np)
            num_val_samples += 1
    # Average over evaluated samples, not batches, so the score stays correct
    # if the validation batch size is ever changed from 1.
    avg_iou = total_iou / num_val_samples
    print(f"Epoch [{epoch+1}/{num_epochs}], Validation IoU: {avg_iou:.4f}")
    # Visualize a sample output (first validation batch; model is still in eval mode)
    val_images, val_masks = next(iter(val_dataloader))
    val_images = val_images.to(device)
    val_outputs = _predict_binary_masks(model, val_images)
    # Plot the image, ground truth mask, and predicted mask
    plt.figure(figsize=(15, 5))
    plt.subplot(1, 3, 1)
    plt.imshow(val_images[0].cpu().permute(1, 2, 0))  # CHW -> HWC for matplotlib
    plt.title('Image')
    plt.subplot(1, 3, 2)
    plt.imshow(val_masks[0].cpu().squeeze(), cmap='gray')
    plt.title('Ground Truth Mask')
    plt.subplot(1, 3, 3)
    plt.imshow(val_outputs[0].cpu().squeeze(), cmap='gray')
    plt.title('Predicted Mask')
    plt.show()