In [None]:
#This cell imports all the necessary libraries required for building and training the U-Net model.
#It includes PyTorch components like `torch`, `torch.nn`, and `torchvision`, along with `tqdm` for progress bars, `numpy` for numerical operations, `PIL` for image processing, and `os` for file path management.

import torch
import torchvision
from tqdm import tqdm
from torch.utils.data import DataLoader
import torch.nn as nn
from torch.nn import BatchNorm2d
from torch.utils.data import Dataset
from PIL import Image
import numpy as np
import os
from torchvision import transforms

In [None]:
#This function defines a standard convolutional block used throughout the U-Net architecture.
#Each block consists of two convolutional layers, each followed by Batch Normalization and a ReLU activation function.

def conv(in_ch, out_ch):
  """Build the double-convolution block used at every U-Net stage.

  The block chains two identical stages, each made of a 3x3 ``Conv2d``,
  a ``BatchNorm2d`` and an in-place ``ReLU``. With ``kernel_size=3`` and
  ``padding=1`` the spatial size is left unchanged, so only the channel
  count changes (``in_ch`` -> ``out_ch``).

  Args:
    in_ch: Number of channels of the incoming feature map.
    out_ch: Number of channels produced by both convolutions.

  Returns:
    An ``nn.Sequential`` holding the six layers in order.
  """
  stages = []
  stage_in = in_ch
  for _ in range(2):
    stages += [
        nn.Conv2d(stage_in, out_ch, kernel_size=3, padding=1),
        nn.BatchNorm2d(out_ch),
        nn.ReLU(inplace=True),
    ]
    # The second stage consumes what the first one produced.
    stage_in = out_ch
  return nn.Sequential(*stages)

In [None]:
#This cell defines the `UNet` class, implementing the complete U-Net architecture.
#It includes the encoder (downsampling path), a bottleneck, and the decoder (upsampling path) with skip connections.

class UNet(nn.Module):
    """U-Net encoder/decoder producing one output map per pixel.

    The encoder applies four ``conv`` blocks (64, 128, 256, 512 channels),
    each followed by a 2x2 max-pool. A 1024-channel bottleneck sits at the
    lowest resolution. The decoder mirrors the encoder: every step doubles
    the resolution with a transposed convolution, concatenates the encoder
    feature map of the same level (skip connection) along the channel axis
    and refines the result with another ``conv`` block. A final 1x1
    convolution maps the 64 decoder channels to ``out_channels``; no
    activation is applied to the output.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels of the returned tensor.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # A single stateless pooling layer is shared by all encoder levels.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Encoder (contracting path).
        self.down1 = conv(in_channels, 64)
        self.down2 = conv(64, 128)
        self.down3 = conv(128, 256)
        self.down4 = conv(256, 512)

        self.bottleneck = conv(512, 1024)

        # Decoder (expanding path). Each `upN` block receives twice the
        # channels that `upconvN` emits because the skip tensor is
        # concatenated before it runs.
        self.upconv1 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
        self.up1 = conv(1024, 512)

        self.upconv2 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.up2 = conv(512, 256)

        self.upconv3 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.up3 = conv(256, 128)

        self.upconv4 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.up4 = conv(128, 64)

        self.outconv = nn.Conv2d(64, out_channels, kernel_size=1)

    @staticmethod
    def _match_size(x, ref):
        """Zero-pad ``x`` so that its height and width equal those of ``ref``.

        Max-pooling floors odd sizes, so after upsampling ``x`` can be one
        pixel smaller than the skip tensor ``ref``. When the sizes already
        agree ``x`` is returned untouched.
        """
        diff_h = ref.size(2) - x.size(2)
        diff_w = ref.size(3) - x.size(3)
        if diff_h == 0 and diff_w == 0:
            return x
        # `pad` takes (left, right, top, bottom) for the last two dimensions.
        return nn.functional.pad(
            x,
            [diff_w // 2, diff_w - diff_w // 2, diff_h // 2, diff_h - diff_h // 2],
        )

    def forward(self, x):
        """Run the network on a batch.

        Args:
            x: Tensor of shape ``(N, in_channels, H, W)``. ``H`` and ``W`` do
                not have to be multiples of 16, but both must be at least 16
                so that four poolings still leave one pixel.

        Returns:
            Tensor of shape ``(N, out_channels, H, W)``.
        """
        # Encoder: keep every pre-pooling activation for the skip connections.
        enc1 = self.down1(x)
        enc2 = self.down2(self.pool(enc1))
        enc3 = self.down3(self.pool(enc2))
        enc4 = self.down4(self.pool(enc3))

        bn = self.bottleneck(self.pool(enc4))

        # Decoder: upsample, align with the skip tensor, concatenate, refine.
        dec1 = self._match_size(self.upconv1(bn), enc4)
        dec1 = self.up1(torch.cat([dec1, enc4], dim=1))

        dec2 = self._match_size(self.upconv2(dec1), enc3)
        dec2 = self.up2(torch.cat([dec2, enc3], dim=1))

        dec3 = self._match_size(self.upconv3(dec2), enc2)
        dec3 = self.up3(torch.cat([dec3, enc2], dim=1))

        dec4 = self._match_size(self.upconv4(dec3), enc1)
        dec4 = self.up4(torch.cat([dec4, enc1], dim=1))

        return self.outconv(dec4)

In [None]:
#This cell mounts Google Drive to the Colab environment.
#This step is essential for accessing dataset files stored in your Google Drive.

# `google.colab` is provided by the Colab runtime, so this cell is expected to run on Colab only.
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset paths used by this notebook point inside this mount.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#This cell defines the directory paths for the image and mask data.
#These paths point to the locations within Google Drive where the `CameraRGB` (input images) and `CameraMask` (segmentation masks) folders are stored.

# Folder of RGB input images; the dataset class lists this folder to enumerate its samples.
image_dir = "/content/drive/MyDrive/U-net(files)/data/CameraRGB"
# Folder of segmentation masks; each mask is looked up under the same file name as its image.
mask_dir = "/content/drive/MyDrive/U-net(files)/data/CameraMask"

In [None]:
#This cell defines image and mask transformations (resizing, converting to tensor, normalization).
#It also defines the `UNetDataset` class for loading images and masks, and then initializes the dataset and `DataLoader` for batching data during training.

# Image pipeline: ndarray -> PIL image -> 256x256 -> float tensor -> per-channel normalization.
# The mean/std values are the widely used ImageNet channel statistics.
img_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Mask pipeline: same target size as the images, but resized with nearest-neighbour
# interpolation. The default (bilinear) blends neighbouring label values along class
# boundaries and produces values that are not valid labels.
mask_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((256, 256), interpolation=transforms.InterpolationMode.NEAREST),
    transforms.ToTensor()
])

# NOTE: the dataset is instantiated after the `UNetDataset` class definition below.
# `UNet` is the model class and only accepts (in_channels, out_channels); calling it
# with dataset arguments here raised a TypeError and stopped "Run All".

class UNetDataset(Dataset):
    """Paired image/mask dataset backed by two directories.

    Every regular file found in ``image_dir`` is one sample. Its mask is the
    file with the same name inside ``mask_dir``.

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing one mask per image, under the same
            file name.
        img_transform: Optional callable applied to the RGB image array.
        mask_transform: Optional callable applied to the mask array.
    """

    def __init__(self, image_dir, mask_dir, img_transform=None, mask_transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.img_transform = img_transform
        self.mask_transform = mask_transform
        # `os.listdir` returns entries in arbitrary order, so sort them to make
        # index -> sample reproducible. Sub-directories (for example the
        # `.ipynb_checkpoints` folder Jupyter may create) are skipped because
        # they cannot be opened as images.
        self.images = sorted(
            name
            for name in os.listdir(image_dir)
            if os.path.isfile(os.path.join(image_dir, name))
        )

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, index):
        """Load the sample at ``index``.

        Returns:
            ``(image, mask)``. Without transforms, ``image`` is the RGB image
            as a NumPy array and ``mask`` is the single-channel mask as a
            float32 NumPy array; otherwise each is whatever the matching
            transform returns.
        """
        img_name = self.images[index]
        img_path = os.path.join(self.image_dir, img_name)
        mask_path = os.path.join(self.mask_dir, img_name)

        # Context managers release the file handles as soon as the pixel data
        # has been copied into NumPy arrays.
        with Image.open(img_path) as img_file:
            image = np.array(img_file.convert("RGB"))
        with Image.open(mask_path) as mask_file:
            mask = np.array(mask_file.convert("L"), dtype=np.float32)
        # Map the value 255 to 1.0; all other mask values are left unchanged.
        mask[mask == 255.0] = 1.0

        if self.img_transform is not None:
            image = self.img_transform(image)
        if self.mask_transform is not None:
            mask = self.mask_transform(mask)

        return image, mask

# Number of (image, mask) pairs delivered per batch.
batch_size = 8

# Dataset over the image/mask folders, using the transforms defined earlier in this cell.
dataset = UNetDataset(
    image_dir=image_dir,
    mask_dir=mask_dir,
    img_transform=img_transform,
    mask_transform=mask_transform,
)

# shuffle=True makes the loader draw samples in a new random order every epoch.
dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)