<a href="https://colab.research.google.com/github/ghaith-mq/U-Net/blob/main/U_Net.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install torch torchvision albumentations opencv-python


Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin

In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.datasets import VOCSegmentation
import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.metrics import jaccard_score

# U-Net Model
class UNet(nn.Module):
    def __init__(self):
        super(UNet, self).__init__()

        def CBR(in_channels, out_channels):
            return nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True)
            )

        self.enc1 = nn.Sequential(CBR(3, 64), CBR(64, 64))
        self.pool1 = nn.MaxPool2d(2)
        self.enc2 = nn.Sequential(CBR(64, 128), CBR(128, 128))
        self.pool2 = nn.MaxPool2d(2)
        self.enc3 = nn.Sequential(CBR(128, 256), CBR(256, 256))
        self.pool3 = nn.MaxPool2d(2)
        self.enc4 = nn.Sequential(CBR(256, 512), CBR(512, 512))
        self.pool4 = nn.MaxPool2d(2)
        self.enc5 = nn.Sequential(CBR(512, 1024), CBR(1024, 1024))

        self.up4 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
        self.dec4 = nn.Sequential(CBR(1024, 512), CBR(512, 512))
        self.up3 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.dec3 = nn.Sequential(CBR(512, 256), CBR(256, 256))
        self.up2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.dec2 = nn.Sequential(CBR(256, 128), CBR(128, 128))
        self.up1 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.dec1 = nn.Sequential(CBR(128, 64), CBR(64, 64))

        self.out_conv = nn.Conv2d(64, 21, kernel_size=1)  # VOCSegmentation has 21 classes

    def forward(self, x):
        enc1 = self.enc1(x)
        enc2 = self.enc2(self.pool1(enc1))
        enc3 = self.enc3(self.pool2(enc2))
        enc4 = self.enc4(self.pool3(enc3))
        enc5 = self.enc5(self.pool4(enc4))

        dec4 = self.up4(enc5)
        dec4 = torch.cat((enc4, dec4), dim=1)
        dec4 = self.dec4(dec4)

        dec3 = self.up3(dec4)
        dec3 = torch.cat((enc3, dec3), dim=1)
        dec3 = self.dec3(dec3)

        dec2 = self.up2(dec3)
        dec2 = torch.cat((enc2, dec2), dim=1)
        dec2 = self.dec2(dec2)

        dec1 = self.up1(dec2)
        dec1 = torch.cat((enc1, dec1), dim=1)
        dec1 = self.dec1(dec1)

        return self.out_conv(dec1)

# Dataset Class
class VOCDataset(Dataset):
    def __init__(self, root, year, image_set, transform=None):
        self.dataset = VOCSegmentation(root=root, year=year, image_set=image_set, download=True)
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        image, mask = self.dataset[idx]
        mask = np.array(mask)

        if self.transform:
            augmented = self.transform(image=np.array(image), mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, mask

# Data Augmentation
transform = A.Compose([
    A.Resize(256, 256),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.Transpose(p=0.5),
    # A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=45, p=0.5),
    # A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

# Load data
root = './data'
train_dataset = VOCDataset(root=root, year='2012', image_set='train', transform=transform)
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_dataset = VOCDataset(root=root, year='2012', image_set='val', transform=transform)
val_dataloader = DataLoader(val_dataset, batch_size=8, shuffle=False)

# Training and Evaluation
def train_model(model, dataloader, criterion, optimizer, num_epochs=25):
    model.train()
    for epoch in range(num_epochs):
        epoch_loss = 0
        for images, masks in dataloader:
            images = images.to(device)
            masks = masks.to(device).to(torch.long)
            outputs = model(images)
            loss = criterion(outputs, masks)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss/len(dataloader)}")
    return model

def evaluate_model(model, dataloader):
    model.eval()
    iou_scores = []
    with torch.no_grad():
        for images, masks in dataloader:
            images = images.to(device)
            masks = masks.to(device)
            outputs = model(images)
            preds = torch.argmax(outputs, dim=1)
            iou = jaccard_score(masks.cpu().numpy().flatten(), preds.cpu().numpy().flatten(), average='macro')
            iou_scores.append(iou)
    return np.mean(iou_scores)

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize model, criterion, and optimizer
model = UNet().to(device)
criterion = nn.CrossEntropyLoss(ignore_index=255)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Train the model
model = train_model(model, train_dataloader, criterion, optimizer, num_epochs=25)

# Evaluate the model
iou_score = evaluate_model(model, val_dataloader)
print(f"Mean IoU Score: {iou_score}")


Downloading http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar to ./data/VOCtrainval_11-May-2012.tar


100%|██████████| 1999639040/1999639040 [02:01<00:00, 16418615.99it/s]


Extracting ./data/VOCtrainval_11-May-2012.tar to ./data
Using downloaded and verified file: ./data/VOCtrainval_11-May-2012.tar
Extracting ./data/VOCtrainval_11-May-2012.tar to ./data
Epoch 1/25, Loss: 2.333742539087931
Epoch 2/25, Loss: 1.8301678036080031
Epoch 3/25, Loss: 1.5431009654790326
Epoch 4/25, Loss: 1.3861430749867132
Epoch 5/25, Loss: 1.3020874528937
Epoch 6/25, Loss: 1.2502457457161993
Epoch 7/25, Loss: 1.2229775877598206
Epoch 8/25, Loss: 1.2068209856585728
Epoch 9/25, Loss: 1.188135197579535
Epoch 10/25, Loss: 1.183849547404409
Epoch 11/25, Loss: 1.167995563947438
Epoch 12/25, Loss: 1.1599865327767336
Epoch 13/25, Loss: 1.154297649860382
Epoch 14/25, Loss: 1.1401966745084753
Epoch 15/25, Loss: 1.1336796896053793
Epoch 16/25, Loss: 1.1175116069329893
Epoch 17/25, Loss: 1.113813193769403
Epoch 18/25, Loss: 1.1084282616448533
Epoch 19/25, Loss: 1.104497784473857
Epoch 20/25, Loss: 1.0911862306256113
Epoch 21/25, Loss: 1.0884979126231917
Epoch 22/25, Loss: 1.076053762045063
E

In [2]:
gradient_norms = {}
for name, param in model.named_parameters():
    if param.requires_grad:
        gradient_norms[name] = param.grad.norm().item()

# Print gradient norms for each layer
for name, norm in gradient_norms.items():
    print(f"Layer: {name} | Gradient norm: {norm}")

Layer: enc1.0.0.weight | Gradient norm: 1.1967054605484009
Layer: enc1.0.0.bias | Gradient norm: 4.225205785246544e-08
Layer: enc1.0.1.weight | Gradient norm: 0.03182877227663994
Layer: enc1.0.1.bias | Gradient norm: 0.03238758444786072
Layer: enc1.1.0.weight | Gradient norm: 0.7465816140174866
Layer: enc1.1.0.bias | Gradient norm: 8.278336238731754e-09
Layer: enc1.1.1.weight | Gradient norm: 0.030847996473312378
Layer: enc1.1.1.bias | Gradient norm: 0.02587631717324257
Layer: enc2.0.0.weight | Gradient norm: 0.4285193979740143
Layer: enc2.0.0.bias | Gradient norm: 4.80527440060996e-09
Layer: enc2.0.1.weight | Gradient norm: 0.01319054514169693
Layer: enc2.0.1.bias | Gradient norm: 0.010793798603117466
Layer: enc2.1.0.weight | Gradient norm: 0.2755362391471863
Layer: enc2.1.0.bias | Gradient norm: 1.9681229979084947e-09
Layer: enc2.1.1.weight | Gradient norm: 0.010840913280844688
Layer: enc2.1.1.bias | Gradient norm: 0.006416734773665667
Layer: enc3.0.0.weight | Gradient norm: 0.153108