In [1]:
!pip install monai[all] torch torchvision matplotlib

In [7]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

transform = transforms.Compose([
    transforms.Resize((128, 128)),  
    transforms.ToTensor()           
])

voc_dataset = datasets.VOCSegmentation(
    root="./data",
    year="2012",
    image_set="train",
    download=True,
    transform=transform,
    target_transform=transform
)

train_loader = DataLoader(voc_dataset, batch_size=16, shuffle=True)

print(f"Dataset size: {len(voc_dataset)} images")


Downloading http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar to ./data/VOCtrainval_11-May-2012.tar


100%|██████████| 2.00G/2.00G [00:19<00:00, 101MB/s] 


Extracting ./data/VOCtrainval_11-May-2012.tar to ./data
Dataset size: 1464 images


In [8]:
import torch.nn as nn

class UNet(nn.Module):
    def __init__(self):
        super(UNet, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2)
        )
        self.middle = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2)
        )
        self.decoder = nn.Sequential(
            nn.Conv2d(256, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64, 64, kernel_size=2, stride=2),
            nn.ConvTranspose2d(64, 21, kernel_size=2, stride=2)  # 21 classes in Pascal VOC
        )

    def forward(self, x):
        enc_out = self.encoder(x)
        mid_out = self.middle(enc_out)
        dec_out = self.decoder(mid_out)
        return torch.sigmoid(dec_out)

model = UNet().cuda()


In [9]:
from torch.optim import Adam
from torch.nn import CrossEntropyLoss

criterion = CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=1e-3)


In [10]:
num_epochs = 10
model.train()

for epoch in range(num_epochs):
    epoch_loss = 0
    for batch in train_loader:
        images = batch[0].cuda()
        masks = batch[1].cuda()

        optimizer.zero_grad()
        outputs = model(images)

        loss = criterion(outputs, masks.squeeze(1).long())  # Pascal VOC has multi-class labels

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss/len(train_loader):.4f}")


Epoch [1/10], Loss: 2.2983
Epoch [2/10], Loss: 2.2114
Epoch [3/10], Loss: 2.1939
Epoch [4/10], Loss: 2.1939
Epoch [5/10], Loss: 2.1938
Epoch [6/10], Loss: 2.1940
Epoch [7/10], Loss: 2.1939
Epoch [8/10], Loss: 2.1940
Epoch [9/10], Loss: 2.1939
Epoch [10/10], Loss: 2.1937
