# U-Net Paper Replication

- Original Paper: https://arxiv.org/abs/1505.04597

In [1]:
# Shell escape: print the nvidia-smi report (GPU, driver and CUDA versions, memory usage).
!nvidia-smi

Thu May  2 20:44:02 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.05              Driver Version: 545.84       CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 4070 Ti     On  | 00000000:01:00.0  On |                  N/A |
|  0%   43C    P8              11W / 285W |   1032MiB / 12282MiB |      8%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
import torch
import torchvision
from torchvision import transforms

# Record the library versions this notebook was run with, one per line.
print(
    f"torch version: {torch.__version__}",
    f"torchvision version: {torchvision.__version__}",
    sep="\n",
)

torch version: 2.0.1+cu117
torchvision version: 0.15.2+cu117


In [3]:
import os
import sys
from pathlib import Path

# Parent of the current working directory: it goes first on sys.path (the
# notebook imports from the `src` package further down) and anchors the data paths.
_parent_dir = Path.cwd().parent
sys.path.insert(0, str(_parent_dir))

input_path = _parent_dir.joinpath("data/VOC2012/JPEGImages")
target_path = _parent_dir.joinpath("data/VOC2012/SegmentationClass")

In [4]:
# Use CUDA when torch reports it as available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"Device: {device}")

Device: cuda


In [5]:
# Settings shared by the data, model and training cells.
BATCH_SIZE = 16  # passed to get_dataloaders as batch_size
IMAGE_SIZE = 512  # output_size of the Rescale/RandomCrop transforms and H/W of the summary input
TRAIN_SPLIT = 0.7  # fraction of the shuffled file names assigned to the training split
# os.cpu_count() returns None when the CPU count cannot be determined; fall
# back to 0 so NUM_WORKERS is always an int.
# NOTE(review): 0 presumably means "no worker processes" -- confirm against get_dataloaders.
NUM_WORKERS = os.cpu_count() or 0

SEED = 42  # seed applied before shuffling the file names

## 01. Data

In [31]:
import random

import src.data.transforms as transforms_custom
from src.data.dataset import DATASET_NAME
from src.data.dataloader import get_dataloaders


def _rescale_then_crop():
    """Return fresh Rescale and RandomCrop steps, both configured with IMAGE_SIZE."""
    return [
        transforms_custom.Rescale(output_size=IMAGE_SIZE),
        transforms_custom.RandomCrop(output_size=IMAGE_SIZE),
    ]


# Training pipeline: Rescale -> RandomCrop -> RandomHorizontalFlip(p=0.5) -> ToTensor.
transform_train = transforms.Compose(
    _rescale_then_crop()
    + [
        transforms_custom.RandomHorizontalFlip(p=0.5),
        transforms_custom.ToTensor(),
    ]
)

# Test pipeline: the same steps without the horizontal flip.
transform_test = transforms.Compose(
    _rescale_then_crop() + [transforms_custom.ToTensor()]
)

# TODO: Test transform? (it currently still applies RandomCrop)
# NOTE(review): the names are taken from SegmentationClass and keep their file
# extension; the recorded run failed while opening a ".png" name under
# JPEGImages, so confirm how the dataset maps a name to its input image.
#
# os.listdir returns entries in arbitrary order, so sort before shuffling:
# otherwise the seeded shuffle (and therefore the train/test split) can differ
# between machines or runs.
names = sorted(os.listdir(path=target_path))
random.seed(SEED)
random.shuffle(names)

# First TRAIN_SPLIT fraction goes to training, the remainder to testing.
split_index = int(len(names) * TRAIN_SPLIT)
train_names = names[:split_index]
test_names = names[split_index:]

# Build both loaders in one call; arguments are grouped by concern.
train_dataloader, test_dataloader = get_dataloaders(
    dataset=DATASET_NAME.VOC,
    # directories holding the input images and the segmentation targets
    input_path=input_path,
    target_path=target_path,
    # file names belonging to each split, with the transform applied to it
    train_names=train_names,
    transform_train=transform_train,
    test_names=test_names,
    transform_test=transform_test,
    # loading settings from the configuration cell
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
)

## 02. Model

In [32]:
from src.models.unet.unet import UNet
from torchinfo import summary

model = UNet()

# Per-layer table for a (1, 3, IMAGE_SIZE, IMAGE_SIZE) input; the call stays
# the last expression so the notebook displays its result.
summary_columns = ["input_size", "output_size", "num_params", "trainable"]
summary(
    model,
    input_size=(1, 3, IMAGE_SIZE, IMAGE_SIZE),
    col_names=summary_columns,
    col_width=20,
    row_settings=["var_names"],
    verbose=0,
)

Layer (type (var_name))                       Input Shape          Output Shape         Param #              Trainable
UNet (UNet)                                   [1, 3, 512, 512]     [1, 1, 512, 512]     --                   True
├─Encoder (encoder)                           [1, 3, 512, 512]     [1, 512, 32, 32]     --                   True
│    └─ModuleList (layers)                    --                   --                   (recursive)          True
│    │    └─DoubleConv (0)                    [1, 3, 512, 512]     [1, 64, 512, 512]    38,848               True
│    └─MaxPool2d (pool)                       [1, 64, 512, 512]    [1, 64, 256, 256]    --                   --
│    └─ModuleList (layers)                    --                   --                   (recursive)          True
│    │    └─DoubleConv (1)                    [1, 64, 256, 256]    [1, 128, 256, 256]   221,696              True
│    └─MaxPool2d (pool)                       [1, 128, 256, 256]   [1, 128, 128, 128]

## 03. Train

In [33]:
from torch import nn

EPOCHS = 50
LEARNING_RATE = 1e-3  # step size handed to Adam

# Criterion and optimizer used for training; Adam updates every model parameter.
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [34]:
from src.models.train import train

# Train for EPOCHS epochs on `device`, using the loss function and optimizer
# built in the preceding cell (they were previously passed as None).
# No writer is supplied.
train(
    model=model,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    loss_fn=loss_fn,
    optimizer=optimizer,
    epochs=EPOCHS,
    device=device,
    writer=None,
)

  0%|          | 0/50 [00:00<?, ?it/s]


FileNotFoundError: Caught FileNotFoundError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/geri/.venv/computer_vision/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/geri/.venv/computer_vision/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/geri/.venv/computer_vision/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/geri/work/unet_replication/src/data/dataset.py", line 27, in __getitem__
    try:
  File "/home/geri/.venv/computer_vision/lib/python3.10/site-packages/PIL/Image.py", line 3277, in open
    fp = builtins.open(filename, "rb")
FileNotFoundError: [Errno 2] No such file or directory: '/home/geri/work/unet_replication/data/VOC2012/JPEGImages/2011_000999.png'


In [None]:
# Scratch note kept as a comment because the bare path is not valid Python
# (note the .jpg extension): data/VOC2012/JPEGImages/2008_005679.jpg