# U-Net Model

In [1]:
%pip install einops timm imutils torchvision lightning torchmetrics 

Collecting einops
  Downloading einops-0.8.0-py3-none-any.whl.metadata (12 kB)
Collecting imutils
  Downloading imutils-0.5.4.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l- \ done
Collecting lightning
  Downloading lightning-2.3.3-py3-none-any.whl.metadata (35 kB)
Downloading einops-0.8.0-py3-none-any.whl (43 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.2/43.2 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning-2.3.3-py3-none-any.whl (808 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m808.5/808.5 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: imutils
  Building wheel for imutils (setup.py) ... [?25l- \ | done
[?25h  Created wheel for imutils: filename=imutils-0.5.4-py3-none-any.whl size=25834 sha256=2a774827ee24d68a88b8d5bb2e10983b4d97dba491658dbbb00835807f0155ac
  Stored in directory: /root/.cache/pip/wheels/85/cf/3a/e265e975a1e7c7e54eb369

In [2]:
from lightning.pytorch.loggers import CSVLogger
import lightning as L
# Use a `utils` module importable from the current environment when there is
# one; otherwise add the Kaggle dataset directory that ships it to sys.path and
# retry. Only ImportError triggers the fallback, so unrelated failures
# (KeyboardInterrupt, errors raised inside utils itself) are not silently hidden.
try:
    from utils import SegModule, SegDM
except ImportError:
    import sys
    sys.path.append("/kaggle/input/segmentation-util")
    from utils import SegModule, SegDM

In [3]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

class EncodeBlock(nn.Module):
    """Encoder stage: two 3x3 convolutions (padding 1), each followed by ReLU.

    Args:
        in_channels: channel count of the incoming feature map.
        out_channels: channel count produced by both convolutions.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Build conv -> ReLU twice; the first conv maps in_channels to
        # out_channels, the second keeps out_channels. The convolutions land at
        # Sequential indices 0 and 2.
        stages = []
        for conv_in in (in_channels, out_channels):
            stages.append(nn.Conv2d(conv_in, out_channels, kernel_size=3, padding=1))
            stages.append(nn.ReLU())
        self.encode = nn.Sequential(*stages)

    def forward(self, x, maxpool_first=False):
        """Run the conv stack, optionally after a 2x2 / stride-2 max pool.

        Args:
            x: input feature map.
            maxpool_first: when true, `x` is max-pooled (kernel 2, stride 2)
                before the convolutions are applied.

        Returns:
            The output of the two conv + ReLU layers.
        """
        features = F.max_pool2d(x, kernel_size=2, stride=2) if maxpool_first else x
        return self.encode(features)

class DecodeBlock(nn.Module):
    """Decoder stage: 2x2 transposed-conv upsampling, concatenation with a skip
    connection, then two 3x3 convolutions each followed by ReLU.

    Args:
        in_channels: channel count of the tensor being upsampled; also the
            channel count expected after concatenation with the skip tensor.
        out_channels: channel count produced by the two convolutions.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # The transposed convolution halves the channel count so that, once the
        # skip tensor is concatenated, the result has in_channels channels again
        # (which is what the first convolution below expects).
        upsampled_channels = in_channels // 2
        self.upsample = nn.ConvTranspose2d(
            in_channels, upsampled_channels, kernel_size=2, stride=2
        )
        conv_a = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        conv_b = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.decode = nn.Sequential(conv_a, nn.ReLU(), conv_b, nn.ReLU())

    def forward(self, x, skip):
        """Upsample `x`, align it with `skip`, concatenate and convolve.

        Args:
            x: tensor to upsample (indexed as 4-D: dims 2 and 3 are spatial).
            skip: skip-connection tensor concatenated along dim 1.

        Returns:
            The output of the two conv + ReLU layers.
        """
        up = self.upsample(x)
        # Pad the upsampled tensor so its height/width match the skip tensor
        # (approach based on
        # https://github.com/milesial/Pytorch-UNet/blob/master/unet/unet_parts.py).
        # Any odd remainder goes to the right / bottom side.
        extra_h = skip.shape[2] - up.shape[2]
        extra_w = skip.shape[3] - up.shape[3]
        left, top = extra_w // 2, extra_h // 2
        # F.pad takes the last dimension first: (left, right, top, bottom).
        up = F.pad(up, [left, extra_w - left, top, extra_h - top])
        merged = torch.cat([up, skip], dim=1)
        return self.decode(merged)
class SimpleUNet(nn.Module):
    """Plain U-Net: four encoder stages, a bottleneck, four decoder stages with
    skip connections, and a final 1x1 convolution producing per-class maps.

    Args:
        in_channels: channel count of the input image (default 3).
        n_classes: number of output channels of the final 1x1 convolution
            (default 2).
    """

    def __init__(self, in_channels=3, n_classes=2):
        super().__init__()

        # Contracting path; channel widths follow the original U-Net paper.
        self.encode_1 = EncodeBlock(in_channels, 64)
        self.encode_2 = EncodeBlock(64, 128)
        self.encode_3 = EncodeBlock(128, 256)
        self.encode_4 = EncodeBlock(256, 512)

        # Bottleneck: pool once more, then two 3x3 conv + ReLU at 1024 channels.
        bottleneck_layers = [
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(512, 1024, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(1024, 1024, kernel_size=3, padding=1),
            nn.ReLU(),
        ]
        self.encode_last = nn.Sequential(*bottleneck_layers)

        # Expanding path; each stage halves the channel count.
        self.decode_1 = DecodeBlock(1024, 512)
        self.decode_2 = DecodeBlock(512, 256)
        self.decode_3 = DecodeBlock(256, 128)
        self.decode_4 = DecodeBlock(128, 64)

        # 1x1 convolution mapping the 64 feature channels to n_classes, as in
        # the original paper.
        self.decode_last = nn.Conv2d(64, n_classes, kernel_size=1)

    def forward(self, x):
        """Return the output of the final 1x1 convolution for input `x`."""
        # Contracting path: every stage after the first max-pools its input;
        # each stage output is kept as a skip connection.
        skip_1 = self.encode_1(x)
        skip_2 = self.encode_2(skip_1, maxpool_first=True)
        skip_3 = self.encode_3(skip_2, maxpool_first=True)
        skip_4 = self.encode_4(skip_3, maxpool_first=True)
        features = self.encode_last(skip_4)

        # Expanding path: skips are consumed deepest-first.
        decoder_stages = (
            (self.decode_1, skip_4),
            (self.decode_2, skip_3),
            (self.decode_3, skip_2),
            (self.decode_4, skip_1),
        )
        for decoder, skip_features in decoder_stages:
            features = decoder(features, skip=skip_features)

        return self.decode_last(features)


In [4]:
# Passed to the Lightning Trainer as `fast_dev_run` inside train_model.
FAST = True

# Image / mask directories of each dataset on Kaggle.
# The key 'shenzen' is spelled this way on purpose: callers index by it.
dataset_paths = {
    'darwin': {
        'img_dir': '/kaggle/input/xray-segmentation/Darwin/Darwin/img',
        'mask_dir': '/kaggle/input/xray-segmentation/Darwin/Darwin/mask',
    },
    'shenzen': {
        'img_dir': '/kaggle/input/xray-segmentation/Shenzhen/Shenzhen/img',
        'mask_dir': '/kaggle/input/xray-segmentation/Shenzhen/Shenzhen/mask',
    },
    'covid': {
        'img_dir': '/kaggle/input/xray-segmentation/Covid19 Radiography/COVID-19_Radiography_Dataset/COVID/images',
        'mask_dir': '/kaggle/input/xray-segmentation/Covid19 Radiography/COVID-19_Radiography_Dataset/COVID/masks',
    },
}

# Same datasets, relative to a local ./datasets checkout (same keys as above).
local_paths = {
    'darwin': {
        'img_dir': './datasets/Darwin/img',
        'mask_dir': './datasets/Darwin/mask',
    },
    'shenzen': {
        'img_dir': './datasets/Shenzhen/img',
        'mask_dir': './datasets/Shenzhen/mask',
    },
    'covid': {
        'img_dir': './datasets/COVID-19_Radiography_Dataset/COVID/images',
        'mask_dir': './datasets/COVID-19_Radiography_Dataset/COVID/masks',
    },
}
def train_model(model_name, mask_dir, img_dir, model, max_epochs=20,
                batch_size=2, n_classes=2, devices=None):
    """Fit `model` on one segmentation dataset, then run the test loop.

    Args:
        model_name: name of the CSVLogger run (under ``logs/``) and basename of
            the results file ``./{model_name}.csv`` handed to SegModule.
        mask_dir: directory with the segmentation masks, forwarded to SegDM.
        img_dir: directory with the input images, forwarded to SegDM.
        model: network to train; wrapped in a SegModule.
        max_epochs: forwarded to ``L.Trainer(max_epochs=...)``.
        batch_size: forwarded to SegDM. Defaults to 2, the value that used to
            be hard-coded here.
        n_classes: forwarded to SegModule as ``num_classes``. Defaults to 2,
            the value that used to be hard-coded here; it should agree with the
            number of output channels of `model`.
        devices: forwarded to ``L.Trainer(devices=...)``. ``None`` keeps the
            previous behaviour of using GPUs 0 and 1.

    Returns:
        The `model` object that was passed in.
    """
    if devices is None:
        # A fresh list per call, so no default object is shared between calls.
        devices = [0, 1]

    print(mask_dir)
    print(img_dir)
    data_module = SegDM(mask_dir=mask_dir, img_dir=img_dir, batch_size=batch_size)

    logger = CSVLogger("logs", name=model_name)
    module = SegModule(model, num_classes=n_classes, result_path=f'./{model_name}.csv')

    # FAST is the notebook-level switch for Lightning's fast_dev_run mode.
    trainer = L.Trainer(fast_dev_run=FAST,
                        devices=devices, accelerator='gpu',
                        logger=logger, max_epochs=max_epochs)
    trainer.fit(module, data_module)
    # NOTE: with more than one device Lightning warns that DistributedSampler
    # may replicate some samples during test, so test metrics can differ
    # slightly from a single-device evaluation.
    trainer.test(module, data_module)
    return model


# Shenzhen

In [5]:
# shenzen = train_model(
#     model_name='shenzen_unet',
#     model=SimpleUNet(),
#     **dataset_paths['shenzen'],
# )

# Darwin

In [6]:
# Train and test a fresh U-Net on the Darwin dataset (Kaggle paths).
darwin = train_model(
    'darwin_unet',
    model=SimpleUNet(),
    mask_dir=dataset_paths['darwin']['mask_dir'],
    img_dir=dataset_paths['darwin']['img_dir'],
)

/kaggle/input/xray-segmentation/Darwin/Darwin/mask
/kaggle/input/xray-segmentation/Darwin/Darwin/img


INFO: GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO: Running in `fast_dev_run` mode: will run the requested loop using 1 batch(es). Logging and checkpointing is suppressed.
INFO: Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
INFO: Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
INFO: ----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 2 processes
----------------------------------------------------------------------------------------------------



3434 examples in the training set...3434 examples in the training set...

1145 examples in the validation set...1145 examples in the validation set...



INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
INFO: LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1]
INFO: 
  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | SimpleUNet       | 31.0 M | train
1 | loss_fn   | CrossEntropyLoss | 0      | train
2 | f1        | BinaryF1Score    | 0      | train
3 | accuracy  | BinaryAccuracy   | 0      | train
4 | recall    | BinaryRecall     | 0      | train
5 | precision | BinaryPrecision  | 0      | train
6 | mean_iou  | MeanIoU          | 0      | train
-------------------------------------------------------
31.0 M    Trainable params
0         Non-trainable params
31.0 M    Total params
124.127   Total estimated model params size (MB)


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_steps=1` reached.
INFO: Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
INFO: Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
INFO: ----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 2 processes
----------------------------------------------------------------------------------------------------



1527 examples in the test set...1527 examples in the test set...



INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
INFO: LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1]
/opt/conda/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:215: Using `DistributedSampler` with the dataloaders. During `trainer.test()`, it is recommended to use `Trainer(devices=1, num_nodes=1)` to ensure each sample/batch gets evaluated exactly once. Otherwise, multi-device settings use `DistributedSampler` that replicates some samples to make sure all devices have same batch size in case of uneven inputs.


Testing: |          | 0/? [00:00<?, ?it/s]

# Covid

In [7]:
# Train and test a fresh U-Net on the COVID-19 Radiography dataset (Kaggle paths).
covid = train_model(
    'covid_unet',
    model=SimpleUNet(),
    mask_dir=dataset_paths['covid']['mask_dir'],
    img_dir=dataset_paths['covid']['img_dir'],
)

/kaggle/input/xray-segmentation/Covid19 Radiography/COVID-19_Radiography_Dataset/COVID/masks
/kaggle/input/xray-segmentation/Covid19 Radiography/COVID-19_Radiography_Dataset/COVID/images


INFO: GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO: Running in `fast_dev_run` mode: will run the requested loop using 1 batch(es). Logging and checkpointing is suppressed.
INFO: Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
INFO: Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
INFO: ----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 2 processes
----------------------------------------------------------------------------------------------------



2034 examples in the training set...2034 examples in the training set...

678 examples in the validation set...
678 examples in the validation set...


INFO: LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1]
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
INFO: 
  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | SimpleUNet       | 31.0 M | train
1 | loss_fn   | CrossEntropyLoss | 0      | train
2 | f1        | BinaryF1Score    | 0      | train
3 | accuracy  | BinaryAccuracy   | 0      | train
4 | recall    | BinaryRecall     | 0      | train
5 | precision | BinaryPrecision  | 0      | train
6 | mean_iou  | MeanIoU          | 0      | train
-------------------------------------------------------
31.0 M    Trainable params
0         Non-trainable params
31.0 M    Total params
124.127   Total estimated model params size (MB)


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_steps=1` reached.
INFO: Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
INFO: Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
INFO: ----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 2 processes
----------------------------------------------------------------------------------------------------



904 examples in the test set...904 examples in the test set...



INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
INFO: LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1]
/opt/conda/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:215: Using `DistributedSampler` with the dataloaders. During `trainer.test()`, it is recommended to use `Trainer(devices=1, num_nodes=1)` to ensure each sample/batch gets evaluated exactly once. Otherwise, multi-device settings use `DistributedSampler` that replicates some samples to make sure all devices have same batch size in case of uneven inputs.


Testing: |          | 0/? [00:00<?, ?it/s]