In [12]:
import os

import albumentations as A
from albumentations.pytorch import ToTensorV2

from PIL import Image
import numpy as np


from torch.utils.data import Dataset
from torchvision.transforms import Compose, ToTensor, Normalize

In [13]:
# Dataset locations. TRAIN_PATH and VAL_PATH are joined onto DATA_PATH with
# os.path.join in later cells, so they are relative to the dataset root.
DATA_PATH = "data/splitted/deepglobe-2018-dataset"
TRAIN_PATH = "train/"
VAL_PATH = "val/"

# Encoder settings forwarded to smp.Unet(encoder_name=..., encoder_weights=...).
ENCODER_NAME = "resnet34"
ENCODER_WEIGHTS = "imagenet"

# Training hyperparameters.
MAX_EPOCHS = 50  # passed to Trainer(max_epochs=...)
GPUS = 1  # passed to Trainer(devices=...) alongside accelerator="auto"
BATCH_SIZE = 16  # batch size handed to HaulRoadDataModule

## Dataset Preparation
1. Download and Explore the Dataset:  
- Download the dataset from the provided Codalab competition link.
- Explore the data structure, including image dimensions, file formats, and corresponding labels.

2. Data Augmentation:  
- Use augmentations like rotations, flips, and brightness adjustments to increase the dataset's variety. Leverage libraries like `torchvision.transforms` or `Albumentations`.

3. Dataset Class:  
- Create a PyTorch dataset class that loads images and masks, applies augmentations, and prepares data for model training:

In [14]:
class HaulRoadDataset(Dataset):
    """Dataset of (image, binary mask) pairs read from two directories.

    Images and masks are paired by position after sorting both directory
    listings, so the i-th image file name must correspond to the i-th mask
    file name.
    """

    def __init__(self, images_dir, masks_dir, transform=None):
        """
        Args:
            images_dir (str): Path to the directory containing images.
            masks_dir (str): Path to the directory containing masks.
            transform (callable, optional): Albumentations transformations,
                called as ``transform(image=..., mask=...)``.

        Raises:
            ValueError: If the two directories do not contain the same number
                of entries, because positional pairing would then be wrong.
        """
        self.images_dir = images_dir
        self.masks_dir = masks_dir
        self.transform = transform
        self.images = sorted(os.listdir(images_dir))
        self.masks = sorted(os.listdir(masks_dir))

        # Pairing is purely positional; a count mismatch would otherwise show
        # up later as silently misaligned pairs or an IndexError mid-epoch.
        if len(self.images) != len(self.masks):
            raise ValueError(
                f"Found {len(self.images)} images in {images_dir!r} but "
                f"{len(self.masks)} masks in {masks_dir!r}; "
                "every image needs exactly one mask."
            )

    def __len__(self):
        """Return the number of image/mask pairs."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load, binarise and (optionally) transform the pair at ``idx``.

        Returns:
            tuple: ``(image, mask)``. The mask is thresholded at 127 into
            float32 values 0.0 / 1.0 and gets a leading channel axis. The
            image is the RGB array as loaded, or whatever ``transform``
            returns for it.
        """
        img_path = os.path.join(self.images_dir, self.images[idx])
        mask_path = os.path.join(self.masks_dir, self.masks[idx])

        # Context managers close the underlying file handles right away
        # instead of leaving that to garbage collection.
        with Image.open(img_path) as img_file:
            image = np.array(img_file.convert("RGB"))
        with Image.open(mask_path) as mask_file:
            mask = np.array(mask_file.convert("L"))
        mask = (mask > 127).astype(np.float32)

        # Apply Albumentations transformations to image and mask together
        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        # Add a channel dimension to the mask
        mask = mask[None, ...]  # Shape: [1, H, W]

        return image, mask



## Model Selection
1. Backbone Model:  
Use a segmentation architecture such as U-Net, DeepLabV3, or SegFormer. The `segmentation_models_pytorch` library provides ready-made implementations; this notebook uses its U-Net.

2. Customize for Road Segmentation:  
Choose an appropriate backbone (e.g., ResNet, EfficientNet) and adjust output classes for binary segmentation (road vs. background).

3. PyTorch Lightning Module:  
Wrap your model into a Lightning module for better structure:

In [15]:
import pytorch_lightning as pl
import torch.nn as nn
import torch.optim as optim
from torchmetrics import MeanMetric


class HaulRoadSegmentationModel(pl.LightningModule):
    """Lightning wrapper that trains a segmentation network with BCE-with-logits.

    Args:
        model: Network called as ``model(images)``; its output is fed
            directly to ``nn.BCEWithLogitsLoss`` against the masks.
        lr (float): Learning rate for the Adam optimizer.
    """

    def __init__(self, model, lr=1e-3):
        super().__init__()
        self.model = model
        self.loss_fn = nn.BCEWithLogitsLoss()
        self.lr = lr
        # Running epoch-level means of the loss, logged as "train_loss" / "val_loss".
        self.train_loss = MeanMetric()
        self.val_loss = MeanMetric()

    def forward(self, x):
        """Run the wrapped model on ``x`` and return its raw output."""
        return self.model(x)

    def _shared_step(self, batch, loss_metric, log_name):
        """Compute the loss for one batch and record it in ``loss_metric``."""
        images, masks = batch
        images = images.float()
        masks = masks.float()
        outputs = self(images)  # Forward pass
        loss = self.loss_fn(outputs, masks)
        # The loss is already a per-batch mean, so weight it by the batch size:
        # a smaller final batch then no longer skews the epoch average that
        # checkpointing and early stopping monitor.
        loss_metric.update(loss, weight=images.size(0))
        self.log(log_name, loss_metric, on_step=False, on_epoch=True)
        return loss

    def training_step(self, batch, batch_idx):
        """Return the training loss for ``batch`` (logged as ``train_loss``)."""
        return self._shared_step(batch, self.train_loss, "train_loss")

    def validation_step(self, batch, batch_idx):
        """Log the validation loss for ``batch`` as ``val_loss``; returns nothing."""
        self._shared_step(batch, self.val_loss, "val_loss")

    def configure_optimizers(self):
        """Use Adam over all parameters with the configured learning rate."""
        return optim.Adam(self.parameters(), lr=self.lr)


In [16]:
from scipy.ndimage import label

def calculate_road_size(mask, pixel_to_meter_ratio):
    """Measure the area of every connected foreground region in a mask.

    Connected components are found with ``scipy.ndimage.label`` using its
    default structuring element; non-zero elements count as foreground.

    Args:
        mask: Array-like mask to analyse.
        pixel_to_meter_ratio: Linear scale factor. It is squared to turn a
            pixel count into an area (presumably metres per pixel; confirm
            with the caller).

    Returns:
        list: One area per connected component, ordered by label id. Empty
        when the mask contains no foreground.
    """
    labeled_array, num_features = label(mask)
    # Count the pixels of all components in a single pass instead of
    # rescanning the whole label image once per component. Index 0 of the
    # histogram is the background and is dropped.
    pixel_counts = np.bincount(
        labeled_array.ravel(), minlength=num_features + 1
    )[1:]
    pixel_area = pixel_to_meter_ratio**2
    return list(pixel_counts * pixel_area)


## Training

1. Data Loaders:  
- Create PyTorch data loaders for the train, validation, and test splits (this notebook builds the train and validation loaders inside `HaulRoadDataModule`).

2. Training Loop:  
- Use PyTorch Lightning's `Trainer` to run the training loop and the per-epoch validation:

In [17]:
import segmentation_models_pytorch as smp

# Build the U-Net that the Lightning module wraps.
base_model = smp.Unet(
    classes=1,  # one output channel: road vs. background
    in_channels=3,  # RGB input
    encoder_weights=ENCODER_WEIGHTS,  # pretrained-weights identifier from the config cell
    encoder_name=ENCODER_NAME,  # backbone identifier from the config cell
)

In [18]:
# Preprocessing / augmentation pipeline applied jointly to image and mask.
# NOTE(review): this same pipeline, random flip included, is also passed to
# the validation datasets created in later cells.
transform = A.Compose(
    [
        A.Resize(height=256, width=256),  # bring every sample to 256x256
        A.HorizontalFlip(p=0.5),  # mirror a sample with probability 0.5
        A.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),  # per-channel normalisation
        ToTensorV2(),  # return PyTorch tensors
    ]
)


In [19]:
from torch.utils.data import DataLoader
import os

class HaulRoadDataModule(pl.LightningDataModule):
    """Builds the train/validation ``HaulRoadDataset`` objects and their loaders.

    Each split directory is expected to contain ``images`` and ``masks``
    sub-directories.

    Args:
        train_dir (str): Root directory of the training split.
        val_dir (str): Root directory of the validation split.
        batch_size (int): Batch size used by both loaders.
        transform (callable, optional): Transform for the training dataset.
        val_transform (callable, optional): Transform for the validation
            dataset. Defaults to ``transform`` so existing callers keep their
            behaviour; pass a pipeline without random augmentation to get
            deterministic validation metrics.
        num_workers (int): Worker processes per ``DataLoader``. ``0`` (the
            ``DataLoader`` default) loads data in the main process.
    """

    def __init__(
        self,
        train_dir,
        val_dir,
        batch_size=16,
        transform=None,
        val_transform=None,
        num_workers=0,
    ):
        super().__init__()
        self.train_dir = train_dir
        self.val_dir = val_dir
        self.batch_size = batch_size
        self.transform = transform
        # Fall back to the training transform when no dedicated one is given.
        self.val_transform = transform if val_transform is None else val_transform
        self.num_workers = num_workers

    def setup(self, stage=None):
        """Create the training and validation datasets."""
        self.train_dataset = HaulRoadDataset(
            images_dir=os.path.join(self.train_dir, "images"),
            masks_dir=os.path.join(self.train_dir, "masks"),
            transform=self.transform,
        )
        self.val_dataset = HaulRoadDataset(
            images_dir=os.path.join(self.val_dir, "images"),
            masks_dir=os.path.join(self.val_dir, "masks"),
            transform=self.val_transform,
        )

    def train_dataloader(self):
        """Return the shuffled training loader."""
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
        )

    def val_dataloader(self):
        """Return the validation loader (fixed order)."""
        return DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
        )


In [20]:
# Standalone train / validation datasets, one per split directory.
train_dataset, val_dataset = (
    HaulRoadDataset(
        images_dir=os.path.join(DATA_PATH, split_path, "images/"),
        masks_dir=os.path.join(DATA_PATH, split_path, "masks/"),
        transform=transform,
    )
    for split_path in (TRAIN_PATH, VAL_PATH)
)


In [21]:
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

# Define paths
# Split roots; HaulRoadDataModule appends "images" / "masks" to each of them.
train_dir = os.path.join(DATA_PATH, TRAIN_PATH)
val_dir = os.path.join(DATA_PATH, VAL_PATH)

# Initialize data module
# NOTE(review): the same `transform` (which includes a random horizontal flip)
# ends up on both the training and the validation dataset.
data_module = HaulRoadDataModule(
    train_dir=train_dir,
    val_dir=val_dir,
    batch_size=BATCH_SIZE,
    transform=transform
)

# Disabled alternative backbone, kept for reference only:
# # Define model (replace UNet with your custom architecture if applicable)
# from torchvision.models.segmentation import fcn_resnet50
# model = fcn_resnet50(pretrained=False, num_classes=1)

# Lightning model wrapping the U-Net built earlier (default lr=1e-3)
lit_model = HaulRoadSegmentationModel(model=base_model)

# Callbacks
# Both monitor "val_loss", the name logged in validation_step.
checkpoint_callback = ModelCheckpoint(monitor="val_loss", mode="min")
early_stopping_callback = EarlyStopping(monitor="val_loss", patience=5)

# Trainer
trainer = Trainer(
    max_epochs=MAX_EPOCHS,
    callbacks=[checkpoint_callback, early_stopping_callback],
    accelerator="auto",
    devices=GPUS
)

# Train
# The data module supplies the train and validation loaders.
trainer.fit(lit_model, datamodule=data_module)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type              | Params | Mode 
---------------------------------------------------------
0 | model      | Unet              | 24.4 M | train
1 | loss_fn    | BCEWithLogitsLoss | 0      | train
2 | train_loss | MeanMetric        | 0      | train
3 | val_loss   | MeanMetric        | 0      | train
---------------------------------------------------------
24.4 M    Trainable params
0         Non-trainable params
24.4 M    Total params
97.745    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/asrulsibaoel/miniconda3/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.
/home/asrulsibaoel/miniconda3/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.
/home/asrulsibaoel/miniconda3/lib/python3.12/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (41) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

In [22]:
# Bare expression: shows the Trainer object's repr as the cell output.
trainer

<pytorch_lightning.trainer.trainer.Trainer at 0x7fb454612450>