In [1]:
import torch

num_gpus = torch.cuda.device_count()
num_cpus = torch.get_num_threads()

print(f"Number of available CPU cores: {num_cpus}")
print(f"Number of available GPUs: {num_gpus}")

Number of available CPU cores: 1
Number of available GPUs: 1


In [None]:
!pip install torch torchvision albumentations

In [3]:
!git clone https://github.com/aliabbasi2000/PIDNet.git
%cd /content/PIDNet/

Cloning into 'PIDNet'...
remote: Enumerating objects: 585, done.[K
remote: Counting objects: 100% (27/27), done.[K
remote: Compressing objects: 100% (17/17), done.[K
remote: Total 585 (delta 11), reused 21 (delta 10), pack-reused 558 (from 1)[K
Receiving objects: 100% (585/585), 246.53 MiB | 22.94 MiB/s, done.
Resolving deltas: 100% (239/239), done.
/content/PIDNet


In [None]:
!wget https://zenodo.org/record/5706578/files/Train.zip
!wget https://zenodo.org/record/5706578/files/Val.zip
!unzip Train.zip -d ./LoveDA
!unzip Val.zip -d ./LoveDA

In [None]:
!wget --content-disposition "https://drive.usercontent.google.com/u/0/uc?id=1hIBp_8maRr60-B3PF0NVtaA6TYBvO4y-&export=download"
!mv PIDNet_S_ImageNet.pth.tar /content/PIDNet/pretrained_models/imagenet

In [15]:
import os
from torch.utils.data import Dataset
from PIL import Image
import numpy as np
from albumentations import Compose, Normalize, Resize
from albumentations.pytorch import ToTensorV2
import cv2

class LoveDADataset(Dataset):
    def __init__(self, root, split='train', region='urban', transform=None):
        self.root = root
        self.split = split
        self.region = region
        self.image_dir = os.path.join(root, split, region, 'images_png')
        self.mask_dir = os.path.join(root, split, region, 'masks_png')
        self.images = sorted(os.listdir(self.image_dir))
        self.masks = sorted(os.listdir(self.mask_dir))
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
      image_path = os.path.join(self.image_dir, self.images[idx])
      mask_path = os.path.join(self.mask_dir, self.masks[idx])

      # Load the image and mask
      image = cv2.imread(image_path)
      image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
      mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

      # Convert mask to tensor and shift labels
      mask = np.array(mask, dtype=np.int64) - 1  # Shift labels to range [0, 6]
      mask = np.clip(mask, 0, num_classes - 1)  # Ensure no negative values

      # Apply transformations if specified
      if self.transform:
          augmented = self.transform(image=image, mask=mask)
          image = augmented["image"]
          mask = augmented["mask"]

      # Convert to PyTorch tensors
      image = image.clone().detach().float()  # HWC to CHW
      mask = mask.clone().detach().long()

      return image, mask


In [16]:
from torch.utils.data import DataLoader
from albumentations import Compose, HorizontalFlip, RandomRotate90
from torchvision.transforms import ToTensor
from torchvision import transforms

# Define transforms for training phase
import albumentations as A
from albumentations.pytorch import ToTensorV2

train_transform = A.Compose([
    A.Resize(512, 512),  # Resize both image and mask
    #####    AUGMENTATION HERE      #######
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize
    ToTensorV2()  # Convert to PyTorch tensors
])

# Define transforms for the evaluation phase

eval_transform = A.Compose([
    A.Resize(512, 512),  # Resize to match the input size of the model
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize with ImageNet stats
    ToTensorV2()  # Convert to PyTorch tensors
])

# Define dataset paths
data_root = '/content/PIDNet/LoveDA'


# Load datasets
train_dataset = LoveDADataset(root=data_root, split='Train', region='Urban', transform=train_transform)
val_dataset = LoveDADataset(root=data_root, split='Val', region='Rural', transform=eval_transform)


# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=2)



In [17]:
import torch
from models.pidnet import get_seg_model
import yaml

## get_pred ------------------------------------
#model = get_pred_model('pidnet_s', num_classes=7)

## get_seg ------------------------------------
# Convert the dictionary to a DotDict
class DotDict(dict):
    """A dictionary that supports attribute-style access."""
    def __getattr__(self, name):
        value = self.get(name)
        if isinstance(value, dict):
            return DotDict(value)
        return value

    def __setattr__(self, name, value):
        self[name] = value

# Load the YAML configuration
with open('/content/PIDNet/configs/loveda/pidnet_small_loveda.yaml', 'r') as f:
    cfg_dict = yaml.safe_load(f)

# Convert to DotDict for attribute-style access
cfg = DotDict(cfg_dict)

imgnet = 'imagenet' in cfg.MODEL.PRETRAINED

model = get_seg_model(cfg, imgnet_pretrained=imgnet)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)




In [18]:
import torch.nn as nn

criterion = nn.CrossEntropyLoss(ignore_index=-1)

optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)


In [19]:
import torch.nn.functional as F

def calculate_iou(predictions, ground_truth, num_classes):
    classwise_iou = []
    for cls in range(num_classes):
        tp = ((predictions == cls) & (ground_truth == cls)).sum().item()
        fp = ((predictions == cls) & (ground_truth != cls)).sum().item()
        fn = ((predictions != cls) & (ground_truth == cls)).sum().item()

        if tp + fp + fn == 0:
            iou = float('nan')
        else:
            iou = tp / (tp + fp + fn)
        classwise_iou.append(iou)

    return classwise_iou

# Training
model.train()
num_epochs = 20
num_classes = 7
class_labels = ["background", "building", "road", "water", "barren", "forest", "agriculture"]

for epoch in range(num_epochs):
    train_loss = 0.0

    for images, masks in train_loader:
        images, masks = images.to(device), masks.to(device)

        # Forward pass
        outputs = model(images)

        # Assuming 'outputs' is a list and the desired output is the first element:
        outputs = outputs[0]  # Select the first element of the list

        # Resize masks to match outputs in size= ...
        masks = F.interpolate(masks.unsqueeze(1).float(), size=(outputs.shape[2], outputs.shape[3]), mode='nearest').squeeze(1).long()
        loss = criterion(outputs, masks)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    train_loss /= len(train_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {train_loss:.4f}")
    torch.save(model.state_dict(), 'step3a_trained_on_urban_eval_on_rural.pth')


# Validation phase
model.eval()
val_loss = 0.0
val_classwise_iou = np.zeros(num_classes)
num_batches = 0

with torch.no_grad():
  for images, masks in val_loader:
    images, masks = images.to(device), masks.to(device)

    # Forward pass
    outputs = model(images)

    outputs = outputs[0]
    # Resize masks to match outputs size=?????
    masks = F.interpolate(masks.unsqueeze(1).float(), size=(outputs.shape[2], outputs.shape[3]), mode='nearest').squeeze(1).long() # Resize masks to match outputs
    loss = criterion(outputs, masks)
    val_loss += loss.item()

    # Compute class-wise IoU
    preds = torch.argmax(outputs, dim=1)
    batch_iou = calculate_iou(preds, masks, num_classes)
    val_classwise_iou += np.nan_to_num(batch_iou)  # Accumulate IoU for each class
    num_batches += 1

val_loss /= len(val_loader)
mean_classwise_iou = val_classwise_iou / num_batches
mean_iou = np.nanmean(mean_classwise_iou)  # Overall mIoU
print(f"Validation Loss: {val_loss:.4f}, Validation mIoU: {mean_iou:.4f}")
print("Class-wise IoU:")
for cls, label in enumerate(class_labels):
    print(f"  {label}: {mean_classwise_iou[cls]:.4f}")


Epoch 1/20, Training Loss: 2.1118
Epoch 2/20, Training Loss: 1.0942
Epoch 3/20, Training Loss: 0.9358
Epoch 4/20, Training Loss: 0.8998
Epoch 5/20, Training Loss: 0.8287
Epoch 6/20, Training Loss: 0.7924
Epoch 7/20, Training Loss: 0.7597
Epoch 8/20, Training Loss: 0.7371
Epoch 9/20, Training Loss: 0.7148
Epoch 10/20, Training Loss: 0.6898
Epoch 11/20, Training Loss: 0.6714
Epoch 12/20, Training Loss: 0.6540
Epoch 13/20, Training Loss: 0.6419
Epoch 14/20, Training Loss: 0.6283
Epoch 15/20, Training Loss: 0.6084
Epoch 16/20, Training Loss: 0.5979
Epoch 17/20, Training Loss: 0.5935
Epoch 18/20, Training Loss: 0.5713
Epoch 19/20, Training Loss: 0.5564
Epoch 20/20, Training Loss: 0.5491
Validation Loss: 2.1376, Validation mIoU: 0.2480
Class-wise IoU:
  background: 0.5108
  building: 0.2952
  road: 0.2502
  water: 0.2469
  barren: 0.0663
  forest: 0.1004
  agriculture: 0.2661


lr=0.0001 with get_seg_model and no Augmentaton & Domain Shift(Trained of Urban, Val on Rural)

Epoch 1/20, Training Loss: 2.0786

Epoch 2/20, Training Loss: 1.0463

Epoch 3/20, Training Loss: 0.9087

Epoch 4/20, Training Loss: 0.8506

Epoch 5/20, Training Loss: 0.7955

Epoch 6/20, Training Loss: 0.7593

Epoch 7/20, Training Loss: 0.7413

Epoch 8/20, Training Loss: 0.7044

Epoch 9/20, Training Loss: 0.6886

Epoch 10/20, Training Loss: 0.6677

Epoch 11/20, Training Loss: 0.6507

Epoch 12/20, Training Loss: 0.6294

Epoch 13/20, Training Loss: 0.6180

Epoch 14/20, Training Loss: 0.6018

Epoch 15/20, Training Loss: 0.5814

Epoch 16/20, Training Loss: 0.5706

Epoch 17/20, Training Loss: 0.5618

Epoch 18/20, Training Loss: 0.5462

Epoch 19/20, Training Loss: 0.5390

Epoch 20/20, Training Loss: 0.5177

Validation Loss: 1.9366, Validation mIoU: 0.
2467
Class-wise IoU:
  background: 0.5047
  building: 0.2810
  road: 0.2278
  water: 0.2999
  barren: 0.0795
  forest: 0.0897
  agriculture: 0.2445