In [1]:
import torch

num_gpus = torch.cuda.device_count()
num_cpus = torch.get_num_threads()

print(f"Number of available CPU cores: {num_cpus}")
print(f"Number of available GPUs: {num_gpus}")

Number of available CPU cores: 1
Number of available GPUs: 1


In [None]:
!git clone https://github.com/aliabbasi2000/PIDNet.git
%cd /content/PIDNet/

In [None]:
!wget https://zenodo.org/record/5706578/files/Train.zip
!wget https://zenodo.org/record/5706578/files/Val.zip
!unzip Train.zip -d ./LoveDA
!unzip Val.zip -d ./LoveDA

In [None]:
!wget --content-disposition "https://drive.usercontent.google.com/u/0/uc?id=1hIBp_8maRr60-B3PF0NVtaA6TYBvO4y-&export=download"
!mv PIDNet_S_ImageNet.pth.tar /content/PIDNet/pretrained_models/imagenet

In [5]:
import os
from torch.utils.data import Dataset
from PIL import Image
import numpy as np
from albumentations import Compose, Normalize, Resize
from albumentations.pytorch import ToTensorV2
import cv2

class LoveDADataset(Dataset):
    def __init__(self, root, split='train', region='urban', transform=None):
        self.root = root
        self.split = split
        self.region = region
        self.image_dir = os.path.join(root, split, region, 'images_png')
        self.mask_dir = os.path.join(root, split, region, 'masks_png')
        self.images = sorted(os.listdir(self.image_dir))
        self.masks = sorted(os.listdir(self.mask_dir))
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
      image_path = os.path.join(self.image_dir, self.images[idx])
      mask_path = os.path.join(self.mask_dir, self.masks[idx])

      # Load the image and mask
      image = cv2.imread(image_path)
      image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
      mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

      # Convert mask to tensor and shift labels
      mask = np.array(mask, dtype=np.int64) - 1  # Shift labels to range [0, 6]
      mask = np.clip(mask, 0, num_classes - 1)  # Ensure no negative values

      # Apply transformations if specified
      if self.transform:
          augmented = self.transform(image=image, mask=mask)
          image = augmented["image"]
          mask = augmented["mask"]

      # Convert to PyTorch tensors
      image = image.clone().detach().float()  # HWC to CHW
      mask = mask.clone().detach().long()

      return image, mask


  check_for_updates()


In [10]:
from torch.utils.data import DataLoader
from albumentations import Compose, HorizontalFlip, RandomRotate90, RandomCrop, VerticalFlip, Normalize, OneOf, NoOp, GaussianBlur, RandomBrightnessContrast, MultiplicativeNoise
from torchvision.transforms import ToTensor
from torchvision import transforms

# Define transforms for training phase
from albumentations.pytorch import ToTensorV2

train_transform = Compose([
    Resize(512, 512),  # Resize both image and mask
    # Augmentation
    OneOf([
      HorizontalFlip(p=0.5),
      VerticalFlip(p=0.5),
      RandomRotate90(p=0.5),
      GaussianBlur(blur_limit=(3, 7), p=0.5),
      RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),  # Adjust brightness and contrast
      MultiplicativeNoise(multiplier=(0.9, 1.1), p=0.5),

      NoOp()  # do no augmentation some times
    ], p=0.75),  # The entire block is applied with 75% probability

    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], always_apply=True),  # Normalize
    ToTensorV2()  # Convert to PyTorch tensors
])

# Define transforms for the evaluation phase

eval_transform = Compose([
    Resize(512, 512),  # Resize to match the input size of the model
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], always_apply=True),  # Normalize with ImageNet stats
    ToTensorV2()  # Convert to PyTorch tensors
])

# Define dataset paths
data_root = '/content/PIDNet/LoveDA'


# Load datasets
train_dataset = LoveDADataset(root=data_root, split='Train', region='Urban', transform=train_transform)
val_dataset = LoveDADataset(root=data_root, split='Val', region='Rural', transform=eval_transform)


# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=2)



In [12]:
import torch
from models.pidnet import get_seg_model
import yaml

## get_pred ------------------------------------
#model = get_pred_model('pidnet_s', num_classes=7)

## get_seg ------------------------------------
# Convert the dictionary to a DotDict
class DotDict(dict):
    """A dictionary that supports attribute-style access."""
    def __getattr__(self, name):
        value = self.get(name)
        if isinstance(value, dict):
            return DotDict(value)
        return value

    def __setattr__(self, name, value):
        self[name] = value

# Load the YAML configuration
with open('/content/PIDNet/configs/loveda/pidnet_small_loveda.yaml', 'r') as f:
    cfg_dict = yaml.safe_load(f)

# Convert to DotDict for attribute-style access
cfg = DotDict(cfg_dict)

imgnet = 'imagenet' in cfg.MODEL.PRETRAINED

model = get_seg_model(cfg, imgnet_pretrained=imgnet)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)




In [13]:
import torch.nn as nn

criterion = nn.CrossEntropyLoss(ignore_index=-1)

optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)


In [14]:
import torch.nn.functional as F

def calculate_iou(predictions, ground_truth, num_classes):
    classwise_iou = []
    for cls in range(num_classes):
        tp = ((predictions == cls) & (ground_truth == cls)).sum().item()
        fp = ((predictions == cls) & (ground_truth != cls)).sum().item()
        fn = ((predictions != cls) & (ground_truth == cls)).sum().item()

        if tp + fp + fn == 0:
            iou = float('nan')
        else:
            iou = tp / (tp + fp + fn)
        classwise_iou.append(iou)

    return classwise_iou

# Training
model.train()
num_epochs = 20
num_classes = 7
class_labels = ["background", "building", "road", "water", "barren", "forest", "agriculture"]

for epoch in range(num_epochs):
    train_loss = 0.0

    for images, masks in train_loader:
        images, masks = images.to(device), masks.to(device)

        # Forward pass
        outputs = model(images)

        # Assuming 'outputs' is a list and the desired output is the first element:
        outputs = outputs[0]  # Select the first element of the list

        # Resize masks to match outputs in size= ...
        masks = F.interpolate(masks.unsqueeze(1).float(), size=(outputs.shape[2], outputs.shape[3]), mode='nearest').squeeze(1).long()
        loss = criterion(outputs, masks)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    train_loss /= len(train_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {train_loss:.4f}")
    torch.save(model.state_dict(), 'step3a_trained_on_urban_eval_on_rural.pth')


# Validation phase
model.eval()
val_loss = 0.0
val_classwise_iou = np.zeros(num_classes)
num_batches = 0

with torch.no_grad():
  for images, masks in val_loader:
    images, masks = images.to(device), masks.to(device)

    # Forward pass
    outputs = model(images)

    outputs = outputs[0]
    # Resize masks to match outputs size=?????
    masks = F.interpolate(masks.unsqueeze(1).float(), size=(outputs.shape[2], outputs.shape[3]), mode='nearest').squeeze(1).long() # Resize masks to match outputs
    loss = criterion(outputs, masks)
    val_loss += loss.item()

    # Compute class-wise IoU
    preds = torch.argmax(outputs, dim=1)
    batch_iou = calculate_iou(preds, masks, num_classes)
    val_classwise_iou += np.nan_to_num(batch_iou)  # Accumulate IoU for each class
    num_batches += 1

val_loss /= len(val_loader)
mean_classwise_iou = val_classwise_iou / num_batches
mean_iou = np.nanmean(mean_classwise_iou)  # Overall mIoU
print(f"Validation Loss: {val_loss:.4f}, Validation mIoU: {mean_iou:.4f}")
print("Class-wise IoU:")
for cls, label in enumerate(class_labels):
    print(f"  {label}: {mean_classwise_iou[cls]:.4f}")



Epoch 1/20, Training Loss: 2.9416
Epoch 2/20, Training Loss: 1.1593
Epoch 3/20, Training Loss: 0.9933
Epoch 4/20, Training Loss: 0.9188
Epoch 5/20, Training Loss: 0.8741
Epoch 6/20, Training Loss: 0.8405
Epoch 7/20, Training Loss: 0.8119
Epoch 8/20, Training Loss: 0.7856
Epoch 9/20, Training Loss: 0.7711
Epoch 10/20, Training Loss: 0.7543
Epoch 11/20, Training Loss: 0.7349
Epoch 12/20, Training Loss: 0.7246
Epoch 13/20, Training Loss: 0.7174
Epoch 14/20, Training Loss: 0.7083
Epoch 15/20, Training Loss: 0.6895
Epoch 16/20, Training Loss: 0.6813
Epoch 17/20, Training Loss: 0.6680
Epoch 18/20, Training Loss: 0.6676
Epoch 19/20, Training Loss: 0.6601
Epoch 20/20, Training Loss: 0.6448
Validation Loss: 1.9452, Validation mIoU: 0.2675
Class-wise IoU:
  background: 0.4959
  building: 0.3543
  road: 0.2131
  water: 0.2966
  barren: 0.0914
  forest: 0.1627
  agriculture: 0.2583


#Augmantation7: all Augs except GaussianBlur

Validation Loss: 1.9452, Validation mIoU: 0.2675

Class-wise IoU:
  background: 0.4959
  building: 0.3543
  road: 0.2131
  water: 0.2966
  barren: 0.0914
  forest: 0.1627
  agriculture: 0.2583

#Augmentation6: Only RandomRotate90

Validation Loss: 1.8232, Validation mIoU: 0.2637

Class-wise IoU:
  background: 0.4943
  building: 0.3063
  road: 0.2164
  water: 0.3013
  barren: 0.1157
  forest: 0.1566
  agriculture: 0.2550

#Augmentation5: Only GaussianBlur

Validation Loss: 1.9924, Validation mIoU: 0.2472

Class-wise IoU:
  background: 0.5000
  building: 0.3200
  road: 0.2273
  water: 0.3239
  barren: 0.0900
  forest: 0.0731
  agriculture: 0.1965

#Augmentation4: OneOf([HorizontalFlip(p=0.5), VerticalFlip(p=0.5), RandomRotate90(p=0.5), GaussianBlur(blur_limit=(3, 7), p=0.5),RandomBrightnessContras(brightness_limit=0.2, contrast_limit=0.2, p=0.5),MultiplicativeNoise(multiplier=(0.9, 1.1), p=0.5),NoOp()], p=0.75),

Epoch 1/20, Training Loss: 2.7239

Epoch 2/20, Training Loss: 1.1570

Epoch 3/20, Training Loss: 0.9961

Epoch 4/20, Training Loss: 0.9187

Epoch 5/20, Training Loss: 0.8634

Epoch 6/20, Training Loss: 0.8490

Epoch 7/20, Training Loss: 0.8249

Epoch 8/20, Training Loss: 0.7929

Epoch 9/20, Training Loss: 0.7793

Epoch 10/20, Training Loss: 0.7572

Epoch 11/20, Training Loss: 0.7410

Epoch 12/20, Training Loss: 0.7412

Epoch 13/20, Training Loss: 0.7206

Epoch 14/20, Training Loss: 0.7119

Epoch 15/20, Training Loss: 0.7008

Epoch 16/20, Training Loss: 0.6842

Epoch 17/20, Training Loss: 0.6835

Epoch 18/20, Training Loss: 0.6785

Epoch 19/20, Training Loss: 0.6655

Epoch 20/20, Training Loss: 0.6561

Validation Loss: 1.9382, Validation mIoU: 0.2736

Class-wise IoU:
  background: 0.5085
  building: 0.3271
  road: 0.2415
  water: 0.2559
  barren: 0.0853
  forest: 0.1348
  agriculture: 0.3621

# Augmentation3: RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),MultiplicativeNoise(multiplier=(0.9, 1.1), p=0.5),

Validation Loss: 1.9632, Validation mIoU: 0.2383

Class-wise IoU:
  background: 0.4722
  building: 0.3635
  road: 0.1996
  water: 0.2604
  barren: 0.0809
  forest: 0.0923
  agriculture: 0.1994

# Augmentation2: OneOf([HorizontalFlip(p=0.5), VerticalFlip(p=0.5), RandomRotate90(p=0.5),GaussianBlur(blur_limit=(3, 7), p=0.5), NoOp()], p=0.75),


Epoch 1/20, Training Loss: 2.2140

Epoch 2/20, Training Loss: 1.0422

Epoch 3/20, Training Loss: 0.9286

Epoch 4/20, Training Loss: 0.8790

Epoch 5/20, Training Loss: 0.8348

Epoch 6/20, Training Loss: 0.8201

Epoch 7/20, Training Loss: 0.7918

Epoch 8/20, Training Loss: 0.7691

Epoch 9/20, Training Loss: 0.7515

Epoch 10/20, Training Loss: 0.7390

Epoch 11/20, Training Loss: 0.7260

Epoch 12/20, Training Loss: 0.7160

Epoch 13/20, Training Loss: 0.6964

Epoch 14/20, Training Loss: 0.7039

Epoch 15/20, Training Loss: 0.6896

Epoch 16/20, Training Loss: 0.6878

Epoch 17/20, Training Loss: 0.6713

Epoch 18/20, Training Loss: 0.6693

Epoch 19/20, Training Loss: 0.6601

Epoch 20/20, Training Loss: 0.6513

Validation Loss: 1.7113, Validation mIoU: 0.2669

Class-wise IoU:
  background: 0.4945
  building: 0.3915
  road: 0.2166
  water: 0.3269
  barren: 0.0921
  forest: 0.0876
  agriculture: 0.2590

# Augmentation1: Only RandomRotate90(p=0.5), GaussianBlur(blur_limit=(3, 7), p=0.5),

Validation Loss: 1.8494, Validation mIoU: 0.2513
Class-wise IoU:
  background: 0.4961
  building: 0.3070
  road: 0.2063
  water: 0.3012
  barren: 0.0636
  forest: 0.1090
  agriculture: 0.2762