[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/annanasnas/semantic_segmentation-25/blob/main/DeepLabV2.ipynb)

In [None]:
import random
import numpy as np
import torch

def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    The same ``seed`` is applied, in order, to Python's ``random`` module,
    NumPy's legacy global generator, PyTorch's default generator and the
    generators of all CUDA devices.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)


# Fix the seed once, before any data loading or model construction.
set_seed(42)

In [None]:
REPO = "https://github.com/annanasnas/semantic_segmentation-25.git"
!git clone $REPO
%cd /content/semantic_segmentation-25
!pip install -q -r requirements.txt pyyaml

Cloning into 'semantic_segmentation-25'...
remote: Enumerating objects: 150, done.[K
remote: Counting objects: 100% (150/150), done.[K
remote: Compressing objects: 100% (102/102), done.[K
remote: Total 150 (delta 63), reused 117 (delta 34), pack-reused 0 (from 0)[K
Receiving objects: 100% (150/150), 304.32 KiB | 10.87 MiB/s, done.
Resolving deltas: 100% (63/63), done.
/content/semantic_segmentation-25
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for fvcore (setup.py) ... [?25l[?25hdone
  Building wheel for iopath (setup.py) ... [?25l[?25hdone


## Config

In [None]:
import yaml

with open("configs/bisenet.yaml", "r", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

!python scripts/download_data.py

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = cfg["train"]["batch_size"]
epochs = cfg["train"]["epochs"]
data_dir = cfg["data"]["root"]
learning_rate = cfg["train"]["lr"]
img_size = cfg["data"]["img_size"]
name = cfg["model"]["name"]

## DataLoaders

In [None]:
from datasets.cityscapes import CityScapes
from torch.utils.data import DataLoader
from torchvision import transforms


# Per-channel statistics passed to Normalize below.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Images are converted to tensors first and normalized second.
image_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
    ]
)

# Both splits share the data root, the image transform and the image size;
# only the split name differs.
dataset_kwargs = dict(
    root_dir=data_dir,
    image_transform=image_transforms,
    image_size=img_size,
)
train_dataset = CityScapes(split="train", **dataset_kwargs)
val_dataset = CityScapes(split="val", **dataset_kwargs)

# Only the training loader shuffles; batch size and worker count are shared.
loader_kwargs = dict(batch_size=batch_size, num_workers=2)
train_dataloader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_dataloader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

## Training

In [None]:
# Mount Google Drive at /content/drive (Colab only). The checkpoint directory
# used by the training cell is located under this mount point.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
import torch.optim as optim
import torch.nn as nn
from models.bisenet.build_bisenet import BiSeNet
from scripts.train import train_model
from torch.amp import autocast, GradScaler
from scripts.checkpoint import Checkpoint
from pathlib import Path
import pandas as pd

# Model, optimizer, loss and AMP gradient scaler.
model = BiSeNet(num_classes=19, context_path='resnet18')
optimizer = optim.SGD(model.optim_parameters(lr=learning_rate), momentum=0.9, weight_decay=5e-4)
criterion = nn.CrossEntropyLoss(ignore_index=255)  # targets equal to 255 do not contribute to the loss
scaler = GradScaler()

model.to(device)

# Checkpoints and the metrics log are kept on the mounted Google Drive.
ckpt_dir = Path("/content/drive/MyDrive/semantic segmentation/checkpoints") / name
log_csv  = ckpt_dir / "log.csv"
# A single checkpoint manager is enough; it is the object handed to train_model.
ckpt_mgr = Checkpoint(ckpt_dir)

# Defaults for a fresh run; overridden below when a checkpoint is found.
start_epoch = 0
iteration = 0
best_miou = 0
metrics = {"epoch": [], "train_loss": [], "val_loss": [], "val_miou": []}

best_path = ckpt_dir / "best.pth"
if best_path.exists():
    # NOTE: weights_only=False unpickles arbitrary Python objects. Only load
    # checkpoint files that were written by this notebook.
    state = torch.load(best_path, map_location="cpu", weights_only=False)
    model.load_state_dict(state["model"])
    optimizer.load_state_dict(state["optimizer"])
    scaler.load_state_dict(state["scaler"])
    start_epoch = state["epoch"]
    iteration = state["iteration"]
    best_miou = state["best_miou"]
    # The log may be absent even when a checkpoint exists (e.g. it was
    # removed by hand); in that case continue with an empty history rather
    # than failing on read_csv.
    if log_csv.exists():
        metrics = pd.read_csv(log_csv).to_dict("list")
    # NOTE(review): training resumes from best.pth, not from the most recent
    # epoch, so log.csv may already contain rows for epochs after
    # start_epoch - confirm how train_model handles that.

# Total number of optimizer steps over the whole schedule.
max_iter = len(train_dataloader) * epochs
train_model(model, train_dataloader, val_dataloader,
            device, epochs, autocast, scaler,
            optimizer, criterion, learning_rate,
            iteration, max_iter,
            ckpt_mgr, start_epoch, best_miou,
            log_csv, metrics)

## Results

In [None]:
from scripts.utils import create_final_table, evaluate_miou
import warnings, logging

# Only let ERROR-level (and above) records from fvcore's jit analysis logger through.
logging.getLogger("fvcore.nn.jit_analysis").setLevel(logging.ERROR)

# A fresh model instance is built for the summary table.
model = BiSeNet(num_classes=19, context_path='resnet18')
df = create_final_table(model, name, device, (img_size, img_size*2), epochs)

# Load the checkpoint the same way the training cell does. It stores
# non-tensor entries (epoch, iteration, best_miou) next to the weights, and
# PyTorch versions that default to weights_only=True may refuse such a file.
# NOTE: weights_only=False unpickles arbitrary Python objects. Only load
# checkpoint files that were written by this notebook.
best_model = torch.load(best_path, map_location=device, weights_only=False)
model.load_state_dict(best_model["model"])

# This model was created in this cell, so place it on the evaluation device
# and switch it to inference mode explicitly (a no-op if the helpers
# already do so).
model.to(device)
model.eval()

df["mIoU (%)"] = evaluate_miou(model, val_dataloader, device) * 100
print(df.to_markdown(index=False))

| Model               | Latency                                                                  | FLOPs   | Params   |   mIoU (%) |
|:--------------------|:-------------------------------------------------------------------------|:--------|:---------|-----------:|
| bisenet - 50 epochs | Mean latency: 0.02 +/- 0.01, Mean FPS: 68.38 +/- 13.14 frames per second | 25.8 G  | 12.6 M   |    54.3385 |


In [None]:
from scripts.utils import plot_log

# Hand the CSV log path that was given to train_model over to plot_log
# (presumably draws the logged loss / mIoU curves - confirm in scripts/utils).
plot_log(log_csv)