[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/annanasnas/semantic_segmentation-25/blob/main/DeepLabV2.ipynb)

In [1]:
import random
import numpy as np
import torch

def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Applies the same ``seed`` to Python's ``random`` module, NumPy's global
    generator, and torch (the default generator plus all CUDA devices).

    Args:
        seed: Integer seed handed unchanged to each seeding function.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)


# Fix the seed once, before any data loading or model construction.
set_seed(42)

In [2]:
REPO = "https://github.com/annanasnas/semantic_segmentation-25.git"
!git clone $REPO
%cd /content/semantic_segmentation-25
!pip install -q -r requirements.txt pyyaml

Cloning into 'semantic_segmentation-25'...
remote: Enumerating objects: 136, done.
remote: Counting objects: 100% (136/136), done.
remote: Compressing objects: 100% (92/92), done.
remote: Total 136 (delta 53), reused 110 (delta 31), pack-reused 0 (from 0)
Receiving objects: 100% (136/136), 299.96 KiB | 1.26 MiB/s, done.
Resolving deltas: 100% (53/53), done.
/content/semantic_segmentation-25


## Config

In [3]:
import yaml

with open("configs/deeplabv2.yaml", "r", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

!python scripts/download_data.py

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = cfg["train"]["batch_size"]
epochs = cfg["train"]["epochs"]
data_dir = cfg["data"]["root"]
learning_rate = cfg["train"]["lr"]
img_size = cfg["data"]["img_size"]
name = cfg["model"]["name"]

## DataLoaders

In [4]:
from datasets.cityscapes import CityScapes
from torch.utils.data import DataLoader
from torchvision import transforms


# Per-channel mean / standard deviation used to normalize the input images.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Convert images to tensors, then normalize them channel-wise.
normalize = transforms.Normalize(mean=imagenet_mean, std=imagenet_std)
image_transforms = transforms.Compose([transforms.ToTensor(), normalize])

# Both splits share every dataset argument except the split name.
shared_dataset_args = dict(
    root_dir=data_dir,
    image_transform=image_transforms,
    image_size=img_size,
)
train_dataset = CityScapes(split="train", **shared_dataset_args)
val_dataset = CityScapes(split="val", **shared_dataset_args)

# Only the training loader shuffles; validation keeps the dataset order.
shared_loader_args = dict(batch_size=batch_size, num_workers=2)
train_dataloader = DataLoader(train_dataset, shuffle=True, **shared_loader_args)
val_dataloader = DataLoader(val_dataset, shuffle=False, **shared_loader_args)

## Training

In [None]:
# Mount Google Drive at /content/drive; the training cell keeps its
# checkpoints and metrics log under /content/drive/MyDrive.
from google.colab import drive
drive.mount("/content/drive")

In [2]:
import torch.optim as optim
import torch.nn as nn
from models.deeplabv2.deeplabv2 import get_deeplab_v2
from scripts.train import train_model
from torch.amp import autocast, GradScaler
from scripts.checkpoint import Checkpoint
from pathlib import Path
import pandas as pd

# Build the model together with its optimizer, loss, LR schedule and AMP scaler.
model = get_deeplab_v2()
optimizer = optim.SGD(model.optim_parameters(lr=learning_rate), momentum=0.9, weight_decay=5e-4)
# Targets equal to 255 are excluded from the loss.
criterion = nn.CrossEntropyLoss(ignore_index=255)
# NOTE(review): total_iters is hard-coded to 50 instead of being derived from
# `epochs` — confirm it matches how train_model steps the scheduler.
scheduler = optim.lr_scheduler.PolynomialLR(optimizer, total_iters=50, power=0.9)
scaler = GradScaler()

model.to(device)

# Checkpoints and the metrics log live on the mounted Google Drive.
ckpt_dir = f"/content/drive/MyDrive/semantic segmentation/checkpoints/{name}"
log_csv = f"/content/drive/MyDrive/semantic segmentation/checkpoints/{name}/log.csv"
ckpt_mgr = Checkpoint(ckpt_dir)

# ckpt_dir is a plain string, so it must be wrapped in Path before using `/`.
best_path = Path(ckpt_dir) / "best.pth"
empty_metrics = {"epoch": [], "train_loss": [], "val_loss": [], "val_miou": []}

if best_path.exists():
    # Resume from the best checkpoint saved by a previous run.
    # weights_only=False unpickles arbitrary objects: only load checkpoints
    # produced by this notebook, never files from an untrusted source.
    ckpt = torch.load(best_path, map_location="cpu", weights_only=False)
    model.load_state_dict(ckpt["model"])
    optimizer.load_state_dict(ckpt["optimizer"])
    scaler.load_state_dict(ckpt["scaler"])
    scheduler.load_state_dict(ckpt["scheduler"])
    start_epoch = ckpt["epoch"]
    best_miou   = ckpt["best_miou"]
    if Path(log_csv).exists():
        # Continue the metrics history recorded so far.
        df_prev = pd.read_csv(log_csv)
        metrics = df_prev.to_dict("list")
    else:
        # A checkpoint without its log should not block resuming.
        metrics = empty_metrics
else:
    # Fresh run: start from epoch 0 with an empty metrics history.
    start_epoch = 0
    best_miou   = 0
    metrics = empty_metrics

train_model(model, train_dataloader, val_dataloader,
            device, epochs, autocast, scaler,
            optimizer, criterion, scheduler,
            ckpt_mgr, start_epoch, best_miou,
            log_csv, metrics)

## Testing

In [None]:
from scripts.utils import create_final_table, evaluate_miou

# Rebuild the network and restore the weights of the best checkpoint.
model = get_deeplab_v2()
# Recomputed from ckpt_dir so this cell does not rely on the training cell
# having run to completion.
best_path = Path(ckpt_dir) / "best.pth"
# torch.load returns the saved checkpoint dict, not a model; its "model" entry
# holds the state dict that has to be loaded into the network.
# weights_only=False unpickles arbitrary objects: only load trusted checkpoints.
best_ckpt = torch.load(best_path, map_location="cpu", weights_only=False)
model.load_state_dict(best_ckpt["model"])
model.to(device)
model.eval()

# Summary table for the model, extended with validation mIoU in percent.
df = create_final_table(model, name, device, (img_size, img_size*2), epochs)
df["mIoU (%)"] = evaluate_miou(model, val_dataloader, device) * 100
print(df.to_markdown(index=False))