# <span style="color:red; font-weight:bold; ">A clean and modern RangeViT implementation for the Waymo Open Dataset in PyTorch 2.4</span>  

## <span style="font-weight:bold">1. DataLoader</span>

### 1.1 Dataset Structure
The dataset should be structured as follows:
```
WoD/
├── validation/
│   ├── preprocess_mini/
│   │   ├── xxxx.bin
│   │   ├── xxxx.bin
```



In [1]:
import torch.optim as optim
import torch
import numpy as np

import os
import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms

from torch.utils.data import Dataset, DataLoader

import timm
import torch
import torch.nn as nn
import torch.nn.functional as F

from tqdm.notebook import tqdm

from model.WaymoSegmentationDataset import WaymoSegmentationDataset
from model.RangeViTSegmentationModel import RangeViTSegmentationModel

from segmentation_models_pytorch.losses import FocalLoss, LovaszLoss


In [2]:
# Compute device: the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Training split (constructed with training=True), served in shuffled batches of 32.
dataset = WaymoSegmentationDataset('../WaymoDataset/train', training=True)
loader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
    num_workers=4,
)

# Validation split (constructed with training=False), served in order in batches of 8.
val_dataset = WaymoSegmentationDataset('../WaymoDataset/validation', training=False)
val_loader = DataLoader(
    val_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=4,
)


In [3]:
# Metric and LR scheduler classes used only by this setup cell.
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchmetrics.classification import MulticlassJaccardIndex

# Hyper-parameters shared by the metric, the model and the scheduler.
num_classes = 20
in_channels = 9 # range, x, y, z, intensity, flag, R, G, B
num_epochs = 60

# Per-class IoU (average=None) with label 0 passed as ignore_index, kept on the
# same device as the model outputs.
metric = MulticlassJaccardIndex(
    num_classes=num_classes,
    average=None,
    ignore_index=0,
).to(device)

model = RangeViTSegmentationModel(n_classes=num_classes, in_channels=in_channels).to(device)

# Loss terms; both are given ignore_index=0.
# Alternative (Lovasz only):
# criterion = LovaszLoss(mode='multiclass', ignore_index=0, per_image=False)
focal = FocalLoss(mode='multiclass', ignore_index=0)
lovasz = LovaszLoss(mode='multiclass', ignore_index=0, per_image=False)


def criterion(outputs, targets):
    """Return the focal loss plus the Lovasz loss for the same outputs/targets."""
    focal_term = focal(outputs, targets)
    lovasz_term = lovasz(outputs, targets)
    return focal_term + lovasz_term


optimizer = optim.AdamW(
    model.parameters(),
    lr=0.0004,
    weight_decay=0.01,
    betas=(0.9, 0.999),
)

# Cosine schedule spanning num_epochs scheduler steps, annealing towards eta_min=0.
scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=0)

In [4]:
def train_one_epoch(model, loader, optimizer, criterion, metric,epoch):
    """Train ``model`` for one pass over ``loader`` and report loss and mIoU.

    Batches are moved to the module-level ``device`` before the forward pass.

    Args:
        model: Network to optimise; it is switched to training mode here.
        loader: Iterable yielding ``(imgs, labels)`` batches.
        optimizer: Optimizer whose gradients are zeroed and which is stepped
            once per batch.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        metric: Object exposing ``reset()``, ``update(preds, labels)`` and
            ``compute()``. It is reset on entry and still holds this epoch's
            accumulated state on return.
        epoch: Zero-based epoch index, used only for display (``epoch+1``).

    Returns:
        tuple[float, float]: ``(average batch loss, mIoU)`` where mIoU is the
        value computed after the last batch. Both are ``nan`` when ``loader``
        yields no batches.
    """
    model.train()
    metric.reset()  # Start the running IoU from scratch for this epoch

    total_loss = 0.0
    # Count batches explicitly instead of using len(loader): this also works
    # for loaders without __len__ and avoids dividing by zero when empty.
    num_batches = 0
    # Defined up front so the summary below cannot hit an unbound name.
    mean_iou_value = float('nan')

    batch_bar = tqdm(loader, desc=f"Training Epoch {epoch+1}", leave=False)
    for imgs, labels in batch_bar:
        imgs = imgs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Running metric over every batch seen so far in this epoch.
        preds = outputs.argmax(dim=1)
        metric.update(preds, labels)
        ious = metric.compute()
        # Entries equal to 0 are dropped before averaging.
        # NOTE(review): this presumably removes the ignored class, but it also
        # removes any class whose IoU is exactly 0 - confirm this is intended.
        mean_iou_value = torch.mean(ious[ious != 0]).item()

        loss_value = loss.item()
        batch_bar.set_postfix(loss=loss_value, mIoU=mean_iou_value)
        total_loss += loss_value
        num_batches += 1

    avg_loss = total_loss / num_batches if num_batches else float('nan')
    print(f"Epoch [{epoch+1}] Loss: {avg_loss:.4f}, mIoU: {mean_iou_value:.4f}")
    return avg_loss, mean_iou_value


In [5]:
def eval_model(model, loader, criterion, metric):
    """Evaluate ``model`` on ``loader`` without gradients and report loss and mIoU.

    Batches are moved to the module-level ``device`` before the forward pass.

    Args:
        model: Network to evaluate; it is switched to eval mode here and left
            in that mode.
        loader: Iterable yielding ``(imgs, labels)`` batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        metric: Object exposing ``reset()``, ``update(preds, labels)`` and
            ``compute()``. It is reset on entry and still holds the evaluation
            state on return, so callers may call ``metric.compute()`` again.

    Returns:
        tuple[float, float]: ``(average batch loss, mIoU)`` where mIoU is the
        value computed after the last batch. Both are ``nan`` when ``loader``
        yields no batches.
    """
    model.eval()
    metric.reset()  # Start the IoU accumulation from scratch for this evaluation

    total_loss = 0.0
    # Count batches explicitly instead of using len(loader): this also works
    # for loaders without __len__ and avoids dividing by zero when empty.
    num_batches = 0
    # Defined up front so the summary below cannot hit an unbound name.
    mean_iou_value = float('nan')

    with torch.no_grad():
        batch_bar = tqdm(loader, desc="Evaluating", leave=False)
        for imgs, labels in batch_bar:
            imgs = imgs.to(device)
            labels = labels.to(device)

            outputs = model(imgs)
            loss = criterion(outputs, labels)

            # Running metric over every batch seen so far in this evaluation.
            preds = outputs.argmax(dim=1)
            metric.update(preds, labels)
            ious = metric.compute()
            # Entries equal to 0 are dropped before averaging.
            # NOTE(review): this presumably removes the ignored class, but it
            # also removes any class whose IoU is exactly 0 - confirm intended.
            mean_iou_value = torch.mean(ious[ious != 0]).item()

            loss_value = loss.item()
            batch_bar.set_postfix(loss=loss_value, mIoU=mean_iou_value)
            total_loss += loss_value
            num_batches += 1

    avg_loss = total_loss / num_batches if num_batches else float('nan')
    print(f"Evaluation Loss: {avg_loss:.4f}, mIoU: {mean_iou_value:.4f}")
    return avg_loss, mean_iou_value


In [6]:
# Load the model if you have a pre-trained one
# Warm-start from an existing checkpoint when the file is present; otherwise
# the model keeps the weights it was constructed with.
pretrain_path = 'range_vit_waymo_5174.pth'
if os.path.exists(pretrain_path):
    print(f"Loading pre-trained model from {pretrain_path}")
    # map_location=device: a checkpoint saved from a GPU run must still load
    #   when `device` fell back to CPU.
    # weights_only=True: a state dict only needs tensors and plain containers,
    #   so avoid unpickling arbitrary objects from the file.
    model.load_state_dict(
        torch.load(pretrain_path, map_location=device, weights_only=True)
    )

Loading pre-trained model from range_vit_waymo_5174.pth


In [7]:
### Train the model
# Highest validation mIoU observed so far; only improvements are checkpointed.
best_val_mIoU = 0.0

for epoch in tqdm(range(num_epochs), desc="Epochs"):
    train_one_epoch(model, loader, optimizer, criterion, metric, epoch)

    # Validation runs on epoch indices 0, 5, 10, ...
    run_validation = (epoch % 5 == 0)
    if run_validation:
        eval_model(model, val_loader, criterion, metric)
        # `metric` still holds the state accumulated during eval_model, so the
        # validation mIoU can be read back from it here.
        ious = metric.compute()
        nonzero_ious = ious[ious != 0]
        current_val_mIoU = nonzero_ious.mean().item()

        improved = current_val_mIoU > best_val_mIoU
        if improved:
            best_val_mIoU = current_val_mIoU
            torch.save(model.state_dict(), 'range_vit_waymo.pth')

    # One scheduler step per epoch, whether or not validation ran.
    scheduler.step()


Epochs:   0%|          | 0/60 [00:00<?, ?it/s]

Training Epoch 1:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [1] Loss: 0.4650, mIoU: 0.7076


Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

Evaluation Loss: 0.7234, mIoU: 0.5209


Training Epoch 2:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [2] Loss: 0.4711, mIoU: 0.7293


Training Epoch 3:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [3] Loss: 0.4804, mIoU: 0.7058


Training Epoch 4:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [4] Loss: 0.4821, mIoU: 0.6951


Training Epoch 5:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [5] Loss: 0.4796, mIoU: 0.6720


Training Epoch 6:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [6] Loss: 0.4735, mIoU: 0.6915


Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

Evaluation Loss: 0.7784, mIoU: 0.4840


Training Epoch 7:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [7] Loss: 0.4600, mIoU: 0.6948


Training Epoch 8:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [8] Loss: 0.4817, mIoU: 0.6915


Training Epoch 9:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [9] Loss: 0.4509, mIoU: 0.7103


Training Epoch 10:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [10] Loss: 0.4722, mIoU: 0.6919


Training Epoch 11:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [11] Loss: 0.4673, mIoU: 0.6954


Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

Evaluation Loss: 0.6946, mIoU: 0.5279


Training Epoch 12:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [12] Loss: 0.4503, mIoU: 0.6980


Training Epoch 13:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [13] Loss: 0.4315, mIoU: 0.7165


Training Epoch 14:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [14] Loss: 0.4195, mIoU: 0.7295


Training Epoch 15:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [15] Loss: 0.4052, mIoU: 0.7434


Training Epoch 16:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [16] Loss: 0.4108, mIoU: 0.7292


Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

Evaluation Loss: 0.7154, mIoU: 0.5756


Training Epoch 17:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [17] Loss: 0.4170, mIoU: 0.7204


Training Epoch 18:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [18] Loss: 0.4075, mIoU: 0.7570


Training Epoch 19:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [19] Loss: 0.3976, mIoU: 0.7382


Training Epoch 20:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [20] Loss: 0.3981, mIoU: 0.7419


Training Epoch 21:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [21] Loss: 0.3994, mIoU: 0.7387


Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

Evaluation Loss: 0.6744, mIoU: 0.5675


Training Epoch 22:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [22] Loss: 0.4147, mIoU: 0.7283


Training Epoch 23:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [23] Loss: 0.4017, mIoU: 0.7272


Training Epoch 24:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [24] Loss: 0.4109, mIoU: 0.7217


Training Epoch 25:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [25] Loss: 0.3890, mIoU: 0.7418


Training Epoch 26:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [26] Loss: 0.3862, mIoU: 0.7704


Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

Evaluation Loss: 0.6962, mIoU: 0.5540


Training Epoch 27:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [27] Loss: 0.3751, mIoU: 0.7646


Training Epoch 28:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [28] Loss: 0.3630, mIoU: 0.7739


Training Epoch 29:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [29] Loss: 0.3572, mIoU: 0.8097


Training Epoch 30:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [30] Loss: 0.3578, mIoU: 0.7703


Training Epoch 31:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [31] Loss: 0.3610, mIoU: 0.7655


Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

Evaluation Loss: 0.6784, mIoU: 0.5528


Training Epoch 32:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [32] Loss: 0.3552, mIoU: 0.8017


Training Epoch 33:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [33] Loss: 0.3604, mIoU: 0.7718


Training Epoch 34:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [34] Loss: 0.3479, mIoU: 0.7755


Training Epoch 35:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [35] Loss: 0.3493, mIoU: 0.7745


Training Epoch 36:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [36] Loss: 0.3463, mIoU: 0.7793


Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

Evaluation Loss: 0.6731, mIoU: 0.5629


Training Epoch 37:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [37] Loss: 0.3427, mIoU: 0.7842


Training Epoch 38:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [38] Loss: 0.3402, mIoU: 0.7963


Training Epoch 39:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [39] Loss: 0.3341, mIoU: 0.7858


Training Epoch 40:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [40] Loss: 0.3320, mIoU: 0.7807


Training Epoch 41:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [41] Loss: 0.3359, mIoU: 0.7918


Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

Evaluation Loss: 0.6991, mIoU: 0.5579


Training Epoch 42:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [42] Loss: 0.3307, mIoU: 0.7931


Training Epoch 43:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [43] Loss: 0.3310, mIoU: 0.7975


Training Epoch 44:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [44] Loss: 0.3225, mIoU: 0.8090


Training Epoch 45:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [45] Loss: 0.3191, mIoU: 0.8213


Training Epoch 46:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [46] Loss: 0.3190, mIoU: 0.8123


Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

Evaluation Loss: 0.6856, mIoU: 0.5558


Training Epoch 47:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [47] Loss: 0.3198, mIoU: 0.7896


Training Epoch 48:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [48] Loss: 0.3171, mIoU: 0.7977


Training Epoch 49:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [49] Loss: 0.3184, mIoU: 0.8029


Training Epoch 50:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [50] Loss: 0.3153, mIoU: 0.7894


Training Epoch 51:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [51] Loss: 0.3131, mIoU: 0.8170


Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

Evaluation Loss: 0.6836, mIoU: 0.5553


Training Epoch 52:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [52] Loss: 0.3136, mIoU: 0.8029


Training Epoch 53:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [53] Loss: 0.3117, mIoU: 0.8111


Training Epoch 54:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [54] Loss: 0.3137, mIoU: 0.7985


Training Epoch 55:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [55] Loss: 0.3055, mIoU: 0.8109


Training Epoch 56:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [56] Loss: 0.3136, mIoU: 0.7923


Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

Evaluation Loss: 0.6608, mIoU: 0.5373


Training Epoch 57:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [57] Loss: 0.3093, mIoU: 0.8244


Training Epoch 58:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [58] Loss: 0.3113, mIoU: 0.8387


Training Epoch 59:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [59] Loss: 0.3038, mIoU: 0.8151


Training Epoch 60:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [60] Loss: 0.3126, mIoU: 0.7998


In [8]:
# Final evaluation of the weights as they stand after the last training epoch.
eval_model(model, val_loader, criterion, metric)
# `metric` still holds the state accumulated during eval_model.
ious = metric.compute()
current_val_mIoU = torch.mean(ious[ious != 0]).item()

# 'range_vit_waymo.pth' is the best-checkpoint file written by the training
# loop. Only replace it when the final weights beat the best validation mIoU
# recorded there; an unconditional save can overwrite a better checkpoint with
# a worse one.
if current_val_mIoU > best_val_mIoU:
    best_val_mIoU = current_val_mIoU
    torch.save(model.state_dict(), 'range_vit_waymo.pth')

Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

Evaluation Loss: 0.6699, mIoU: 0.5430


In [9]:
# Copy weights when the two models don't have exactly the same architecture
# old_dict = torch.load('range_vit_segmentation_4616.pth')
# from model.model_utils import approximately_clone_state_dict
# new_dict = approximately_clone_state_dict(model.state_dict(), old_dict)
# model.load_state_dict(new_dict)
# torch.save(model.state_dict(), 'range_vit_segmentation.pth')

In [10]:
# Validation with the best model
# model = RangeViTSegmentationModel(n_classes=num_classes, in_channels=in_channels).to(device)
# model.load_state_dict(torch.load('range_vit_segmentation.pth'))
# eval_model(model, val_loader, criterion, metric)


In [11]:
# torch.save(model.state_dict(), 'range_vit_segmentation.pth')

In [12]:
# print structure of model
# print(model)

In [13]:
# Clear CUDA memory: ask PyTorch to empty its CUDA cache.
# NOTE(review): memory owned by tensors that are still referenced (e.g. `model`)
# is presumably not released by this call - confirm against the
# torch.cuda.empty_cache documentation.

torch.cuda.empty_cache()