# <span style="color:red; font-weight:bold; ">A clean and modern RangeViT implementation for SemanticKITTI in PyTorch 2.4</span>  

## <span style="font-weight:bold">1. DataLoader</span>

### 1.1 Dataset Structure
The dataset should be structured as follows:
```
sequences/
├── 00/
│   ├── preprocess/
│   │   ├── 000000.bin
│   │   ├── 000001.bin
├── 01/
│   ├── preprocess/
│   │   ├── 000000.bin
│   │   ├── 000001.bin
```



In [2]:
import torch.optim as optim
import torch
import numpy as np

import os
import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms

from torch.utils.data import Dataset, DataLoader

import timm
import torch
import torch.nn as nn
import torch.nn.functional as F

from tqdm.notebook import tqdm

from model.KITTISegmentationDataset import KITTISegmentationDataset
from model.RangeViTSegmentationModel import RangeViTSegmentationModel

from segmentation_models_pytorch.losses import FocalLoss, LovaszLoss


In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dataset = KITTISegmentationDataset('../sequences',['00', '01', '02', '03', '04', '05', '06', '07', '09', '10'], training=True)
loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)

val_dataset = KITTISegmentationDataset('../sequences',['08'], training=False)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=4)



In [None]:
# Use torchmetrics or do manually
from torchmetrics.classification import MulticlassJaccardIndex
# create a metric and put it on gpu
metric = MulticlassJaccardIndex(num_classes=20, average=None, ignore_index=0).to(device)

num_classes = 20
in_channels = 9 # range, x, y, z, intensity, flag, R, G, B
num_epochs = 60
model = RangeViTSegmentationModel(n_classes=num_classes, in_channels=in_channels).to(device)
criterion = LovaszLoss(mode='multiclass', ignore_index=0, per_image=False)
# criterion = LovaszLoss(mode='multiclass', ignore_index=0, per_image=True)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

In [5]:
def train_one_epoch(model, loader, optimizer, criterion, metric,epoch):
    model.train()
    total_loss = 0.0
    metric.reset()  # Reset the IoU metric for the next epoch
    batch_bar = tqdm(loader, desc=f"Training Epoch {epoch+1}", leave=False)
    for imgs, labels in batch_bar:
        imgs = imgs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        preds = outputs.argmax(dim=1)
        metric.update(preds, labels)
        ious = metric.compute()
        mean_iou = torch.mean(ious[ious != 0])
        loss.backward()
        optimizer.step()
        batch_bar.set_postfix(loss=loss.item(), mIoU=mean_iou.item())
        total_loss += loss.item()
    print(f"Epoch [{epoch+1}] Loss: {total_loss/len(loader):.4f}, mIoU: {mean_iou.item():.4f}")


In [6]:
def eval_model(model, loader, criterion, metric):
    model.eval()
    total_loss = 0.0
    metric.reset()  # Reset the IoU metric for the evaluation
    with torch.no_grad():
        batch_bar = tqdm(loader, desc="Evaluating", leave=False)
        for imgs, labels in batch_bar:
            imgs = imgs.to(device)
            labels = labels.to(device)
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            preds = outputs.argmax(dim=1)
            metric.update(preds, labels)
            ious = metric.compute()
            mean_iou = torch.mean(ious[ious != 0])
            batch_bar.set_postfix(loss=loss.item(), mIoU=mean_iou.item())
            total_loss += loss.item()
    print(f"Evaluation Loss: {total_loss/len(loader):.4f}, mIoU: {mean_iou.item():.4f}")


In [7]:
### Train the model
# Load the model if you have a pre-trained one
pretrain_path = 'range_vit_segmentation.pth'
if os.path.exists(pretrain_path):
    print(f"Loading pre-trained model from {pretrain_path}")
    model.load_state_dict(torch.load('range_vit_segmentation.pth'))
# Training loop
for epoch in tqdm(range(num_epochs), desc="Epochs"):
    train_one_epoch(model, loader, optimizer, criterion, metric,epoch)
    if epoch % 5 == 0:
        eval_model(model, val_loader, criterion, metric)
    scheduler.step()
    if (epoch == 0):
        ious = metric.compute()
        best_val_mIoU = torch.mean(ious[ious != 0])
    else:
        ious = metric.compute()
        current_val_mIoU = torch.mean(ious[ious != 0]).item()
        if current_val_mIoU > best_val_mIoU:
            best_val_mIoU = current_val_mIoU
            torch.save(model.state_dict(), 'range_vit_segmentation.pth')


Loading pre-trained model from range_vit_segmentation.pth


Epochs:   0%|          | 0/60 [00:00<?, ?it/s]

Training Epoch 1:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [1] Loss: 0.3781, mIoU: 0.6211


Evaluating:   0%|          | 0/255 [00:00<?, ?it/s]

Evaluation Loss: 0.5891, mIoU: 0.3442


Training Epoch 2:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [2] Loss: 0.3875, mIoU: 0.6102


Training Epoch 3:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [3] Loss: 0.3805, mIoU: 0.6201


Training Epoch 4:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [4] Loss: 0.3829, mIoU: 0.6191


Training Epoch 5:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [5] Loss: 0.3755, mIoU: 0.6211


Training Epoch 6:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [6] Loss: 0.3769, mIoU: 0.6225


Evaluating:   0%|          | 0/255 [00:00<?, ?it/s]

Evaluation Loss: 0.5801, mIoU: 0.3213


Training Epoch 7:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [7] Loss: 0.3698, mIoU: 0.6325


Training Epoch 8:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [8] Loss: 0.3716, mIoU: 0.6301


Training Epoch 9:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [9] Loss: 0.3632, mIoU: 0.6378


Training Epoch 10:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [10] Loss: 0.3668, mIoU: 0.6380


Training Epoch 11:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [11] Loss: 0.3351, mIoU: 0.6625


Evaluating:   0%|          | 0/255 [00:00<?, ?it/s]

Evaluation Loss: 0.5632, mIoU: 0.3362


Training Epoch 12:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [12] Loss: 0.3288, mIoU: 0.6708


Training Epoch 13:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [13] Loss: 0.3286, mIoU: 0.6719


Training Epoch 14:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [14] Loss: 0.3251, mIoU: 0.6730


Training Epoch 15:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [15] Loss: 0.3238, mIoU: 0.6767


Training Epoch 16:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [16] Loss: 0.3232, mIoU: 0.6796


Evaluating:   0%|          | 0/255 [00:00<?, ?it/s]

Evaluation Loss: 0.5600, mIoU: 0.3602


Training Epoch 17:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [17] Loss: 0.3198, mIoU: 0.6816


Training Epoch 18:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [18] Loss: 0.3207, mIoU: 0.6796


Training Epoch 19:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [19] Loss: 0.3169, mIoU: 0.6844


Training Epoch 20:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [20] Loss: 0.3180, mIoU: 0.6844


Training Epoch 21:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [21] Loss: 0.3086, mIoU: 0.6963


Evaluating:   0%|          | 0/255 [00:00<?, ?it/s]

Evaluation Loss: 0.5564, mIoU: 0.3376


Training Epoch 22:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [22] Loss: 0.3052, mIoU: 0.6993


Training Epoch 23:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [23] Loss: 0.3033, mIoU: 0.7029


Training Epoch 24:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [24] Loss: 0.3007, mIoU: 0.7052


Training Epoch 25:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [25] Loss: 0.2997, mIoU: 0.7099


Training Epoch 26:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [26] Loss: 0.2989, mIoU: 0.7106


Evaluating:   0%|          | 0/255 [00:00<?, ?it/s]

Evaluation Loss: 0.5557, mIoU: 0.3549


Training Epoch 27:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [27] Loss: 0.2978, mIoU: 0.7126


Training Epoch 28:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [28] Loss: 0.2957, mIoU: 0.7167


Training Epoch 29:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [29] Loss: 0.2949, mIoU: 0.7199


Training Epoch 30:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [30] Loss: 0.2954, mIoU: 0.7214


Training Epoch 31:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [31] Loss: 0.2906, mIoU: 0.7279


Evaluating:   0%|          | 0/255 [00:00<?, ?it/s]

Evaluation Loss: 0.5553, mIoU: 0.3350


Training Epoch 32:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [32] Loss: 0.2883, mIoU: 0.7314


Training Epoch 33:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [33] Loss: 0.2874, mIoU: 0.7329


Training Epoch 34:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [34] Loss: 0.2864, mIoU: 0.7339


Training Epoch 35:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [35] Loss: 0.2861, mIoU: 0.7350


Training Epoch 36:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [36] Loss: 0.2853, mIoU: 0.7367


Evaluating:   0%|          | 0/255 [00:00<?, ?it/s]

Evaluation Loss: 0.5552, mIoU: 0.3540


Training Epoch 37:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [37] Loss: 0.2843, mIoU: 0.7387


Training Epoch 38:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [38] Loss: 0.2852, mIoU: 0.7392


Training Epoch 39:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [39] Loss: 0.2827, mIoU: 0.7409


Training Epoch 40:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [40] Loss: 0.2827, mIoU: 0.7418


Training Epoch 41:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [41] Loss: 0.2790, mIoU: 0.7456


Evaluating:   0%|          | 0/255 [00:00<?, ?it/s]

Evaluation Loss: 0.5548, mIoU: 0.3542


Training Epoch 42:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [42] Loss: 0.2784, mIoU: 0.7464


Training Epoch 43:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [43] Loss: 0.2784, mIoU: 0.7475


Training Epoch 44:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [44] Loss: 0.2770, mIoU: 0.7485


Training Epoch 45:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [45] Loss: 0.2761, mIoU: 0.7488


Training Epoch 46:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [46] Loss: 0.2761, mIoU: 0.7497


Evaluating:   0%|          | 0/255 [00:00<?, ?it/s]

Evaluation Loss: 0.5549, mIoU: 0.3532


Training Epoch 47:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [47] Loss: 0.2759, mIoU: 0.7505


Training Epoch 48:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [48] Loss: 0.2741, mIoU: 0.7513


Training Epoch 49:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [49] Loss: 0.2733, mIoU: 0.7518


Training Epoch 50:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [50] Loss: 0.2761, mIoU: 0.7515


Training Epoch 51:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [51] Loss: 0.2732, mIoU: 0.7536


Evaluating:   0%|          | 0/255 [00:00<?, ?it/s]

Evaluation Loss: 0.5553, mIoU: 0.3527


Training Epoch 52:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [52] Loss: 0.2721, mIoU: 0.7543


Training Epoch 53:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [53] Loss: 0.2715, mIoU: 0.7547


Training Epoch 54:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [54] Loss: 0.2720, mIoU: 0.7550


Training Epoch 55:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [55] Loss: 0.2740, mIoU: 0.7550


Training Epoch 56:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [56] Loss: 0.2720, mIoU: 0.7552


Evaluating:   0%|          | 0/255 [00:00<?, ?it/s]

Evaluation Loss: 0.5538, mIoU: 0.3767


Training Epoch 57:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [57] Loss: 0.2700, mIoU: 0.7556


Training Epoch 58:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [58] Loss: 0.2704, mIoU: 0.7556


Training Epoch 59:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [59] Loss: 0.2712, mIoU: 0.7560


Training Epoch 60:   0%|          | 0/598 [00:00<?, ?it/s]

Epoch [60] Loss: 0.2696, mIoU: 0.7560


In [None]:
# pretrain_path = 'range_vit_segmentation.pth'
# torch.save(model.state_dict(), pretrain_path)

In [8]:
# Validation with the best model
model.load_state_dict(torch.load('range_vit_segmentation.pth'))
model.eval()
metric.reset()  # Reset the IoU metric for validation
with torch.no_grad():
    for images, targets in val_loader:
        images = images.to(device)
        targets = targets.to(device)
        outputs = model(images)
        loss = criterion(outputs, targets)
        metric.update(outputs, targets)
    ious = metric.compute()
    val_mIoU = torch.mean(ious[ious != 0]).item()
    print(f"Validation mIoU: {val_mIoU:.4f}")


Validation mIoU: 0.3764


In [None]:
# print structure of model
# print(model)

In [None]:
# clear gpu memory
# torch.cuda.empty_cache()

NameError: name 'torch' is not defined