# Test training code for S2AE

In [1]:
import math
import sys
import time
import datetime

import matplotlib.pyplot as plt
import numpy as np

import torch
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary
from tqdm.auto import tqdm

from data_splitter import DataSplitter
from external_splitter import ExternalSplitter
from training_set import TrainingSetLidarSeg
from loss import *
from model_unet import ModelUnet
from model_segnet import ModelSegnet
from model import Model
from sphere import Sphere
from visualize import Visualize
from metrics import *
from average_meter import AverageMeter

    
%matplotlib inline
%load_ext autoreload
%autoreload 2

## Initialize some parameters

In [2]:
print("Initializing CUDA...")
torch.cuda.set_device(0)
# Let cuDNN benchmark and pick convolution algorithms for the fixed input size.
torch.backends.cudnn.benchmark = True

print("Setting parameters...")
# Seed the NumPy and torch generators before anything stochastic is created
# (model weights below, shuffled data split later) so that
# "Restart Kernel -> Run All" starts from the same random state every time.
# NOTE(review): cudnn.benchmark may still introduce small run-to-run
# differences; if DataSplitter draws from Python's `random`, seed that too.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

bandwidth = 100
learning_rate = 1e-3
n_epochs = 1
batch_size = 5
num_workers = 32
n_classes = 7

print("Initializing data structures...")
net = Model(bandwidth=bandwidth, n_classes=n_classes).cuda()

# optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

# criterion = MainLoss()
criterion = WceLovasz()

# TensorBoard writer shared by the training, validation and test cells.
writer = SummaryWriter()
# The timestamp makes checkpoint / final-weight file names unique per run.
timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
model_save = f'test_lidarseg_{timestamp}'

print('\n')
print('All instances initialized.')
print(f'Saving final model to {model_save}')

Initializing CUDA...
Setting parameters...
Initializing data structures...
[Model] We have [2, 20, 45, 140, 180, 140, 45, 20, 7] features.
[Model] We have [100, 40, 30, 15, 10, 8, 10, 15, 30, 40, 100] bandwidths.


All instances initialized.
Saving final model to test_lidarseg_20220506092404


## Load the dataset

In [3]:
# Root directory of the exported dataset (alternative location kept for reference).
# export_ds = '/mnt/data/datasets/nuscenes/processed'
export_ds = '/media/scratch/berlukas/nuscenes'

# File holding the clouds used for this test training run.
cloud_filename = f"{export_ds}/test_training/sem_clouds2_tiny.npy"
print(f"Loading clouds from {cloud_filename}.")
cloud_features = np.load(cloud_filename)

# Index 2 along axis 1 is split off as its own (copied) array; indices 0 and 1
# stay as the input features.
# NOTE(review): presumably index 2 holds the per-cell semantic labels — confirm
# against the code that exported the .npy file.
sem_cloud_features = cloud_features[:, 2, :, :].copy(order='K')
cloud_features = cloud_features[:, :2, :, :]
print(f"Shape clouds is {cloud_features.shape} and sem clouds is {sem_cloud_features.shape}")

Loading clouds from /media/scratch/berlukas/nuscenes/test_training/sem_clouds2_tiny.npy.
Shape clouds is (500, 2, 200, 200) and sem clouds is (500, 200, 200)


In [4]:
# Wrap the feature / label arrays in the project dataset and report its length.
train_set = TrainingSetLidarSeg(cloud_features, sem_cloud_features)
print(f"Total size of the training set: {len(train_set)}")

# NOTE(review): the meaning of the positional `False` and of the two split
# ratios is defined in data_splitter.py — confirm there before changing them.
split = DataSplitter(
    train_set,
    False,
    test_train_split=0.95,
    val_train_split=0.05,
    shuffle=True,
)

# One loader per subset; the sizes are queried from the splitter afterwards.
train_loader, val_loader, test_loader = split.get_split(
    batch_size=batch_size,
    num_workers=num_workers,
)
train_size, val_size, test_size = (
    split.get_train_size(),
    split.get_val_size(),
    split.get_test_size(),
)

print("Training size: ", train_size)
print("Validation size: ", val_size)
if test_size != 0:
    print("Testing size: ", test_size)
else:
    print('Test size is 0. Configured for external tests')

Total size of the training set: 500
Training size:  451
Validation size:  24
Testing size:  25


In [5]:
# val_filename = f"{export_ds}/sem_clouds_val.npy"

# print(f"Loading clouds from {val_filename}.")
# cloud_val = np.load(val_filename)

# sem_val_features = np.copy(cloud_val[:, 2, :, :])
# val_features = cloud_val[:, 0:2, :, :]
# print(f"Shape clouds is {val_features.shape} and sem clouds is {sem_val_features.shape}")

# train_set = TrainingSetLidarSeg(cloud_features, sem_cloud_features)
# val_set = TrainingSetLidarSeg(val_features, sem_val_features)
# split = ExternalSplitter(train_set, val_set)

# # Split the data into train, val and optionally test
# train_loader, val_loader = split.get_split(batch_size=batch_size, num_workers=num_workers)
# train_size = split.get_train_size()
# val_size = split.get_val_size()
# test_size = 0
# print("Training size: ", train_size)
# print("Validation size: ", val_size)

In [6]:
def adjust_learning_rate_exp(optimizer, epoch_num, lr, decay_rate=0.96):
    """Set an exponentially decayed learning rate on every parameter group.

    The new rate is ``lr * decay_rate ** epoch_num``.

    Args:
        optimizer: Object exposing ``param_groups`` (a list of dicts with an
            ``'lr'`` entry), e.g. a ``torch.optim`` optimizer.
        epoch_num: Exponent applied to ``decay_rate`` (0 leaves ``lr`` as is).
        lr: Base learning rate before decay.
        decay_rate: Multiplicative decay per epoch. Defaults to 0.96, the
            value that was previously hard-coded.

    Returns:
        The learning rate that was written to the parameter groups.
    """
    new_lr = lr * math.pow(decay_rate, epoch_num)
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr

    return new_lr

def train_lidarseg(net, criterion, optimizer, writer, epoch, n_iter, loss_, t0):
    """Run one training pass over the notebook-level ``train_loader``.

    For every batch the loss is back-propagated, the optimizer is stepped and
    the loss is logged under ``'Train/Loss'``. Every 10 batches a progress line
    with the mean loss of that window, the elapsed time in minutes and the
    current learning rate is printed.

    Args:
        net: Model; called on the input batch and put into train mode.
        criterion: Callable ``criterion(prediction, target)`` returning the loss.
        optimizer: Optimizer stepping ``net``; its first parameter group
            supplies the learning rate shown in the progress line.
        writer: Object with ``add_scalar(tag, value, step)``.
        epoch: Zero-based epoch index (printed as ``epoch + 1``).
        n_iter: Global step used for the first logged batch.
        loss_: Initial value of the running-loss accumulator.
        t0: Reference time in seconds (as from ``time.time()``) for the first
            progress line.

    Returns:
        ``n_iter`` advanced by the number of processed batches.
    """
    log_every = 10  # batches per progress line / averaging window
    net.train()
    for batch_idx, (cloud, lidarseg_gt) in enumerate(train_loader):
        cloud, lidarseg_gt = cloud.cuda().float(), lidarseg_gt.cuda().long()
        enc_dec_cloud = net(cloud)
        loss = criterion(enc_dec_cloud, lidarseg_gt)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Convert once; each float() on a CUDA tensor forces a device sync.
        batch_loss = float(loss)
        loss_ += batch_loss

        writer.add_scalar('Train/Loss', batch_loss, n_iter)
        n_iter += 1

        if batch_idx % log_every == log_every - 1:
            t1 = time.time()
            # Read the rate from the optimizer instead of relying on a
            # notebook-level `lr` variable that is not passed to this function.
            current_lr = optimizer.param_groups[0]['lr']
            print('[Epoch %d, Batch %4d] loss: %.8f time: %.5f lr: %.3e' %
                  (epoch + 1, batch_idx + 1, loss_ / log_every, (t1 - t0) / 60, current_lr))
            t0 = t1
            loss_ = 0.0
    return n_iter

def validate_lidarseg(net, criterion, optimizer, writer, epoch, n_iter):
    """Evaluate ``net`` on the notebook-level ``val_loader`` and log the metrics.

    Each batch contributes a ``'Validation/Loss'`` scalar (step ``n_iter``) and
    updates running averages of the four values returned by ``eval_metrics``.
    After the loop the averages are written to ``writer`` at step ``epoch + 1``
    and printed.

    Args:
        net: Model; switched to eval mode and called on each input batch.
        criterion: Callable ``criterion(prediction, target)`` returning the loss.
        optimizer: Only used to call ``zero_grad()`` once per batch.
        writer: Object with ``add_scalar(tag, value, step)``.
        epoch: Zero-based epoch index.
        n_iter: Global validation step used for the first batch.

    Returns:
        ``n_iter`` advanced by the number of validation batches.
    """
    avg_pixel_acc = AverageMeter()
    avg_pixel_acc_per_class = AverageMeter()
    avg_jacc = AverageMeter()
    avg_dice = AverageMeter()

    net.eval()
    with torch.no_grad():
        for cloud, lidarseg_gt in val_loader:
            cloud = cloud.cuda().float()
            lidarseg_gt = lidarseg_gt.cuda().long()
            logits = net(cloud)

            # NOTE(review): no gradients are produced under no_grad(), so this
            # call looks redundant; kept to leave behaviour untouched.
            optimizer.zero_grad()
            val_loss = criterion(logits, lidarseg_gt)
            writer.add_scalar('Validation/Loss', float(val_loss), n_iter)

            # Predicted class = index of the largest score along dim 1.
            predicted = torch.argmax(logits, dim=1)
            pixel_acc, pixel_acc_per_class, jacc, dice = eval_metrics(
                lidarseg_gt, predicted, num_classes=n_classes)
            avg_pixel_acc.update(pixel_acc)
            avg_pixel_acc_per_class.update(pixel_acc_per_class)
            avg_jacc.update(jacc)
            avg_dice.update(dice)

            n_iter += 1

        # Per-epoch summaries are indexed by the one-based epoch number.
        epoch_p_1 = epoch + 1
        writer.add_scalar('Validation/AvgPixelAccuracy', avg_pixel_acc.avg, epoch_p_1)
        writer.add_scalar('Validation/AvgPixelAccuracyPerClass', avg_pixel_acc_per_class.avg, epoch_p_1)
        writer.add_scalar('Validation/AvgJaccardIndex', avg_jacc.avg, epoch_p_1)
        writer.add_scalar('Validation/AvgDiceCoefficient', avg_dice.avg, epoch_p_1)

        print(f'[Validation for epoch {epoch_p_1}] Average Pixel Accuracy: {avg_pixel_acc.avg}')
        print(f'[Validation for epoch {epoch_p_1}] Average Pixel Accuracy per Class: {avg_pixel_acc_per_class.avg}')
        print(f'[Validation for epoch {epoch_p_1}] Average Jaccard Index: {avg_jacc.avg}')
        print(f'[Validation for epoch {epoch_p_1}] Average DICE Coefficient: {avg_dice.avg}')
        print('\n')

    return n_iter

def save_checkpoint(net, optimizer, criterion, lr, n_epoch):
    """Write a training checkpoint to ``./checkpoints/{model_save}_{n_epoch}.pth``.

    The checkpoint directory is created if it does not exist yet, so a fresh
    working directory no longer makes ``torch.save`` fail after an epoch has
    already been trained.

    Args:
        net: Model whose ``state_dict()`` is stored under ``'model_state_dict'``.
        optimizer: Optimizer whose ``state_dict()`` is stored under
            ``'optimizer_state_dict'``.
        criterion: Stored as-is under the ``'loss'`` key.
        lr: Learning rate stored under ``'lr'``.
        n_epoch: Epoch number stored under ``'epoch'`` and used in the file name.

    Reads the notebook-level ``model_save`` for the file-name prefix.
    """
    import os  # local import: the notebook's import cell does not provide `os`

    checkpoint_dir = './checkpoints'
    os.makedirs(checkpoint_dir, exist_ok=True)

    checkpoint_path = f'{checkpoint_dir}/{model_save}_{n_epoch}.pth'
    torch.save({
            'epoch': n_epoch,
            'model_state_dict': net.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': criterion,
            'lr': lr,
            }, checkpoint_path)
    print('================================')
    print(f'Saved checkpoint to {checkpoint_path}')
    print('================================')

def test_lidarseg(net, criterion, writer):
    """Evaluate ``net`` on the notebook-level ``test_loader``.

    Per batch, the four values returned by ``eval_metrics`` are logged under
    ``'Test/...'`` tags and folded into running averages; the averages are
    logged and printed after the loop. Inputs, raw network outputs and ground
    truth are collected sample by sample as NumPy arrays.

    Each batch tensor is moved to the CPU once and then split along its first
    axis, instead of being transferred again for every sample. Results are
    appended, so the returned lists always match the number of samples the
    loader actually yields.

    Args:
        net: Model; switched to eval mode and called on each input batch.
        criterion: Unused here; kept for interface compatibility.
        writer: Object with ``add_scalar(tag, value, step)``.

    Returns:
        Tuple ``(all_input_clouds, all_decoded_clouds, all_gt_clouds)`` of
        lists holding one NumPy array per test sample.
    """
    all_input_clouds = []
    all_decoded_clouds = []
    all_gt_clouds = []
    avg_pixel_acc = AverageMeter()
    avg_pixel_acc_per_class = AverageMeter()
    avg_jacc = AverageMeter()
    avg_dice = AverageMeter()
    n_iter = 0
    net.eval()
    with torch.no_grad():
        for cloud, lidarseg_gt in test_loader:
            cloud, lidarseg_gt = cloud.cuda().float(), lidarseg_gt.cuda().long()
            enc_dec_cloud = net(cloud)

            # Predicted class = index of the largest score along dim 1.
            pred_segmentation = torch.argmax(enc_dec_cloud, dim=1)
            pixel_acc, pixel_acc_per_class, jacc, dice = eval_metrics(
                lidarseg_gt, pred_segmentation, num_classes=n_classes)
            avg_pixel_acc.update(pixel_acc)
            avg_pixel_acc_per_class.update(pixel_acc_per_class)
            avg_jacc.update(jacc)
            avg_dice.update(dice)

            writer.add_scalar('Test/PixelAccuracy', pixel_acc, n_iter)
            writer.add_scalar('Test/PixelAccuracyPerClass', pixel_acc_per_class, n_iter)
            writer.add_scalar('Test/JaccardIndex', jacc, n_iter)
            writer.add_scalar('Test/DiceCoefficient', dice, n_iter)

            # One device-to-host copy per tensor per batch; extend() iterates
            # the first axis and therefore adds one array per sample.
            all_input_clouds.extend(cloud.cpu().numpy())
            all_decoded_clouds.extend(enc_dec_cloud.cpu().numpy())
            all_gt_clouds.extend(lidarseg_gt.cpu().numpy())
            n_iter += 1

        writer.add_scalar('Test/AvgPixelAccuracy', avg_pixel_acc.avg, n_iter)
        writer.add_scalar('Test/AvgPixelAccuracyPerClass', avg_pixel_acc_per_class.avg, n_iter)
        writer.add_scalar('Test/AvgJaccardIndex', avg_jacc.avg, n_iter)
        writer.add_scalar('Test/AvgDiceCoefficient', avg_dice.avg, n_iter)

        print(f'[Test] Average Pixel Accuracy: {avg_pixel_acc.avg}')
        print(f'[Test] Average Pixel Accuracy per Class: {avg_pixel_acc_per_class.avg}')
        print(f'[Test] Average Jaccard Index: {avg_jacc.avg}')
        print(f'[Test] Average DICE Coefficient: {avg_dice.avg}')
        print('\n')

    return all_input_clouds, all_decoded_clouds, all_gt_clouds

## Training Loop

In [7]:
# Global step counters for the TensorBoard loss curves; they keep counting
# across epochs. `loss_` is the initial running loss handed to each epoch.
abort = False
train_iter, val_iter = 0, 0
loss_ = 0.0

print(f'Starting training using {n_epochs} epochs')
for epoch in tqdm(range(n_epochs)):
    # Decay the learning rate from the base value for this epoch.
    lr = adjust_learning_rate_exp(optimizer, epoch_num=epoch, lr=learning_rate)
    t0 = time.time()

    # One pass over the training data, then one over the validation data.
    train_iter = train_lidarseg(
        net, criterion, optimizer, writer, epoch, train_iter, loss_, t0)
    val_iter = validate_lidarseg(
        net, criterion, optimizer, writer, epoch, val_iter)

    # Per-epoch bookkeeping: log the rate that was used and checkpoint.
    writer.add_scalar('Train/lr', lr, epoch)
    save_checkpoint(net, optimizer, criterion, lr, epoch)

print("Training finished!")

# Store only the model weights of the final state.
final_save_path = f'./{model_save}.pkl'
torch.save(net.state_dict(), final_save_path)
print(f'Saved final weights to {final_save_path}.')

Starting training using 1 epochs


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[Epoch 1, Batch   10] loss: 3.11602848 time: 1.50229 lr: 1.000e-03
[Epoch 1, Batch   20] loss: 1.73081434 time: 0.96311 lr: 1.000e-03
[Epoch 1, Batch   30] loss: 1.53966256 time: 0.94671 lr: 1.000e-03
[Epoch 1, Batch   40] loss: 1.43499188 time: 0.96252 lr: 1.000e-03
[Epoch 1, Batch   50] loss: 1.35359801 time: 0.95589 lr: 1.000e-03
[Epoch 1, Batch   60] loss: 1.30283409 time: 0.95533 lr: 1.000e-03
[Epoch 1, Batch   70] loss: 1.28955110 time: 0.95876 lr: 1.000e-03
[Epoch 1, Batch   80] loss: 1.24263601 time: 0.94980 lr: 1.000e-03
[Epoch 1, Batch   90] loss: 1.25323566 time: 0.97786 lr: 1.000e-03
[Validation for epoch 1] Average Pixel Accuracy: 0.722410261631012
[Validation for epoch 1] Average Pixel Accuracy per Class: 0.4645899832248688
[Validation for epoch 1] Average Jaccard Index: 0.2854815721511841
[Validation for epoch 1] Average DICE Coefficient: 0.38330507278442383


Saved checkpoint to ./checkpoints/test_lidarseg_20220506092404_0.pth

Training finished!
Saved final weights to 

## Testing

In [8]:
if test_size > 0:
    # Output files for the test inputs, network outputs and ground truth.
    dec_input = f"{export_ds}/decoded_input_lidar.npy"
    dec_clouds = f"{export_ds}/decoded_lidar.npy"
    dec_gt = f"{export_ds}/decoded_gt_lidar.npy"

    print("Starting testing...")
    # Release cached GPU memory left over from training before evaluating.
    torch.cuda.empty_cache()
    input_clouds, decoded_clouds, gt_clouds = test_lidarseg(net, criterion, writer)

    np.save(dec_input, input_clouds)
    np.save(dec_clouds, decoded_clouds)
    np.save(dec_gt, gt_clouds)

    print("Testing finished!")

# Close the writer even when no internal test split exists, so the training
# and validation events are flushed to disk in that configuration as well.
writer.close()

Starting testing...
[Test] Average Pixel Accuracy: 0.7101399898529053
[Test] Average Pixel Accuracy per Class: 0.48164525628089905
[Test] Average Jaccard Index: 0.2869625985622406
[Test] Average DICE Coefficient: 0.3884340226650238


Testing finished!
