# Test training code for S2AE

In [3]:
import math
import sys
import time

import matplotlib.pyplot as plt
import numpy as np
import open3d as o3d
from scipy import spatial

import torch
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary
from tqdm.auto import tqdm

from data_splitter import DataSplitter
from training_set import TrainingSetLidarSeg
from loss import *
# from model_encode_decode_simple import ModelEncodeDecodeSimple
from model_simple_for_testing import ModelSimpleForTesting
from sphere import Sphere
from visualize import Visualize
    
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Initialize some parameters

In [4]:
# Configuration cell: selects the GPU, fixes the hyper-parameters and builds the
# model, optimizer, loss and TensorBoard writer used by the cells below.
print("Initializing CUDA...")
torch.cuda.set_device(0)
# Let cuDNN auto-tune its kernels. NOTE(review): auto-tuning can pick different
# algorithms between runs, so results may still vary slightly despite the seed.
torch.backends.cudnn.benchmark = True

print("Setting parameters...")
seed = 42
bandwidth = 100
learning_rate = 4.5e-3
n_epochs = 5
batch_size = 2
num_workers = 32

# Seed the global RNGs before the model is created so that weight initialisation
# (and any numpy/torch based shuffling done later) is repeatable across
# "Restart & Run All".
np.random.seed(seed)
torch.manual_seed(seed)

print("Initializing data structures...")
net = ModelSimpleForTesting(bandwidth).cuda()
optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)

# Alternative criteria are kept for quick switching.
# criterion = L2Loss(alpha=0.5, margin=0.2)
criterion = CrossEntropyLoss(n_classes=32)
# criterion = NegativeLogLikelihoodLoss(n_classes = 32)

writer = SummaryWriter()
# File that receives the trained weights (state_dict) after the training loop.
model_save = 'test_training_params.pkl'

print("All instances initialized.")

Initializing CUDA...
Setting parameters...
Initializing data structures...
All instances initialized.


## Load the dataset

In [5]:
# Directory that holds the exported .npy arrays.
# export_ds = '/mnt/data/datasets/nuscenes/processed'
export_ds = '/media/scratch/berlukas/nuscenes'

# Inputs read by this cell.
img_filename = f"{export_ds}/images.npy"
cloud_filename = f"{export_ds}/clouds.npy"
sem_clouds_filename = f"{export_ds}/sem_clouds.npy"

# Output paths; nothing is written here, they are used by the testing cell.
dec_clouds = f"{export_ds}/decoded.npy"
dec_indices = f"{export_ds}/indices.npy"

print(f"Loading from images from {img_filename}, clouds from {cloud_filename} and sem clouds from {sem_clouds_filename}")
img_features = np.load(img_filename)
cloud_features = np.load(cloud_filename)
# Keep only index 1 of the second axis (the semantic class); that axis is dropped.
sem_cloud_features = np.load(sem_clouds_filename)[:, 1, :, :]
print(f"Shape of images is {img_features.shape}, clouds is {cloud_features.shape} and sem clouds is {sem_cloud_features.shape}")

Loading from images from /media/scratch/berlukas/nuscenes/images.npy, clouds from /media/scratch/berlukas/nuscenes/clouds.npy and sem clouds from /media/scratch/berlukas/nuscenes/sem_clouds.npy
Shape of images is (850, 1, 200, 200), clouds is (850, 2, 200, 200) and sem clouds is (850, 200, 200)


In [6]:
# Wrap the point clouds and their semantic labels in a dataset object.
train_set = TrainingSetLidarSeg(bandwidth, cloud_features, sem_cloud_features)
print(f"Total size of the training set: {len(train_set)}")

# Partition the dataset into train, validation and (optionally) test loaders.
split = DataSplitter(train_set, False, test_train_split=0.9, shuffle=True)
train_loader, val_loader, test_loader = split.get_split(batch_size=batch_size,
                                                        num_workers=num_workers)
train_size, val_size, test_size = (
    split.get_train_size(),
    split.get_val_size(),
    split.get_test_size(),
)

print("Training size: ", train_size)
print("Validation size: ", val_size)
if test_size != 0:
    print("Testing size: ", test_size)
else:
    print('Test size is 0. Configured for external tests')

Total size of the training set: 850
Training size:  688
Validation size:  77
Testing size:  85


In [7]:
def adjust_learning_rate_exp(optimizer, epoch_num, lr, decay_rate=0.96):
    """Apply an exponentially decayed learning rate to every parameter group.

    The new rate is ``lr * decay_rate ** epoch_num``; it is computed from the
    base rate ``lr`` each time, not from the optimizer's current rate.

    Args:
        optimizer: Object exposing ``param_groups``, an iterable of dicts whose
            ``'lr'`` entry is overwritten.
        epoch_num: Exponent of the decay (the epoch index in the training loop).
        lr: Base learning rate before decay.
        decay_rate: Multiplicative decay per epoch. Defaults to 0.96, the value
            that used to be hard-coded.

    Returns:
        The learning rate that was written to all parameter groups.
    """
    new_lr = lr * math.pow(decay_rate, epoch_num)
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr

    return new_lr

def train_lidarseg(net, criterion, optimizer, writer, epoch, n_iter, loss_, t0):
    """Run one training epoch over the notebook-level ``train_loader``.

    Args:
        net: Model to train; switched to training mode.
        criterion: Callable returning ``(loss, loss_total)`` for a prediction and
            its ground truth. ``loss`` is back-propagated, ``loss_total`` is
            accumulated for the progress print.
        optimizer: Optimizer stepped once per batch.
        writer: TensorBoard writer receiving the ``'Train/Loss'`` scalar.
        epoch: Zero-based epoch index, used only in the progress print.
        n_iter: Global step counter for TensorBoard; incremented per batch.
        loss_: Initial value of the running loss sum for the progress print.
        t0: Reference timestamp (``time.time()``) for the progress print.

    Returns:
        The updated global step counter.
    """
    log_every = 20  # batches between two progress prints

    net.train()
    for batch_idx, (cloud, lidarseg_gt) in enumerate(train_loader):
        cloud, lidarseg_gt = cloud.cuda().float(), lidarseg_gt.cuda().long()

        enc_dec_cloud = net(cloud)
        loss, loss_total = criterion(enc_dec_cloud, lidarseg_gt)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_ += loss_total.item()

        # Log a plain float so the writer does not hold on to the graph tensor.
        writer.add_scalar('Train/Loss', loss.item(), n_iter)
        n_iter += 1

        if batch_idx % log_every == log_every - 1:
            t1 = time.time()
            # Read the rate from the optimizer instead of relying on a notebook
            # global named `lr`, which only exists once the training-loop cell
            # has run.
            current_lr = optimizer.param_groups[0]['lr']
            # The elapsed time is printed in minutes.
            print('[Epoch %d, Batch %4d] loss: %.8f time: %.5f lr: %.3e' %
                  (epoch + 1, batch_idx + 1, loss_ / log_every, (t1 - t0) / 60, current_lr))
            t0 = t1
            loss_ = 0.0
    return n_iter

def validate_lidarseg(net, criterion, optimizer, writer, epoch, n_iter):
    """Evaluate ``net`` on the notebook-level ``val_loader`` and log each batch loss.

    Args:
        net: Model to evaluate; switched to evaluation mode.
        criterion: Callable returning ``(loss, loss_total)``; only ``loss`` is logged.
        optimizer: Optimizer whose gradients are cleared on every batch.
        writer: TensorBoard writer receiving the ``'Validation/Loss'`` scalar.
        epoch: Unused.
        n_iter: Global validation step counter; incremented per batch.

    Returns:
        The updated global validation step counter.
    """
    net.eval()
    with torch.no_grad():
        for cloud, lidarseg_gt in val_loader:
            inputs = cloud.cuda().float()
            targets = lidarseg_gt.cuda().long()
            prediction = net(inputs)

            # NOTE(review): no backward pass happens here, so clearing the
            # gradients looks unnecessary; kept to preserve behaviour.
            optimizer.zero_grad()
            batch_loss, _ = criterion(prediction, targets)

            writer.add_scalar('Validation/Loss', batch_loss, n_iter)
            n_iter += 1
    return n_iter

def test_lidarseg(net, criterion, writer):
    """Run ``net`` over the notebook-level ``test_loader`` and collect its outputs.

    Args:
        net: Model to evaluate; switched to evaluation mode.
        criterion: Unused.
        writer: Unused.

    Returns:
        A list of length ``test_size`` (notebook global) holding one numpy array
        per test sample, in loader order. Entries the loader did not fill stay
        ``None``.
    """
    all_decoded_clouds = [None] * test_size
    k = 0
    net.eval()
    with torch.no_grad():
        for cloud, _ in test_loader:
            cloud = cloud.cuda().float()
            # Copy the whole batch to host memory once. Doing the conversion
            # inside the per-sample loop repeated the GPU->CPU transfer for every
            # sample and kept one full-batch copy alive per stored sample.
            decoded_batch = net(cloud).detach().cpu().numpy()
            for decoded_sample in decoded_batch:
                all_decoded_clouds[k] = decoded_sample
                k += 1
    return all_decoded_clouds

## Training Loop

In [None]:
# Training driver: for every epoch, decay the learning rate, train for one pass,
# validate for one pass and log the learning rate; finally save the weights.
# NOTE: `abort` is not read anywhere in the visible notebook.
abort = False
# Global TensorBoard step counters, carried across epochs.
train_iter = 0
val_iter = 0
# Never updated in this cell: train_lidarseg receives it by value, so every epoch
# starts its running loss sum from 0.0.
loss_ = 0.0
print(f'Starting training using {n_epochs} epochs')
for epoch in tqdm(range(n_epochs)):    
    # The rate is recomputed from the base `learning_rate` each epoch.
    # `lr` is a notebook-level name; keep it, other cells may read it as a global.
    lr = adjust_learning_rate_exp(optimizer, epoch_num=epoch, lr=learning_rate)
    t0 = time.time()

    train_iter = train_lidarseg(net, criterion, optimizer, writer, epoch, train_iter, loss_, t0)    
    val_iter = validate_lidarseg(net, criterion, optimizer, writer, epoch, val_iter)
    writer.add_scalar('Train/lr', lr, epoch)

print("Training finished!")
# Only the state_dict is stored, not the full model object.
torch.save(net.state_dict(), model_save)

Starting training using 5 epochs


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))



[Epoch 1, Batch   20] loss: 8.99576576 time: 0.13155 lr: 4.500e-03
[Epoch 1, Batch   40] loss: 4.14547040 time: 0.06140 lr: 4.500e-03
[Epoch 1, Batch   60] loss: 3.07147354 time: 0.06138 lr: 4.500e-03
[Epoch 1, Batch   80] loss: 2.23263977 time: 0.06158 lr: 4.500e-03
[Epoch 1, Batch  100] loss: 1.47192770 time: 0.06167 lr: 4.500e-03
[Epoch 1, Batch  120] loss: 1.17208176 time: 0.06144 lr: 4.500e-03
[Epoch 1, Batch  140] loss: 1.08096944 time: 0.06138 lr: 4.500e-03
[Epoch 1, Batch  160] loss: 1.07031105 time: 0.06136 lr: 4.500e-03
[Epoch 1, Batch  180] loss: 1.04862596 time: 0.06146 lr: 4.500e-03
[Epoch 1, Batch  200] loss: 1.03873737 time: 0.06147 lr: 4.500e-03
[Epoch 1, Batch  220] loss: 1.03602674 time: 0.06141 lr: 4.500e-03
[Epoch 1, Batch  240] loss: 0.99072495 time: 0.06139 lr: 4.500e-03
[Epoch 1, Batch  260] loss: 0.98027969 time: 0.06135 lr: 4.500e-03
[Epoch 1, Batch  280] loss: 0.98334565 time: 0.06139 lr: 4.500e-03
[Epoch 1, Batch  300] loss: 1.00736828 time: 0.06128 lr: 4.500

## Testing

In [8]:
# Testing driver: run the model on the test split and store its outputs together
# with the indices of the test samples.
print("Starting testing...")

# Release cached GPU memory held by the allocator before inference.
torch.cuda.empty_cache()
decoded_clouds = test_lidarseg(net, criterion, writer)
# Indices of the test samples as reported by the splitter.
# NOTE(review): presumably these index into the full dataset - confirm in DataSplitter.
test_indices = np.array(split.test_indices)

# `decoded_clouds` is a Python list; np.save converts it to an array on write.
np.save(dec_indices, test_indices)
np.save(dec_clouds, decoded_clouds)

# The writer is closed here, so no further scalars can be logged afterwards
# without creating a new SummaryWriter.
writer.close()
print("Testing finished!")

Starting testing...
Testing finished!


(85,)