# Test training code for S2AE

In [1]:
import math
import sys
import time

import matplotlib.pyplot as plt
import numpy as np
import open3d as o3d
from scipy import spatial

import torch
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary
from tqdm.auto import tqdm

from data_splitter import DataSplitter
from training_set import TrainingSetLidarSeg
from loss import *
# from model_encode_decode_simple import ModelEncodeDecodeSimple
from model_simple_for_testing import ModelSimpleForTesting
from sphere import Sphere
from visualize import Visualize
    
%matplotlib inline
%load_ext autoreload
%autoreload 2

## Initialize some parameters

In [2]:
print("Initializing CUDA...")
torch.cuda.set_device(0)
# NOTE: cudnn.benchmark lets cuDNN pick convolution algorithms at runtime; this
# can still make results vary slightly between runs even with the seed below.
torch.backends.cudnn.benchmark = True

print("Setting parameters...")
seed = 42             # RNG seed for torch and numpy
bandwidth = 100       # passed to the model and to the training set
learning_rate = 4.5e-3
n_epochs = 5
batch_size = 2
num_workers = 32      # forwarded to DataSplitter.get_split
n_classes = 9

# Seed before the model is built so that any torch/numpy-based randomness
# (presumably weight initialisation and the shuffled data split -- confirm
# against ModelSimpleForTesting / DataSplitter) repeats across kernel restarts.
torch.manual_seed(seed)
np.random.seed(seed)

print("Initializing data structures...")
net = ModelSimpleForTesting(bandwidth=bandwidth, n_classes=n_classes).cuda()
optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)

# Alternative criteria kept for quick switching:
# criterion = L2Loss(alpha=0.5, margin=0.2)
# criterion = CrossEntropyLoss(n_classes=n_classes)
criterion = NegativeLogLikelihoodLoss(n_classes=n_classes)

# TensorBoard writer (default log directory) and the checkpoint file name.
writer = SummaryWriter()
model_save = 'test_training_params.pkl'

print("All instances initialized.")

Initializing CUDA...
Setting parameters...
Initializing data structures...
All instances initialized.


## Load the dataset

In [3]:
# Directory that holds the exported .npy arrays (alternative location below).
# export_ds = '/mnt/data/datasets/nuscenes/processed'
export_ds = '/media/scratch/berlukas/nuscenes'

# Arrays read by this notebook.
img_filename = f"{export_ds}/images.npy"
cloud_filename = f"{export_ds}/clouds.npy"
sem_clouds_filename = f"{export_ds}/sem_classes_gt.npy"

# Arrays written by the testing cell.
dec_clouds = f"{export_ds}/decoded.npy"
dec_indices = f"{export_ds}/indices.npy"

print(
    f"Loading from images from {img_filename}, "
    f"clouds from {cloud_filename} and sem clouds from {sem_clouds_filename}"
)
# Load the three inputs in the same order as they are listed above.
img_features, cloud_features, sem_cloud_features = (
    np.load(path)
    for path in (img_filename, cloud_filename, sem_clouds_filename)
)
print(
    f"Shape of images is {img_features.shape}, "
    f"clouds is {cloud_features.shape} and sem clouds is {sem_cloud_features.shape}"
)

Loading from images from /media/scratch/berlukas/nuscenes/images.npy, clouds from /media/scratch/berlukas/nuscenes/clouds.npy and sem clouds from /media/scratch/berlukas/nuscenes/sem_classes_gt.npy
Shape of images is (850, 1, 200, 200), clouds is (850, 2, 200, 200) and sem clouds is (850, 200, 200)


In [4]:
# Build the dataset from the point-cloud features and their semantic labels.
train_set = TrainingSetLidarSeg(bandwidth, cloud_features, sem_cloud_features)
print(f"Total size of the training set: {len(train_set)}")

# Partition the dataset into train, validation and (optionally) test subsets.
# NOTE(review): the meaning of the positional `False` is defined by
# DataSplitter and is not visible here -- confirm before changing it.
split = DataSplitter(train_set, False, test_train_split=0.9, shuffle=True)
train_loader, val_loader, test_loader = split.get_split(
    batch_size=batch_size,
    num_workers=num_workers,
)
train_size, val_size, test_size = (
    split.get_train_size(),
    split.get_val_size(),
    split.get_test_size(),
)

print("Training size: ", train_size)
print("Validation size: ", val_size)
if test_size != 0:
    print("Testing size: ", test_size)
else:
    print('Test size is 0. Configured for external tests')

Total size of the training set: 850
Training size:  688
Validation size:  77
Testing size:  85


In [5]:
def adjust_learning_rate_exp(optimizer, epoch_num, lr, decay_rate=0.96):
    """Apply an exponentially decayed learning rate to every parameter group.

    The new rate is ``lr * decay_rate ** epoch_num`` and is written into the
    ``'lr'`` entry of each group in ``optimizer.param_groups``.

    Args:
        optimizer: Object exposing ``param_groups`` as an iterable of dicts
            (e.g. a ``torch.optim`` optimizer).
        epoch_num: Exponent of the decay, i.e. the current epoch index.
        lr: Base learning rate (the value used when ``epoch_num`` is 0).
        decay_rate: Multiplicative decay per epoch. Defaults to 0.96, the
            value that was previously hard-coded.

    Returns:
        The learning rate that was written into the optimizer.
    """
    new_lr = lr * math.pow(decay_rate, epoch_num)
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr

    return new_lr

def train_lidarseg(net, criterion, optimizer, writer, epoch, n_iter, loss_, t0, log_interval=20):
    """Run one training epoch over the notebook-global ``train_loader``.

    For every batch the network output is scored by ``criterion``, the loss is
    back-propagated, the optimizer takes a step and the loss is logged to
    ``writer`` under ``'Train/Loss'``. Every ``log_interval`` batches a
    progress line with the mean accumulated loss, the elapsed time in minutes
    and the current learning rate is printed.

    Args:
        net: Model to train; called as ``net(cloud)``.
        criterion: Callable returning ``(loss, loss_total)``; ``loss`` is
            back-propagated, ``loss_total.item()`` feeds the running average.
        optimizer: Optimizer being stepped. Its first parameter group's
            ``'lr'`` is shown in the progress line.
        writer: Object with ``add_scalar(tag, value, step)``.
        epoch: Zero-based epoch index (printed as ``epoch + 1``).
        n_iter: Global step counter used as the logging step.
        loss_: Initial value of the running loss accumulator. Updates are
            local to this call and are not visible to the caller.
        t0: Start timestamp (seconds) for the first timing window; also only
            updated locally.
        log_interval: Number of batches between progress lines. Defaults to
            20, the value that was previously hard-coded.

    Returns:
        The step counter after the epoch (``n_iter`` plus the number of
        processed batches).
    """
    net.train()
    for batch_idx, (cloud, lidarseg_gt) in enumerate(train_loader):
        cloud, lidarseg_gt = cloud.cuda().float(), lidarseg_gt.cuda().long()

        enc_dec_cloud = net(cloud)
        loss, loss_total = criterion(enc_dec_cloud, lidarseg_gt)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_ += loss_total.item()

        writer.add_scalar('Train/Loss', loss, n_iter)
        n_iter += 1

        if batch_idx % log_interval == log_interval - 1:
            t1 = time.time()
            # Read the rate from the optimizer rather than from a notebook
            # global, so the function works regardless of cell run order.
            current_lr = optimizer.param_groups[0]['lr']
            print('[Epoch %d, Batch %4d] loss: %.8f time: %.5f lr: %.3e' %
                  (epoch + 1, batch_idx + 1, loss_ / log_interval,
                   (t1 - t0) / 60, current_lr))
            t0 = t1
            loss_ = 0.0
    return n_iter

def validate_lidarseg(net, criterion, optimizer, writer, epoch, n_iter):
    """Evaluate the model on the notebook-global ``val_loader``.

    Runs in eval mode with gradient tracking disabled and logs the loss of
    every batch to ``writer`` under ``'Validation/Loss'``.

    Args:
        net: Model to evaluate; called as ``net(cloud)``.
        criterion: Callable returning ``(loss, loss_total)``; only ``loss``
            is logged.
        optimizer: Unused. Kept so existing call sites keep working;
            validation no longer touches optimizer or gradient state.
        writer: Object with ``add_scalar(tag, value, step)``.
        epoch: Unused. Kept for signature compatibility.
        n_iter: Global validation step counter used as the logging step.

    Returns:
        The step counter after the pass (``n_iter`` plus the number of
        processed batches).
    """
    net.eval()
    with torch.no_grad():
        for cloud, lidarseg_gt in val_loader:
            cloud, lidarseg_gt = cloud.cuda().float(), lidarseg_gt.cuda().long()
            enc_dec_cloud = net(cloud)

            loss, _ = criterion(enc_dec_cloud, lidarseg_gt)

            writer.add_scalar('Validation/Loss', loss, n_iter)
            n_iter += 1
    return n_iter

def test_lidarseg(net, criterion, writer):
    all_decoded_clouds = [None] * test_size
    k = 0
    net.eval()
    with torch.no_grad():            
        for batch_idx, (cloud, lidarseg_gt) in enumerate(test_loader):
            cloud, lidarseg_gt = cloud.cuda().float(), lidarseg_gt.cuda().long()
            enc_dec_cloud = net(cloud)
            n_batch = enc_dec_cloud.shape[0]
            for i in range(0, n_batch):
                all_decoded_clouds[k] = enc_dec_cloud.cpu().data.numpy()[i,:,:,:]
                k = k + 1
    return all_decoded_clouds            

## Training Loop

In [6]:
# Bookkeeping shared across epochs: global step counters for the two
# TensorBoard curves and the initial running-loss value handed to each epoch.
abort = False
train_iter, val_iter = 0, 0
loss_ = 0.0

print(f'Starting training using {n_epochs} epochs')
for epoch in tqdm(range(n_epochs)):
    # Set this epoch's decayed learning rate before any batch is processed.
    lr = adjust_learning_rate_exp(optimizer, epoch_num=epoch, lr=learning_rate)
    t0 = time.time()

    # One pass over the training data, then one over the validation data;
    # each returns its updated step counter.
    train_iter = train_lidarseg(
        net, criterion, optimizer, writer, epoch, train_iter, loss_, t0)
    val_iter = validate_lidarseg(
        net, criterion, optimizer, writer, epoch, val_iter)
    writer.add_scalar('Train/lr', lr, epoch)

print("Training finished!")
# Persist only the weights (state dict), not the whole module object.
torch.save(net.state_dict(), model_save)

Starting training using 5 epochs


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

shape of decoded is torch.Size([2, 9, 200, 200]) and teacher is torch.Size([2, 200, 200])




shape of decoded is torch.Size([2, 9, 200, 200]) and teacher is torch.Size([2, 200, 200])
shape of decoded is torch.Size([2, 9, 200, 200]) and teacher is torch.Size([2, 200, 200])
shape of decoded is torch.Size([2, 9, 200, 200]) and teacher is torch.Size([2, 200, 200])
shape of decoded is torch.Size([2, 9, 200, 200]) and teacher is torch.Size([2, 200, 200])
shape of decoded is torch.Size([2, 9, 200, 200]) and teacher is torch.Size([2, 200, 200])
shape of decoded is torch.Size([2, 9, 200, 200]) and teacher is torch.Size([2, 200, 200])
shape of decoded is torch.Size([2, 9, 200, 200]) and teacher is torch.Size([2, 200, 200])
shape of decoded is torch.Size([2, 9, 200, 200]) and teacher is torch.Size([2, 200, 200])
shape of decoded is torch.Size([2, 9, 200, 200]) and teacher is torch.Size([2, 200, 200])
shape of decoded is torch.Size([2, 9, 200, 200]) and teacher is torch.Size([2, 200, 200])
shape of decoded is torch.Size([2, 9, 200, 200]) and teacher is torch.Size([2, 200, 200])
shape of d

Exception ignored in: <function WeakValueDictionary.__init__.<locals>.remove at 0x7f5ec3dcb510>
Traceback (most recent call last):
  File "/home/berlukas/workspace/python/pytorch-venv/lib/python3.6/weakref.py", line 109, in remove
    def remove(wr, selfref=ref(self), _atomic_removal=_remove_dead_weakref):
KeyboardInterrupt
E0501 16:24:45.441156 140045051459392 ultratb.py:152] Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/berlukas/workspace/python/pytorch-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-6-c3c15f675b1c>", line 10, in <module>
    train_iter = train_lidarseg(net, criterion, optimizer, writer, epoch, train_iter, loss_, t0)
  File "<ipython-input-5-5eedd93b3644>", line 22, in train_lidarseg
    loss_ += loss_total.item()
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/berlukas/workspace/python/pytorch-venv/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2044, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'KeyboardInterrupt' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/berlukas/w

KeyboardInterrupt: 

## Testing

In [8]:
# Run inference on the held-out split and store the outputs next to the
# indices of the samples they belong to.
print("Starting testing...")

# Release cached GPU memory held by the allocator before running inference.
torch.cuda.empty_cache()
decoded_clouds = test_lidarseg(net, criterion, writer)
# Indices of the test samples as exposed by the splitter.
test_indices = np.array(split.test_indices)

# Write both arrays to the paths defined in the data loading cell.
np.save(dec_indices, test_indices)
np.save(dec_clouds, decoded_clouds)

# The writer is closed here, so nothing can be logged to it after this cell.
writer.close()
print("Testing finished!")

Starting testing...
Testing finished!


(85,)