In [1]:
import torch
import sys
from torch._C import LongStorageBase

sys.path.append("./Models")
from Models.utils import *
from Data.dataset import CarlaDataset
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import math
from tqdm import tqdm
import numpy as np
import random
import argparse
import os
import json

import time
import numpy as np
import os
import json
import pdb
from PIL import Image
import psutil

from torch.utils.tensorboard import SummaryWriter

from Models.MotionSC import MotionSC

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
# ---------------------------------------------------------------------------
# Experiment configuration: every tunable value used by the cells below.
# ---------------------------------------------------------------------------

# Seed handed to setup_seed() for torch / numpy / random.
seed = 42

# Grid resolution along x, y and z; passed to MotionSC and used to derive voxel sizes.
x_dim, y_dim, z_dim = 128, 128, 8

# Used to name the TensorBoard run, the scalar tags and the checkpoint folder.
model_name = "MotionSC"

num_classes = 23

# B is the DataLoader batch size, T the num_frames given to CarlaDataset.
B, T = 8, 16

in_dim = num_classes + 3 + 3  # Input feature size per point
enc_dim = 64  # Size of point pillars
out_dim = 23  # Number of semantic classes (same value as num_classes)

# Dataset locations and coordinate-system flag forwarded to CarlaDataset.
train_dir = "./Data/Scenes/Cartesian/Train"
val_dir = "./Data/Scenes/Cartesian/Val"
cylindrical = False

# Optimisation settings: Adam learning rate and number of epochs.
lr = 0.001
epoch_num = 500

# Fall back to the CPU when no CUDA device is available.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Uniform class weights for the loss, except class index 0 which is down-weighted.
# NOTE(review): index 0 is presumably the dominant "free/empty" class -- confirm.
weights = torch.ones(num_classes)
weights[0] = 0.1

criterion = nn.CrossEntropyLoss(weight=weights.to(device))

# Multiplicative learning-rate decay (gamma of the ExponentialLR scheduler).
decayRate = 0.96

# TensorBoard log directory and checkpoint directory for this model.
writer = SummaryWriter(f"./Models/Runs/{model_name}")
save_dir = f"./Models/Weights/{model_name}"

# Optional checkpoint to restore before training; None starts from scratch.
MODEL_PATH = None

num_workers = 0

# When True, visualize_set() is called right after the model is built.
VISUALIZE = False

RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [None]:
# Per-class ratios for the Cartesian data: 23 entries written as percentages
# and divided by 100 to obtain fractions.
# NOTE(review): presumably the label frequency of each semantic class, indexed
# by class id -- confirm where these numbers were measured.
ratios_cartesian = np.array([
    88.01, 0.89, 0.18, 0.01, 0.02, 0.06,
    0.14, 4.84, 2.38, 0.63, 0.29, 0.33,
    0.01, 0.01, 0.17, 0.01, 0.01, 0.19,
    0.01, 0.08, 0.07, 0.01, 1.68,
]) / 100

In [None]:
def setup_seed(seed=42):
    """Seed every random number generator this notebook relies on.

    Args:
        seed: Integer seed applied, in order, to PyTorch (CPU), PyTorch (all
            CUDA devices), NumPy's global generator and Python's ``random``.

    Returns:
        None.
    """
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
        random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
# Seed all RNGs with the notebook-level `seed` before the datasets, loaders and model are created.
setup_seed(seed)

In [None]:
# Training split: reshuffled every epoch.
carla_ds = CarlaDataset(directory=train_dir, device=device, num_frames=T, cylindrical=cylindrical)
dataloader = DataLoader(carla_ds, batch_size=B, shuffle=True, collate_fn=carla_ds.collate_fn, num_workers=num_workers)

# Validation split: iterated in a fixed order. The validation metrics are
# aggregated over the whole split, so shuffling would not change them; it would
# only consume global RNG state and make per-batch results non-reproducible.
val_ds = CarlaDataset(directory=val_dir, device=device, num_frames=T, cylindrical=cylindrical)
dataloader_val = DataLoader(val_ds, batch_size=B, shuffle=False, collate_fn=val_ds.collate_fn, num_workers=num_workers)

# Single-sample, ordered loader used for visualisation.
# NOTE(review): this "test" dataset reads val_dir, i.e. the same data as val_ds --
# confirm whether a separate test directory was intended.
test_ds = CarlaDataset(directory=val_dir, device=device, num_frames=T, cylindrical=cylindrical)
dataloader_test = DataLoader(test_ds, batch_size=1, shuffle=False, collate_fn=test_ds.collate_fn, num_workers=num_workers)

In [None]:
# Build the model from the dataset's spatial bounds and, optionally, restore a
# checkpoint and visualise predictions.

# coor_ranges is indexed as [min_x, min_y, min_z, max_x, max_y, max_z] below.
# NOTE(review): this relies on min_bound / max_bound being Python lists so that
# `+` concatenates them (numpy arrays would add element-wise) -- confirm.
coor_ranges = carla_ds._eval_param['min_bound'] + carla_ds._eval_param['max_bound']

# Edge length of one voxel along each axis: extent divided by the grid resolution.
# (Original author's note on the z entry: "since BEV".)
voxel_sizes = [
    abs(coor_ranges[axis + 3] - coor_ranges[axis]) / resolution
    for axis, resolution in enumerate((x_dim, y_dim, z_dim))
]

model = MotionSC(voxel_sizes, coor_ranges, [x_dim, y_dim, z_dim], T=T, device=device)

if MODEL_PATH:
    # map_location lets a checkpoint saved on a GPU be restored when `device`
    # has fallen back to the CPU (or to a different GPU).
    model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
    model.eval()
if VISUALIZE:
    visualize_set(model, dataloader_test, carla_ds, cylindrical)

In [None]:
def _to_device(batch):
    """Return `batch` as a tensor on `device`.

    torch.as_tensor accepts numpy arrays, lists and existing tensors without
    the extra copy (and UserWarning) that torch.tensor() incurs on tensors.
    """
    return torch.as_tensor(batch).to(device)


def _flatten_and_mask(preds, output, counts):
    """Flatten a batch to per-voxel rows and keep only observed voxels.

    Args:
        preds: Model output whose last (fifth) dimension holds the class scores.
        output: Target labels, one per voxel.
        counts: Per-voxel counts; voxels with a count of 0 are ignored.

    Returns:
        (preds_masked, output_masked): scores of shape (N, C) and int64 labels
        of shape (N,), which is the layout the criterion requires.
    """
    counts = counts.view(-1)
    output = output.view(-1).long()
    preds = preds.contiguous().view(-1, preds.shape[4])
    mask = counts > 0
    return preds[mask], output[mask]


# torch.save does not create missing directories; without this the run would
# fail only after the whole first epoch has been trained.
os.makedirs(save_dir, exist_ok=True)

optimizer = optim.Adam(model.parameters(), lr=lr)
my_lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=decayRate)

# Number of training samples seen so far; used as the TensorBoard step.
train_count = 0
for epoch in range(epoch_num):
    # Training
    model.train()
    for input_data, output, counts in dataloader:
        optimizer.zero_grad()
        input_data = _to_device(input_data)
        output = _to_device(output)
        counts = _to_device(counts)
        batch_size = input_data.shape[0]

        preds = model(input_data)
        preds_masked, output_masked = _flatten_and_mask(preds, output, counts)

        # A batch without any observed voxel would give a NaN loss and a
        # division by zero in the accuracy, so it is skipped.
        if output_masked.numel() == 0:
            train_count += batch_size
            continue

        loss = criterion(preds_masked, output_masked)

        loss.backward()
        optimizer.step()

        # Accuracy (argmax of the raw scores equals argmax of their softmax)
        with torch.no_grad():
            accuracy = (preds_masked.argmax(dim=1) == output_masked).float().mean().item()

        # Record
        writer.add_scalar(model_name + '/Loss/Train', loss.item(), train_count)
        writer.add_scalar(model_name + '/Accuracy/Train', accuracy, train_count)

        train_count += batch_size

    # Save model, decrease learning rate
    my_lr_scheduler.step()
    torch.save(model.state_dict(), os.path.join(save_dir, "Epoch" + str(epoch) + ".pt"))

    # Validation
    model.eval()
    with torch.no_grad():
        running_loss = 0.0
        counter = 0
        num_correct = 0
        num_total = 0
        for input_data, output, counts in dataloader_val:
            input_data = _to_device(input_data)
            output = _to_device(output)
            counts = _to_device(counts)
            batch_size = input_data.shape[0]

            preds = model(input_data)
            preds_masked, output_masked = _flatten_and_mask(preds, output, counts)
            if output_masked.numel() == 0:
                continue

            loss = criterion(preds_masked, output_masked)

            # criterion returns a per-batch mean, so weight it by the batch size
            # before dividing by the number of samples below. Summing the means
            # unweighted would under-report the loss by roughly the batch size.
            running_loss += loss.item() * batch_size
            counter += batch_size

            # Accuracy
            num_correct += (preds_masked.argmax(dim=1) == output_masked).sum().item()
            num_total += output_masked.numel()

        # max(..., 1) keeps an empty validation set from raising ZeroDivisionError.
        val_loss = running_loss / max(counter, 1)
        val_accuracy = num_correct / max(num_total, 1)

        print(f'Eppoch Num: {epoch} ------ average val loss: {val_loss}')
        print(f'Eppoch Num: {epoch} ------ average val accuracy: {val_accuracy}')
        writer.add_scalar(model_name + '/Loss/Val', val_loss, epoch)
        writer.add_scalar(model_name + '/Accuracy/Val', val_accuracy, epoch)

writer.close()