In [1]:
%load_ext autoreload
%autoreload 2

In [9]:
from src.models.train_s3dis import *

In [3]:

###################
# Environment setup
###################

# Index of the GPU this notebook is allowed to use, kept as a string because it is written to the environment
GPU_ID = '0'

# Restrict the CUDA devices visible to this process to the chosen GPU
os.environ.update(CUDA_VISIBLE_DEVICES=GPU_ID)

######################
# Checkpoint selection
######################

# Name of a previous run folder under 'results' to resume from; leave empty to start a new training
# previous_training_path = 'Log_2020-03-19_19-53-27'
previous_training_path = ''

# Position of the snapshot in the sorted list of checkpoint files; None selects 'current_chkp.tar'
chkp_idx = None

if not previous_training_path:

    # No previous run was named: there is no checkpoint to restore
    chosen_chkp = None

else:

    # Collect every snapshot file (name starting with 'chkp') stored for the chosen run
    chkp_path = os.path.join('results', previous_training_path, 'checkpoints')
    chkps = [f for f in os.listdir(chkp_path) if f.startswith('chkp')]

    # Pick the snapshot file name, then turn it into a path inside the checkpoints folder
    chosen_chkp = 'current_chkp.tar' if chkp_idx is None else np.sort(chkps)[chkp_idx]
    chosen_chkp = os.path.join('results', previous_training_path, 'checkpoints', chosen_chkp)

##############
# Prepare Data
##############

print()
print('Data Preparation')
print('****************')

# Initialize configuration class
config = S3DISConfig()
if previous_training_path:
    # Resuming: load the configuration from the previous run folder and reset its saving path
    config.load(os.path.join('results', previous_training_path))
    config.saving_path = None

# Get path from argument if given.
# Inside a Jupyter kernel sys.argv is never just the program name: the kernel is
# launched as `ipykernel_launcher -f <connection file>`, so sys.argv[1] is the
# option flag '-f' rather than a saving path. Only accept a first argument that
# does not look like an option flag.
if len(sys.argv) > 1 and not sys.argv[1].startswith('-'):
    config.saving_path = sys.argv[1]

# Initialize datasets (both splits use potentials)
training_dataset = S3DISDataset(config, set='training', use_potentials=True)
test_dataset = S3DISDataset(config, set='validation', use_potentials=True)

# Initialize samplers
training_sampler = S3DISSampler(training_dataset)
test_sampler = S3DISSampler(test_dataset)

# Initialize the dataloaders; both share the same options and differ only in dataset and sampler
loader_options = dict(batch_size=1,
                      collate_fn=S3DISCollate,
                      num_workers=config.input_threads,
                      pin_memory=True)
training_loader = DataLoader(training_dataset, sampler=training_sampler, **loader_options)
test_loader = DataLoader(test_dataset, sampler=test_sampler, **loader_options)

# Calibrate samplers
training_sampler.calibration(training_loader, verbose=True)
test_sampler.calibration(test_loader, verbose=True)


Data Preparation
****************

Preparing ply files
Done in 0.0s

Found KDTree for cloud Area_1, subsampled at 0.030
146.6 MB loaded in 0.2s

Found KDTree for cloud Area_2, subsampled at 0.030
159.1 MB loaded in 0.2s

Found KDTree for cloud Area_3, subsampled at 0.030
62.6 MB loaded in 0.1s

Found KDTree for cloud Area_4, subsampled at 0.030
144.2 MB loaded in 0.2s

Found KDTree for cloud Area_6, subsampled at 0.030
137.9 MB loaded in 0.2s

Preparing potentials
Done in 0.0s


Preparing ply files
Done in 0.0s

Found KDTree for cloud Area_5, subsampled at 0.030
260.4 MB loaded in 0.3s

Preparing potentials
Done in 0.0s

Preparing reprojection indices for testing
Area_5 done in 0.3s


Starting Calibration (use verbose=True for more details)

Previous calibration found:
Check batch limit dictionary
[92m"potentials_1.500_0.030_6": 78201[0m
Check neighbors limit dictionary
[92m"0.030_0.075": 29[0m
[92m"0.060_0.150": 32[0m
[92m"0.120_0.720": 272[0m
[92m"0.240_1.440": 235[0m
[92

In [4]:

# Announce the model stage (banner on two lines)
print('\nModel Preparation', '*****************', sep='\n')

# Record the start time in t1, which the trainer cell reads to report the elapsed time
t1 = time.time()

# Build the network from the configuration and the label information of the training dataset
net = KPFCNN(config, training_dataset.label_values, training_dataset.ignored_labels)


Model Preparation
*****************


In [5]:

# Build the trainer for the network, passing the selected checkpoint path (None when no previous run was chosen)
trainer = ModelTrainer(net, config, chkp_path=chosen_chkp)

# Report the time elapsed since t1 was recorded
elapsed = time.time() - t1
print('Done in {:.1f}s\n'.format(elapsed))

Done in 43.2s



In [11]:
# Announce the training stage (banner on two lines)
print('\nStart training', '**************', sep='\n')

# NOTE(review): the run recorded in this notebook stopped with "CUDA out of memory" on a 5.79 GiB GPU.
# Presumably the batch-size related settings in `config` must be lowered for this device - TODO confirm.

# Launch training with the training loader and the validation loader
trainer.train(net, training_loader, test_loader, config)


Start training
**************


RuntimeError: CUDA out of memory. Tried to allocate 394.00 MiB (GPU 0; 5.79 GiB total capacity; 3.52 GiB already allocated; 320.62 MiB free; 3.96 GiB reserved in total by PyTorch)