In [4]:
import laspy as lp
import pdal
import numpy as np
import json
import os
from tqdm import tqdm


In [5]:
# !pip install tqdm laspy

The DALES data set is pre-split into 29 training files and 11 testing files. Every point carries one of the following class labels: ground (1), vegetation (2), cars (3), trucks (4), power lines (5), fences (6), poles (7), and buildings (8).

In [None]:
def preprocess(inputfile, outputfile, tile_size=100, num_neighbors=15, extra_dims=None):
    """Compute height above ground for one LAS file and write it out as square tiles.

    The PDAL pipeline built here:

    1. reads ``inputfile`` with ``readers.las``;
    2. swaps class labels 1 and 2 so that ground (1 in DALES) carries the
       value 2 while ``filters.hag_delaunay`` runs;
    3. runs ``filters.hag_delaunay`` with ``num_neighbors`` ground neighbours;
    4. swaps labels 1 and 2 back, restoring the dataset's own numbering;
    5. splits the cloud into tiles with ``filters.splitter``;
    6. writes the tiles with ``writers.las``.

    Args:
        inputfile: Path of the LAS file to read.
        outputfile: Output path handed to ``writers.las``. Callers in this
            notebook pass a name containing ``#`` so that every tile produced
            by the splitter ends up in its own file.
        tile_size: Side length passed to ``filters.splitter`` as ``length``.
        num_neighbors: Value passed to ``filters.hag_delaunay`` as ``count``.
        extra_dims: Optional value for the ``extra_dims`` option of
            ``writers.las`` (for example ``"HeightAboveGround=float32"`` or
            ``"all"``). With the default ``None`` the option is omitted and the
            pipeline is exactly the one this function has always built.
            NOTE(review): without it the LAS writer stores only standard LAS
            dimensions, so the HeightAboveGround values computed in step 3 are
            not saved in the tiles -- confirm whether downstream code needs them.

    Returns:
        Always ``0``; any error raised while building or executing the
        pipeline propagates to the caller.
    """
    # Swapping two labels takes three assignments, with 9 as the scratch value.
    # Adjust the expressions to the class values of your dataset.
    # NOTE(review): assumes no input point already carries class 9 -- confirm.
    swap_ground_label = [
        {"type": "filters.assign", "value": [expression]}
        for expression in (
            "Classification = 9 WHERE Classification == 2",
            "Classification = 2 WHERE Classification == 1",
            "Classification = 1 WHERE Classification == 9",
        )
    ]

    writer = {
        "type": "writers.las",
        "filename": outputfile,
    }
    if extra_dims is not None:
        writer["extra_dims"] = extra_dims

    pipe1 = {
        "pipeline": [
            {
                "type": "readers.las",
                "filename": inputfile,
                # NOTE(review): hard-coded CRS -- confirm it matches the dataset.
                "spatialreference": "EPSG:32615",
            },
            # Make ground class 2 for the height-above-ground filter ...
            *swap_ground_label,
            {
                "type": "filters.hag_delaunay",
                "count": num_neighbors,
            },
            # ... and apply the same swap again to restore the original labels.
            *swap_ground_label,
            # Tiling the point cloud
            {
                "type": "filters.splitter",
                "length": tile_size,
                # "origin_x":"638900.0",
                # "origin_y":"835500.0"
            },
            writer,
        ]
    }

    jsonstring = json.dumps(pipe1)
    p = pdal.Pipeline(jsonstring)
    p.execute()
    return 0

In [None]:
# --- Configuration ---------------------------------------------------------
TILE_SIZE = 50          # tile side length; also part of the output folder name
DSET = "train"
PREPROCESSING_DIR = f'dataset/dales_las/{DSET}'
PREPROCESSED_DIR = f'tiles_{TILE_SIZE}/{DSET}'
MAX_FILES = 2           # cap on the number of input files; None processes all of them

# Create the output folder up front instead of relying on the LAS writer to do it.
os.makedirs(PREPROCESSED_DIR, exist_ok=True)

# Keep only point-cloud files and sort them so that the MAX_FILES cap always
# selects the same inputs (os.listdir returns entries in arbitrary order).
all_las_files = sorted(
    name
    for name in os.listdir(PREPROCESSING_DIR)
    if name.lower().endswith((".las", ".laz"))
)[:MAX_FILES]

print("Preprocessing started ...")
for las_file in tqdm(all_las_files):
    # tqdm.write keeps the message from breaking up the progress bar.
    tqdm.write(f"Preprocessing las file: {las_file} ...")
    in_path = os.path.join(PREPROCESSING_DIR, las_file)

    las_number = os.path.splitext(las_file)[0]
    # '#' is filled in by the LAS writer, giving one output file per tile.
    out_path = os.path.join(PREPROCESSED_DIR, f"{las_number}_#.las")
    # Pass TILE_SIZE explicitly: the folder name advertises it, and without
    # the argument preprocess() falls back to its own default of 100.
    preprocess(in_path, out_path, tile_size=TILE_SIZE)

print("Preprocessing completed!")

Preprocessing started ...


  0%|          | 0/29 [00:00<?, ?it/s]

Preprocessing las file: 5080_54435.las ...


  0%|          | 0/29 [01:51<?, ?it/s]

Preprocessing completed!



