In [1]:
import json
import pdal
import pathlib

In [2]:
from dask.distributed import Client, as_completed

# Reclassify AHN2 ground points
The ground points have the field `raw_classification` set to 0 (never classified). It is necessary to set this to 2 (ground points) for the calculation of the pulse penetration ratio.

## Set input

In [3]:
# Directory holding the original AHN2 terrain tiles (LAZ files).
# For a local test run, point this at pathlib.Path('/data/local/tmp') instead.
input_path = pathlib.Path('/data/local/home/eecolidar_webdav/00_Data/ALS/Netherlands/ahn2/terrain')
# Directory that receives the reclassified tiles.
# For a local test run, use pathlib.Path('/data/local/tmp/reclassified') instead.
output_path = pathlib.Path('/data/local/home/eecolidar_webdav/01_Escience/ALS/Netherlands/ahn2/terrain_reclassified')

# Run mode: 'all' processes every LAZ file found in `input_path`;
# 'from_file' processes only the files listed in `filename`.
run = 'from_file'
# JSON file with the input file names; only read when run is 'from_file'.
filename = 'reclassification_failed.json'
assert run in ('all', 'from_file')

In [4]:
# Discover every LAZ tile available in the input directory.
files = [el for el in input_path.iterdir() if el.suffix == '.laz']
print('Found: {} LAZ files'.format(len(files)))
if run == 'from_file':
    # Restrict the run to the files listed in the JSON file.
    with open(filename, 'r') as json_file:
        files_read = [pathlib.Path(name) for name in json.load(json_file)]
    # Check that every requested file is available. A set gives constant-time
    # lookups, instead of scanning the full listing once per requested file.
    available = set(files)
    missing = [path for path in files_read if path not in available]
    assert not missing, (
        f'Some of the files in {filename} are not in remote dir: '
        + ', '.join(path.as_posix() for path in missing)
    )
    files = files_read
print('Retrieve and reclassify: {} LAZ files'.format(len(files)))

Found: 30080 LAZ files
Retrieve and reclassify: 45 LAZ files


## Connect to Dask cluster

In [5]:
# Address (host:port) of the Dask scheduler this notebook connects to.
scheduler_address = 'node1:8786'
client = Client(scheduler_address)
# Display the client summary (scheduler, dashboard, workers) as the cell output.
client

0,1
Client  Scheduler: tcp://node1:8786  Dashboard: /proxy/8787/status,Cluster  Workers: 0  Cores: 0  Memory: 0 B


## Classification using PDAL

In [6]:
def classify_as_ground_points(input_file, output_file):
    """Set the classification of all points in a LAS/LAZ file to 2 (ground).

    Builds a three-stage PDAL pipeline (read, assign, write), serialises it
    to JSON and executes it.

    Parameters
    ----------
    input_file : str
        Path of the point-cloud file handed to the `readers.las` stage.
    output_file : str
        Path of the file handed to the `writers.las` stage.

    Returns
    -------
    None
        The result is the file written at `output_file`.
    """
    reader_stage = {
        "tag": "ground_laz",
        "type": "readers.las",
        "filename": input_file,
    }
    # Overwrite the Classification dimension with 2 (ground points).
    assign_stage = {
        "type": "filters.assign",
        "assignment": "Classification[:]=2",
        "tag": "ground_classed",
    }
    # Forward the scale factors from the input header; offsets are set to "auto".
    writer_stage = {
        "type": "writers.las",
        "filename": output_file,
        "forward": ["scale_x", "scale_y", "scale_z"],
        "offset_x": "auto",
        "offset_y": "auto",
        "offset_z": "auto",
    }
    pipeline_json = json.dumps(
        {"pipeline": [reader_stage, assign_stage, writer_stage]}
    )
    pdal.Pipeline(pipeline_json).execute()

## Run!

In [7]:
# One output path per input file: same base name with '_reclassified' inserted
# before the extension. The name is built from the stem so that only the
# trailing '.laz' extension is affected, not other '.laz' occurrences in the name.
out_files = [output_path / f'{f.stem}_reclassified.laz' for f in files]

In [10]:
# Submit one reclassification task per (input, output) pair to the cluster.
futures = [
    client.submit(classify_as_ground_points, src.as_posix(), dst.as_posix())
    for src, dst in zip(files, out_files)
]
# Futures complete in arbitrary order: remember each one's position in `files`.
map_key_to_index = {task.key: position for position, task in enumerate(futures)}
errors = [None for _ in range(len(files))]
outcome = [task.status for task in futures]
# raise_errors=False keeps the loop going when a task fails, so that the
# status and exception of every task can be recorded.
for done, _result in as_completed(futures, with_results=True, raise_errors=False):
    position = map_key_to_index[done.key]
    outcome[position] = done.status
    failure = done.exception()
    if failure is not None:
        errors[position] = (type(failure), failure)
    done.release()

In [None]:
# Write a plain-text report with one line per input file: a 1-based counter,
# the file name, the final task status and, for failures, the exception
# type and message.
with open('reclassification.out', 'w') as report:
    rows = zip(outcome, errors, files)
    for counter, (status, error, path) in enumerate(rows, start=1):
        summary = status if error is None else '{}: {}, {}'.format(status, error[0].__name__, error[1])
        report.write('{:03d} {:30s} {}\n'.format(counter, path.name, summary))

In [12]:
# Collect the input files whose task did not end with status 'finished'.
failed = []
for status, path in zip(outcome, files):
    if status != 'finished':
        failed.append(path.as_posix())
if len(failed) > 0:
    # Save the list of failed inputs as JSON, then flag the failure.
    with open('reclassification_failed.json', 'w') as json_file:
        json.dump(failed, json_file)
    raise RuntimeError('Some of the reclassifications have failed')

In [13]:
# Close the Dask client created earlier in this notebook.
client.close()