In [None]:
import copy
import json

from pathlib import Path
from urllib.request import urlretrieve

from lc_macro_pipeline.retiler import Retiler
from lc_macro_pipeline.data_processing import DataProcessing
from lc_macro_pipeline.geotiff_writer import Geotiff_writer
from lc_macro_pipeline.macro_pipeline import MacroPipeline

# Macro-ecology LiDAR point-cloud processing pipeline 

We start by checking whether the test data set is available locally; if it is not, we retrieve it from the AHN3 repository.

In [None]:
# Local folder and file names of the test point-cloud data used below.
testdata_dir = 'testdata'
testdata_files = ['C_41CZ2.LAZ']

file_paths = [Path(testdata_dir).joinpath(f) for f in testdata_files]

# Remote folder from which missing test files are fetched.
ahn3_base_url = 'https://geodata.nationaalgeoregister.nl/ahn3/extract/ahn3_laz'

for file_path in file_paths:
    if not file_path.is_file():
        # urlretrieve does not create missing directories, so make sure the
        # destination folder exists before downloading.
        file_path.parent.mkdir(parents=True, exist_ok=True)
        # Only the bare file name is appended to the URL: str.join() rejects
        # Path objects, and the local 'testdata' prefix is not expected to be
        # part of the remote location.
        file_url = '/'.join([ahn3_base_url, file_path.name])
        urlretrieve(file_url, file_path)

Files generated by the pipeline will be saved in the `temp_folder` directory. 

In [None]:
# Directory handed to the pipeline steps as their temporary folder.
temp_folder = Path('/') / 'tmp'

## 1. Retiling

The first step in the pipeline is to retile the retrieved point-cloud files to a regular grid, splitting the original data into smaller chunks that are easier to handle for data processing. The boundaries of the grid and the number of tiles along each axis are set to:

In [None]:
# Boundaries of the retiling grid and number of tiles along each axis.
grid = dict(
    min_x=-113107.8100,
    max_x=398892.1900,
    min_y=214783.8700,
    max_y=726783.87,
    n_tiles_side=256,
)

The retiling of multiple input files consists of independent tasks, which are thus efficiently parallelized. The input controlling all the steps of the retiling is organized in a dictionary.

In [None]:
retiling_macro = MacroPipeline()

# TODO: set up the dask cluster/client
# from dask.distributed import Client, SetupMyCluster
# cluster = SetupMyCluster()
# client = Client(cluster)
# retiling_macro.set_client(client)

# Register one retiling task per input file; every task uses the same grid.
for file_path in file_paths:
    retiler = Retiler()
    # Where the input file lives and where the retiled output should go.
    localfs_input = dict(
        input_folder=file_path.parent.as_posix(),
        input_file=file_path.name,
        temp_folder=temp_folder,
    )
    retiler.input = dict(
        localfs=localfs_input,
        tiling=grid,
        split_and_redistribute={},
        validate={},
    )
    retiling_macro.add_task(retiler)

res = retiling_macro.run()

## 2. Feature Extraction

Once the files are split into tiles of a manageable size, we proceed to the feature extraction stage. The base input dictionary for this step looks like:

In [None]:
# Input shared by all data-processing tasks; the tile-specific entries are
# added to a copy of this dictionary for each task.
dp_input_base = dict(
    normalize=dict(
        cell_size=1,
    ),
    generate_targets=dict(
        min_x=-113107.8100,
        max_x=398892.1900,
        min_y=214783.8700,
        max_y=726783.87,
        n_tiles_side=256,
        tile_mesh_size=10.0,
        validate=True,
    ),
    extract_features=dict(
        feature_names=["coeff_var_normalized_height"],
        volume_type="cell",
        volume_size=10,
    ),
)

The tiles to which the original input file has been retiled are listed in a record file located in the temporary directory:

In [None]:
# Gather, for every input file, the tiles listed in its retile record.
tiles = []
for file_path in file_paths:
    tile_folder = temp_folder / file_path.stem
    record_name = '_'.join((file_path.stem, 'retile_record.js'))
    with (tile_folder / record_name).open() as record_stream:
        record = json.load(record_stream)
    # Stop here if the record is not flagged as validated.
    assert record['validated']
    tiles.extend(tile_folder / tile_name
                 for tile_name in record['redistributed_to'])
print([tile_path.as_posix() for tile_path in tiles])

Each tile can be processed independently, so that again one can run the tasks in a parallel fashion.

In [None]:
dp_macro = MacroPipeline()
# dp_macro.set_client(client)

# Only the first tile is processed here (presumably to keep the example quick);
# loop over `tiles` instead to process all of them.
for tile in [tiles[0]]:
    # Deep-copy the base input: a shallow copy would share the nested
    # 'generate_targets' dict, so the tile indices set below would leak into
    # `dp_input_base` and into the input of every other task.
    dp_input = copy.deepcopy(dp_input_base)
    dp_input.update({
        'load': {'path': tile.as_posix()},
        'export_targets': {'path': tile.with_suffix('.PLY').as_posix()}
    })
    # The tile name is assumed to look like '<prefix>_<x>_<y>'; the stem is
    # parsed so that a file extension, if present, does not end up in the
    # y index and break the int() conversion.
    tile_name_parts = tile.stem.split('_')
    dp_input['generate_targets'].update({
        'index_tile_x': int(tile_name_parts[1]),
        'index_tile_y': int(tile_name_parts[2])
    })
    dp = DataProcessing()
    dp.input = dp_input
    dp_macro.add_task(dp)

res = dp_macro.run()