In [1]:
import pdal
import glob
import geopandas as gpd
import numpy as np
import json
import pandas as pd
from shapely import MultiPoint

In [2]:
files = glob.glob('downloads/LiDAR/*.laz')

In [3]:
scms = [file[20:][:8] for file in files]

In [4]:
gdf_articulacao = gpd.read_file("zip://downloads/SIRGAS_SHP_quadriculamdt.zip!/SIRGAS_SHP_quadriculamdt/")

In [5]:
_ = gdf_articulacao.set_crs(epsg=31983, inplace=True)

In [6]:
resolution = 0.5

def dem_pipeline(scm, width, height, origin_x, origin_y, resolution):
    return [
        {
            "type": "readers.las",
            "filename": f'downloads/LiDAR/MDS_{scm}_1000.laz',
            "override_srs": "EPSG:31983"
        },
        {
            "type":"filters.range",
            "limits":"Classification[2:2]"
        },
        {
            "type": "filters.delaunay"
        },
        {
            "type": "filters.faceraster",
            "resolution":resolution,
            "width": width,
            "height": height,
            "origin_x": origin_x,
            "origin_y": origin_y,
        },        
        {
            "filename":f"results/DEM/MDT-{scm}-50cm.tiff",
            "gdaldriver":"GTiff",
            "type": "writers.raster",
            "gdalopts":"COMPRESS=ZSTD, PREDICTOR=3, BIGTIFF=YES",
            "nodata":"0",
            "data_type": "float32",
            # "default_srs": "EPSG:31983"
        }
    ]

def laz_pipeline(scm, width, height, origin_x, origin_y, resolution):
    return [
        {
            "type":"readers.las",
            "filename":f"downloads/LiDAR/MDS_{scm}_1000.laz"
        },
        {
            "filename":f"results/BHM-Z-{scm}.tiff",
            "gdaldriver":"GTiff",
            "width": width,
            "height": height,
            "origin_x": origin_x,
            "origin_y": origin_y,
            "radius": f'{resolution * 2 * np.sqrt(2)}',
            "override_srs": "EPSG:31983",
            "output_type":"max",
            "resolution":resolution,
            "dimension": "Z",
            "data_type": "float32",
            "type": "writers.gdal",
            "gdalopts":"COMPRESS=ZSTD, PREDICTOR=3, BIGTIFF=YES",
            "where": "(Classification == 6)",
        },
        {
            "type":"filters.hag_dem",
            "raster": f"results/DEM/MDT-{scm}-50cm.tiff"
        },
        {
            "filename":f"results/BHM-{scm}.tiff",
            "gdaldriver":"GTiff",
            "output_type":"max",
            "resolution": resolution,
            "radius": f'{resolution * 2 * np.sqrt(2)}',
            "dimension":"HeightAboveGround",
            "width": width,
            "height": height,
            "origin_x": origin_x,
            "origin_y": origin_y,
            # "nodata":"0",
            "data_type": "float32",
            "type": "writers.gdal",
            "where": "(Classification == 6)",
            "override_srs": "EPSG:31983"
        },
        {
            "filename":f"results/VHM-{scm}.tiff",
            "gdaldriver":"GTiff",
            "output_type":"max",
            "resolution":resolution,
            "radius": f'{resolution * 2 * np.sqrt(2)}',
            "dimension":"HeightAboveGround",
            "width": width,
            "height": height,
            "origin_x": origin_x,
            "origin_y": origin_y,
            # "nodata":"0",
            "data_type": "float32",
            "type": "writers.gdal",
            "where": "(Classification == 4) || (Classification == 3)",
            "override_srs": "EPSG:31983"
        },
        {
            "type":"filters.range",
            "limits":"Classification[6:6]"
        },
        {
            "type":"filters.voxeldownsize",
            "cell":0.5,
            "mode":"center"
        },
        {
            "type":"filters.dbscan",
            "min_points":5,
            "eps": (resolution + 0.10) * np.sqrt(2),
            "dimensions":"X,Y,Z"
        },
        {
            "type":"filters.ferry",
            "dimensions":"HeightAboveGround => Z"
        },
        {
            "type":"writers.las",
            "filename":f"results/Cluster-{scm}.laz",
            "extra_dims": "all",
            # "output_dims":"X,Y,Z,ClusterID"
        },
    ]

In [7]:
agg = {
    'coords':list,  
    'Z':['count', 'median', 'sum'], 
    'Intensity':'median', 
    'Infrared':'median',
    'Red':'median',
    'Green':'median',
    'Blue':'median'  
}

columns = {
    ('coords', 'list'):'coords',
    ('Z', 'count'):'count',
    ('Z', 'median'):'z_median',
    ('Z', 'sum'):'z_sum',
    ('Intensity', 'median'):'intensity_median',
    ('Infrared', 'median'):'infrared_median',
    ('Red', 'median'):'red_median',
    ('Green', 'median'):'green_median',
    ('Blue', 'median'):'blue_median',
}

for _, scm in gdf_articulacao.loc[gdf_articulacao.qmdt_cod.isin(scms)].iterrows():
    print(scm.qmdt_cod)
    coords = [[xy[0], xy[1]] for xy in scm.geometry.exterior.coords]
    xy_max = np.max(np.array(coords), axis=0) 
    xy_min = np.min(np.array(coords), axis=0)
    width_height = np.ceil(xy_max * 2) - np.ceil(xy_min * 2)
    # print(width_height)
    origin_xy = np.floor(xy_min * 2)/2
    # print(origin_xy)
    dem = dem_pipeline(scm.qmdt_cod, width_height[0], width_height[1], xy_min[0], xy_min[1], resolution)

    pipeline = pdal.Pipeline(json.dumps(dem))
    # pipeline.validate()   
    n_points = pipeline.execute()
    print(f'Pipeline selected {n_points} points')
    
    laz = laz_pipeline(scm.qmdt_cod, width_height[0], width_height[1], xy_min[0], xy_min[1], resolution)

    pipeline = pdal.Pipeline(json.dumps(laz))
    # pipeline.validate()   
    n_points = pipeline.execute()
    print(f'Pipeline selected {n_points} points')

    arr = pipeline.arrays[0]
    df = pd.DataFrame(arr)
    df.loc[:, 'coords'] = list(np.dstack([df.X, df.Y])[0])
    df.set_index(['X', 'Y']).loc[:, 'Z'] = df.groupby(['X', 'Y']).agg({'Z':'max'})
    df.drop_duplicates(subset=['X', 'Y'], keep='last', inplace=True)

    df = df[(df.Z > 2.0) & (df.Z < 200.0)].reset_index()

    df_agg = df[df.ClusterID > 0].groupby('ClusterID').agg(agg)

    df_agg.columns = df_agg.columns.to_flat_index()

    df_agg.rename(columns=columns, inplace=True)
    
    df_agg.loc[:, 'geometry'] = df_agg.coords.apply(MultiPoint)

    gdf_agg = gpd.GeoDataFrame(df_agg)

    gdf_agg.set_crs(epsg=31983, inplace=True)

    gdf_agg.drop(columns=['coords']).to_file(f'results/{scm.qmdt_cod}-multipoint.gpkg', driver='GPKG')
    break



3313-144
Pipeline selected 1724943 points
Pipeline selected 395292 points


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df.set_index(['X', 'Y']).loc[:, 'Z'] = df.groupby(['X', 'Y']).agg({'Z':'max'})
  df.set_index(['X', 'Y']).loc[:, 'Z'] = df.groupby(['X', 'Y']).agg({'Z':'max'})


In [8]:
gdf_agg.drop(columns=['coords'])

Unnamed: 0_level_0,count,z_median,z_sum,intensity_median,infrared_median,red_median,green_median,blue_median,geometry
ClusterID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2,7,7.460738,52.537555,37.0,34048.0,39680.0,31744.0,31232.0,"MULTIPOINT ((321921.743 7393126.468), (321922...."
5,132,9.242988,1225.977183,36.0,31232.0,32512.0,33152.0,32768.0,"MULTIPOINT ((321979.243 7393130.468), (321978...."
6,60,2.757433,165.520663,49.0,12672.0,10752.0,11520.0,14848.0,"MULTIPOINT ((321956.243 7393127.968), (321955...."
8,613,5.673263,3475.370778,31.0,34304.0,35072.0,35328.0,35584.0,"MULTIPOINT ((321941.743 7393179.968), (321942...."
9,1270,3.076462,3872.194177,28.0,30720.0,27520.0,27904.0,28160.0,"MULTIPOINT ((321922.743 7393179.968), (321923...."
...,...,...,...,...,...,...,...,...,...
611,1319,14.985307,19882.859811,58.0,54272.0,60160.0,59904.0,59648.0,"MULTIPOINT ((322435.743 7393691.968), (322442...."
612,91,11.912934,1082.537739,25.0,32512.0,27392.0,28160.0,28672.0,"MULTIPOINT ((322445.243 7393651.468), (322444...."
613,69,2.527480,176.464863,34.0,17408.0,8448.0,9984.0,13312.0,"MULTIPOINT ((322445.743 7393665.968), (322445...."
614,308,14.133179,4345.034490,43.0,45056.0,50432.0,51200.0,51968.0,"MULTIPOINT ((322446.743 7393675.468), (322446...."
