In [1]:
import pdal
import json
import pandas as pd
import geopandas as gpd
import numpy as np
import math
import alphashape
from sqlalchemy import create_engine
import shapely
# from sklearn.cluster import DBSCAN #, OPTICS
# from sklearn import preprocessing


In [2]:
gdf_articulacao = gpd.read_file("zip://data/SIRGAS_SHP_quadriculamdt.zip!/SIRGAS_SHP_quadriculamdt/")

In [3]:
engine = create_engine("postgresql://postgres:1234@localhost:5432/faveLiDAR")

In [4]:
_ = gdf_articulacao.set_crs(epsg=31983, inplace=True)

In [5]:
#gdf_articulacao.set_index('qmdt_cod').iloc[3315-361].geometry.exterior.coords
coords = [[xy[0], xy[1]] for xy in gdf_articulacao.set_index('qmdt_cod').loc['3315-361'].geometry.exterior.coords]
xy_max = np.max(np.array(coords), axis=0) 
xy_min = np.min(np.array(coords), axis=0)

In [6]:
np.ceil(xy_max * 2) - np.ceil(xy_min * 2)

array([1077., 1166.])

In [7]:
np.floor(xy_min * 2)/2

array([ 323586., 7386800.])

In [8]:
resolution = 0.5

In [9]:
laz = [
    {
        "type":"readers.las",
        "filename":"sample-data/sao-paulo/MDS_3315-361_1000.laz"
    },
    {
        "filename":f"sample-results/sao-paulo/BHM-Z-3315-361.tiff",
        "gdaldriver":"GTiff",
        "width": 1077,
        "height": 1166,
        "origin_x": 323586,
        "origin_y": 7386800,
        "radius": f'{resolution * 2 * np.sqrt(2)}',
        "override_srs": "EPSG:31983",
        "output_type":"max",
        "resolution":resolution,
        "dimension": "Z",
        "data_type": "float32",
        "type": "writers.gdal",
        "gdalopts":"COMPRESS=ZSTD, PREDICTOR=3, BIGTIFF=YES",
        "where": "(Classification == 6)",
    },
    {
        "type":"filters.range",
        "limits":"Classification[6:6]"
    },
    ## TODO
    ## Tentar experimentar os dois tipos de clusteres
    # {
    #     "type":"filters.cluster",
    #     "min_points":100,
    #     "tolerance":0.3
    # },
    {
        "type":"filters.voxeldownsize",
        "cell":0.5,
        "mode":"center"
    },
    {
        "type":"filters.dbscan",
        "min_points":5,
        "eps":0.60,
        "dimensions":"X,Y,Z"
    },
    {
        "type":"writers.las",
        "filename":"sample-results/sao-paulo/Cluster-3315-361.laz",
        "extra_dims": "all",
        # "output_dims":"X,Y,Z,ClusterID"
    },
    {
        "type":"filters.hag_dem",
        "raster": "sample-data/sao-paulo/MDT-3315-361.tiff"
    },
    {
        "type":"filters.ferry",
        "dimensions":"HeightAboveGround => Z"
    },
    {
        "filename":f"sample-results/sao-paulo/BHM-3315-361.tiff",
        "gdaldriver":"GTiff",
        "output_type":"max",
        "resolution":"0.5",
        "width": 1077,
        "height": 1166,
        "origin_x": 323586,
        "origin_y": 7386800,
        # "nodata":"0",
        "data_type": "float32",
        "type": "writers.gdal",
        "where": "(Classification == 6)",
        "override_srs": "EPSG:31983"
    },
]

In [10]:
pipeline = pdal.Pipeline(json.dumps(laz))
# pipeline.validate()
n_points = pipeline.execute()
print(f'Pipeline selected {n_points} points')

Pipeline selected 1083238 points


In [11]:
arr = pipeline.arrays[0]
df = pd.DataFrame(arr)
# print(df.head().to_latex(index=False))
df.columns

Index(['X', 'Y', 'Z', 'Intensity', 'ReturnNumber', 'NumberOfReturns',
       'ScanDirectionFlag', 'EdgeOfFlightLine', 'Classification',
       'ScanAngleRank', 'UserData', 'PointSourceId', 'GpsTime', 'ScanChannel',
       'ClassFlags', 'Red', 'Green', 'Blue', 'Infrared', 'ClusterID',
       'HeightAboveGround'],
      dtype='object')

In [12]:
len(df.ClusterID.unique())

13027

In [13]:
(df.ClusterID.value_counts() > 16).value_counts()

False    7191
True     5836
Name: ClusterID, dtype: int64

In [14]:
df.loc[:, 'coords'] = list(np.dstack([df.X, df.Y])[0])

## Removendo os pontos sobrepostos

In [15]:
df.groupby(['X', 'Y']).agg(
    {'Z':['max', 'count']}
)['Z']['count'].value_counts()

1     826588
2     112958
3       8007
4       1058
5        258
6         90
7         37
8         20
9          9
14         4
11         3
10         2
17         1
12         1
13         1
Name: count, dtype: int64

In [16]:
df.set_index(['X', 'Y']).loc[:, 'Z'] = df.groupby(['X', 'Y']).agg({'Z':'max'})

In [17]:
df.drop_duplicates(subset=['X', 'Y'], keep='last', inplace=True)

In [18]:
# Remover os valores discrepantes de Z [(df.Z > 2.0) & (df.Z < 200.0)]
df = df[(df.Z > 2.0) & (df.Z < 200.0)].reset_index()

## Agregando por Cluster

In [19]:
agg = {
    'coords':list,  
    'Z':['count', 'median', 'sum'], 
    'Intensity':'median', 
    'Infrared':'median',  
}

In [20]:
df_agg = df[df.ClusterID > 0].groupby('ClusterID').agg(agg)

In [21]:
df_agg.columns = df_agg.columns.to_flat_index()

In [22]:
list(df_agg.columns)

[('coords', 'list'),
 ('Z', 'count'),
 ('Z', 'median'),
 ('Z', 'sum'),
 ('Intensity', 'median'),
 ('Infrared', 'median')]

In [23]:
columns = {
    ('coords', 'list'):'coords',
    ('Z', 'count'):'count',
    ('Z', 'median'):'z_median',
    ('Z', 'sum'):'z_sum',
    ('Intensity', 'median'):'intensity_median',
    ('Infrared', 'median'):'infrared_median'
}

In [24]:
df_agg.rename(columns=columns, inplace=True)

In [25]:
from shapely import MultiPoint
df_agg.loc[:, 'geometry'] = df_agg.coords.apply(MultiPoint)

In [26]:
gdf_agg = gpd.GeoDataFrame(df_agg)

In [27]:
gdf_agg.set_crs(epsg=31983, inplace=True)

Unnamed: 0_level_0,coords,count,z_median,z_sum,intensity_median,infrared_median,geometry
ClusterID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,"[[323609.4, 7386819.99], [323608.9, 7386819.99...",398,6.001718,2403.109247,35.0,33408.0,"MULTIPOINT (323609.400 7386819.990, 323608.900..."
2,"[[323602.4, 7386819.99], [323602.9, 7386819.99...",42,8.008798,336.729261,34.0,37120.0,"MULTIPOINT (323602.400 7386819.990, 323602.900..."
3,"[[323603.4, 7386818.99], [323603.9, 7386818.99...",43,11.170267,482.176627,82.0,52992.0,"MULTIPOINT (323603.400 7386818.990, 323603.900..."
4,"[[323592.4, 7386819.99], [323590.4, 7386819.49...",723,8.751626,6263.142853,38.0,35072.0,"MULTIPOINT (323592.400 7386819.990, 323590.400..."
5,"[[323586.4, 7386819.99], [323586.4, 7386819.49...",18,17.335306,311.614115,63.0,55424.0,"MULTIPOINT (323586.400 7386819.990, 323586.400..."
...,...,...,...,...,...,...,...
13018,"[[324096.9, 7387378.99], [324097.4, 7387378.99...",3,6.450113,19.366879,79.0,46080.0,"MULTIPOINT (324096.900 7387378.990, 324097.400..."
13021,"[[324123.4, 7387380.49], [324122.9, 7387380.49...",25,2.339334,58.939160,31.0,27904.0,"MULTIPOINT (324123.400 7387380.490, 324122.900..."
13022,"[[324123.9, 7387377.99], [324123.4, 7387377.99]]",2,2.591287,5.182574,10.0,17024.0,"MULTIPOINT (324123.900 7387377.990, 324123.400..."
13023,"[[324121.4, 7387377.49], [324121.4, 7387377.99]]",2,2.255472,4.510943,28.0,14208.0,"MULTIPOINT (324121.400 7387377.490, 324121.400..."


In [28]:
## Tentativa de usar o PostGis para processar o AlphaShape
# gdf_agg.loc[df_agg.loc[:, 'count'] >= 16].to_postgis("seila", engine, if_exists='replace')

In [29]:
ashapes = gdf_agg.loc[df_agg.loc[:, 'count'] >= 16].coords.apply(lambda x: alphashape.alphashape(x, alpha=0.5))



In [30]:
gdf_agg.loc[:, "multipoint"] = gdf_agg.geometry

In [31]:
gdf_agg.geometry = ashapes

In [32]:
gdf_agg.loc[:, "convex_hull"] = gdf_agg.loc[df_agg.loc[:, 'count'] >= 16].multipoint.convex_hull

In [33]:
gdf_agg.loc[:, "oriented_envelope"] = gdf_agg.geometry.apply(shapely.oriented_envelope)

## Criando as dimensões

In [34]:
gdf_agg.columns

Index(['coords', 'count', 'z_median', 'z_sum', 'intensity_median',
       'infrared_median', 'geometry', 'multipoint', 'convex_hull',
       'oriented_envelope'],
      dtype='object')

In [35]:
gdf_agg.loc[:, "volume_construido"] = gdf_agg.z_sum * resolution * resolution

In [36]:
gdf_agg.loc[:, "gabarito"] = gdf_agg.z_median

In [38]:
gdf_agg.loc[:, "area_de_projecao"] = gdf_agg.loc[:, "count"] * resolution * resolution

## Resultados preliminares

In [41]:
gdf_agg.volume_construido.sum() / 3

543559.4368338218

## Salvando os resultados

In [39]:
gdf_agg.drop(['coords', 'multipoint'], axis=1).to_file('sample-results/sao-paulo/result.gpkg', driver='GPKG')