# Exploring some numpy tricks for use in pointcloud analysis

This notebook takes a look at how we might use [dask](https://dask.org) in our pointcloud analyses. 


In [1]:
filename = 'uhnb1_con_b_c_xyz.csv'

In [12]:
import numpy as np
import pandas as pd
import xarray as xr
import dask.dataframe as dd
import dask.array as da

In [181]:
# df = dd.read_csv(filename)
n_points = 1000000
pcloud_np = da.random.uniform(0.0, 100.0, size=(n_points,3), chunks=(1000,3))

### Generate a new, transposed array

This array will contain only a list of all the `X` values and a list of all the `Y` values.

Uses the [np.T](https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.T.html) command

### Discritize the array into the desired resolution

In [182]:
resolution = 1 # Target resolution in meters.
xy = pcloud_np.T[:2]
xy = ((xy + resolution / 2) // resolution).astype(int)

### Find the min and max values

In [183]:
mn, mx = xy.min(axis=1), xy.max(axis=1)
sz = mx + 1 - mn

In [184]:
# Map the xy locations into a single index for faster access
flatidx = np.ravel_multi_index(xy-mn[:, None], sz.compute())
# Sort the index values, returning sorted index locations, not values
z_order = np.argsort(flatidx)
z_reordered = pcloud_np[z_order,2] # This is generating a PerformanceWarning because we are slicing out of order.
sorted_idx = flatidx[z_order]
bin_boundaries = np.where(sorted_idx[:-1] != sorted_idx[1:])[0]



In [185]:
max_height = np.maximum.reduceat(z_reordered.compute(), bin_boundaries)
min_height = np.minimum.reduceat(z_reordered.compute(), bin_boundaries)
print("Min Heights: average:{avg:5.2f}, max:{maximum:5.2f}, min:{minimum:5.2f}".format(
    avg=min_height.mean(),
    maximum=min_height.max(),
    minimum=min_height.min())
     )
print("Max Heights: average:{avg:5.2f}, max:{maximum:5.2f}, min:{minimum:5.2f}".format(
    avg=max_height.mean(),
    maximum=max_height.max(),
    minimum=max_height.min())
     )

Min Heights: average: 1.05, max:19.51, min: 0.00
Max Heights: average:98.95, max:100.00, min:83.75


In [107]:
histo = (np.bincount(flatidx, pcloud_np[:, 2], sz.prod().compute()) / 
         np.maximum(1, np.bincount(flatidx, None, sz.prod().compute()))
        )

[ 619 1255 1241 1233 1282 1262 1214 1288 1236 1222 1307 1277 1284 1234
 1287 1277 1263 1228 1135 1220  617 1248 2457 2498 2418 2462 2474 2483
 2503 2482 2435 2498 2441 2499 2562 2506 2503 2456 2511 2439 2447 1229
 1258 2429 2520 2491 2545 2445 2554 2481 2558 2494 2533 2451 2550 2371
 2487 2593 2471 2544 2468 2517 1300 1290 2383 2497 2393 2452 2562 2470
 2421 2459 2448 2477 2516 2409 2443 2520 2506 2436 2478 2544 2536 1211
 1219 2454 2593 2528 2542 2473 2562 2408 2489 2480 2412 2512 2456 2488
 2415 2472 2580 2514 2470 2483 1243 1276 2496 2525 2555 2500 2463 2369
 2534 2491 2471 2519 2439 2504 2568 2504 2495 2453 2505 2478 2527 1253
 1245 2464 2542 2430 2562 2483 2458 2443 2498 2558 2503 2527 2398 2443
 2582 2496 2456 2478 2600 2517 1208 1271 2531 2501 2491 2460 2479 2647
 2451 2411 2494 2428 2603 2515 2502 2563 2508 2443 2534 2443 2531 1238
 1241 2456 2493 2554 2522 2528 2546 2460 2505 2483 2537 2489 2515 2518
 2603 2571 2514 2522 2500 2510 1256 1261 2515 2482 2565 2525 2464 2593
 2525 