In [110]:
import math
import os
import subprocess
from glob import glob
from itertools import combinations
from urllib.error import URLError

import earthpy as et
import earthpy.appeears as etapp
import geopandas as gpd
import holoviews as hv
import hvplot as hv
import hvplot.pandas
import hvplot.xarray
import json
import laspy
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pdal
import pylas
import requests
import rasterio
import rioxarray as rxr
import rioxarray.merge as rxrm
import skfuzzy as fuzz
from skfuzzy import control as ctrl
from IPython.display import Image
import xarray as xr
import xrspatial
import warnings
import zipfile

from osgeo import gdal, gdal_array, osr
from rasterio.transform import from_origin
from scipy.interpolate import griddata
from shapely.geometry import Polygon

### Pseudocode for process

import project area shapefile

import LIDAR index grid

intersect index grid and project areas shapefile to identify tiles to download

for each project area:
* download tiles
* process tiles with LASTools into canopy height dem
* clip to project area
* merge if necessary

Need to install PDAL, this requires installing visual studio build tools:
    
https://visualstudio.microsoft.com/visual-cpp-build-tools/

then run pip install pdal

may need to install cmake from here too:

https://cmake.org/download/

In [3]:
# Set up directory
data_dir = os.path.join(et.io.HOME, et.io.DATA_NAME)
project_dir = os.path.join(data_dir, "treebeard")
# Create the directory if it doesn't exist
os.makedirs(data_dir, exist_ok=True)

las_index_path = os.path.join(
    data_dir,
    'earthpy-downloads',
    'lidar_index_cspn_q2',
    'lidar_index_cspn_q2.shp'
)

# Download LIDAR index tiles
if not os.path.exists(las_index_path):
    las_index_url = ('https://gisdata.drcog.org:8443/geoserver/DRCOGPUB/'
             'ows?service=WFS&version=1.0.0&request=GetFeature&'
             'typeName=DRCOGPUB:lidar_index_cspn_q2&outputFormat=SHAPE-ZIP')

    las_index_shp = et.data.get_data(url=las_index_url)

las_index_gdf = (
    gpd.read_file(las_index_path).set_index('tile')
#    .loc[['N3W345']]
)

las_index_gdf = las_index_gdf.to_crs('EPSG:4269')

las_index_plot = las_index_gdf.hvplot(
    tiles = 'OSM',
    geo = True,
    line_color='black',
    line_width=2,
    fill_alpha=0
)
las_index_plot

In [9]:
# Open project areas shapefile
proj_zip_path = '../assets/project_areas_merged.zip'

with zipfile.ZipFile(proj_zip_path, 'r') as zip_ref:
    temp_dir = '/tmp/extracted_shapefile'  # You can specify any temporary directory
    zip_ref.extractall(temp_dir)
    
extracted_shapefile_path = temp_dir + '/'

proj_area_gdf = gpd.read_file(extracted_shapefile_path)

proj_area_gdf = proj_area_gdf.to_crs("EPSG:4326")

proj_area_plot = proj_area_gdf.hvplot(
    x='x',
    y='y',
    aspect='equal',
    tiles='EsriImagery',
    geo=True,
    line_color='blue',
    line_width=2,
    fill_alpha=0
)

proj_area_plot


In [10]:
# Identify the tiles that intersect each project area
select_tiles_gdf = gpd.sjoin(las_index_gdf, proj_area_gdf, how='inner', op='intersects')

select_tiles_gdf.reset_index(drop=False)
select_tiles_gdf.hvplot(
    x='x',
    y='y',
    aspect='equal',
    tiles='EsriImagery',
    geo=True,
    line_color='blue',
    line_width=2,
    fill_alpha=0
)

  if await self.run_code(code, result, async_=asy):
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:4269
Right CRS: EPSG:4326

  select_tiles_gdf = gpd.sjoin(las_index_gdf, proj_area_gdf, how='inner', op='intersects')


In [11]:
select_tiles_gdf = select_tiles_gdf.reset_index(drop=False)
select_tiles_gdf

Unnamed: 0,tile,gid,area,storage,geometry,index_right,Shape_Leng,Shape_Area,Acreage,Proj_ID
0,N4W264,191,CSPN_Q2,lidararchive,"POLYGON ((-105.27729 40.21980, -105.29620 40.2...",0,806.343609,32945.419705,0.0,Unnamed 1
1,N4W351,761,CSPN_Q2,lidararchive,"POLYGON ((-105.52309 40.23450, -105.54201 40.2...",1,0.017313,1.5e-05,0.0,Zumwinkel
2,N4W399,993,CSPN_Q2,lidararchive,"POLYGON ((-105.37191 40.17646, -105.39080 40.1...",3,0.0,0.0,0.0,Conifer Hill
3,N4W397,1090,CSPN_Q2,lidararchive,"POLYGON ((-105.40970 40.17649, -105.42860 40.1...",3,0.0,0.0,0.0,Conifer Hill
4,N4W389,1405,CSPN_Q2,lidararchive,"POLYGON ((-105.37188 40.19095, -105.39078 40.1...",3,0.0,0.0,0.0,Conifer Hill
5,N4W396,1593,CSPN_Q2,lidararchive,"POLYGON ((-105.42860 40.17651, -105.44749 40.1...",3,0.0,0.0,0.0,Conifer Hill
6,N4W388,1712,CSPN_Q2,lidararchive,"POLYGON ((-105.39078 40.19097, -105.40968 40.1...",3,0.0,0.0,0.0,Conifer Hill
7,N4W290,1787,CSPN_Q2,lidararchive,"POLYGON ((-105.35301 40.17643, -105.37191 40.1...",3,0.0,0.0,0.0,Conifer Hill
8,N4W398,1872,CSPN_Q2,lidararchive,"POLYGON ((-105.39080 40.17648, -105.40970 40.1...",3,0.0,0.0,0.0,Conifer Hill
9,N3W308,1978,CSPN_Q2,lidararchive,"POLYGON ((-105.39083 40.16198, -105.40972 40.1...",3,0.0,0.0,0.0,Conifer Hill


In [12]:
# Generate list of all tiles per project area
tiles_by_area = select_tiles_gdf.groupby('Proj_ID')['tile'].apply(list).reset_index()
tiles_by_area

Unnamed: 0,Proj_ID,tile
0,Conifer Hill,"[N4W399, N4W397, N4W389, N4W396, N4W388, N4W29..."
1,Unnamed 1,[N4W264]
2,Unnamed 2,"[N4W381, N4W391]"
3,Zumwinkel,[N4W351]


In [13]:
# Use PDAL to process LAS files into ground and first returns TIFs
def convert_las_to_tif(input_las, output_tif, return_type):
    """
    Process a LAS file into a GeoTIFF based on specified return type.

    Parameters:
    - input_las (str): Path to the input LAS file.
    - output_tif (str): Path to save the output GeoTIFF file.
    - return_type (str): Type of returns to process ("first" or "ground").

    Returns:
    - None
    """
    
    def get_crs_from_las(input_las):
        """
        Get the Coordinate Reference System (CRS) information from the header of a LAS file.

        Parameters:
        - input_las (str): Path to the input LAS file.

        Returns:
        - crs (str): The CRS information.
        """
        pipeline = {
            "pipeline": [
                {
                    "type": "readers.las",
                    "filename": input_las
                }
            ]
        }

        pipeline_manager = pdal.Pipeline(json.dumps(pipeline))
        pipeline_manager.execute()

        metadata = pipeline_manager.metadata
        if "metadata" in metadata and "readers.las" in metadata["metadata"]:
            crs = metadata["metadata"]["readers.las"]["comp_spatialreference"]
        else:
            crs = None

        return crs

    # Get CRS from LAS header
    crs_info = get_crs_from_las(input_las)
    
    # Define PDAL pipeline in JSON format based on return type
    if return_type == "first":
        pipeline = {
            "pipeline": [
                {
                    "type": "readers.las",
                    "filename": input_las
                },
                {
                    "type": "filters.range",
                    "limits": "ReturnNumber[1:1]"  # Filter for first returns
                },
                {
                    "type": "writers.gdal",
                    "filename": output_tif,
                    "resolution": 1,  # Adjust as needed
                    "output_type": "idw"  # Interpolation method (Inverse Distance Weighting)
                    #"crs": crs_info
                }
            ]
        }
    elif return_type == "ground":
        pipeline = {
            "pipeline": [
                {
                    "type": "readers.las",
                    "filename": input_las
                },
                {
                    "type": "filters.range",
                    "limits": "Classification[2:2]"  # Filter for ground returns
                },
                {
                    "type": "writers.gdal",
                    "filename": output_tif,
                    "resolution": 1,  # Adjust as needed
                    "output_type": "idw"  # Interpolation method (Inverse Distance Weighting)
                    #"crs": crs_info
                }
            ]
        }
    else:
        raise ValueError("Invalid return_type. Use 'first' or 'ground'.")

    # Execute PDAL pipeline
    pipeline_manager = pdal.Pipeline(json.dumps(pipeline))
    pipeline_manager.execute()

In [184]:
# Process tiles for each project area

las_root_url = 'https://lidararchive.s3.amazonaws.com/2020_CSPN_Q2/'
canopy_dict = {}
for index, row in tiles_by_area.iterrows():
    tiles = row['tile']
    proj_area_name = row['Proj_ID']
    sel_proj_area_gdf = proj_area_gdf[proj_area_gdf['Proj_ID'] == proj_area_name]
    # Download all tiles for project area, process, and clip/merge
    tile_agg = []
    print("Processing LIDAR for " + proj_area_name)
    for tile in tiles:
        file_name = tile + ".las"
        print("Processing LIDAR tile " + tile)
        tile_path = os.path.join(
            data_dir,
            'earthpy-downloads',
            file_name
        )
        download_url = las_root_url + tile + ".las"
        if not os.path.exists(tile_path):
            et.data.get_data(url=download_url)
        # PDAL is required for this step, see readme for install instructions

        # Output path for first returns DEM
        output_fr_tif = os.path.join(
            project_dir,
            tile +'_fr.tif'
        )
        if not os.path.exists(output_fr_tif):
            convert_las_to_tif(tile_path, output_fr_tif, "first")
        
        # Output path for ground DEM
        output_gr_tif = os.path.join(
            project_dir,
            tile +'_gr.tif'
        )
        if not os.path.exists(output_gr_tif):
            convert_las_to_tif(tile_path, output_gr_tif, "ground")
        
        # Process ground and first return data to canopy height
        fr_dem = rxr.open_rasterio(output_fr_tif)
        fr_dem = fr_dem.rio.reproject("EPSG:4326")

        gr_dem = rxr.open_rasterio(output_gr_tif)
        gr_dem = gr_dem.rio.reproject("EPSG:4326")
        gr_dem = gr_dem.rio.reproject_match(fr_dem)

        canopy_dem = fr_dem - gr_dem

        canopy_dem = canopy_dem.where(canopy_dem >= 1, -9999)
        canopy_dem = canopy_dem.where(canopy_dem <= 500, np.nan)
        canopy_dem.name = tile + "_Canopy"
        tile_agg.append(canopy_dem)
    print("Merging LIDAR tiles for " + proj_area_name)
    canopy_merged = rxrm.merge_arrays(tile_agg).rio.clip(sel_proj_area_gdf.geometry)
    canopy_dict[proj_area_name] = canopy_merged
                                               
    # Merge all tiles that intersect with the project area and clip to project area
    #for tile in tile_agg:
        

Processing LIDAR for Conifer Hill
Processing LIDAR tile N4W399
Processing LIDAR tile N4W397
Processing LIDAR tile N4W389
Processing LIDAR tile N4W396
Processing LIDAR tile N4W388
Processing LIDAR tile N4W290
Processing LIDAR tile N4W398
Processing LIDAR tile N3W308
Processing LIDAR tile N3W306
Processing LIDAR tile N3W307
Merging LIDAR tiles for Conifer Hill
Processing LIDAR for Unnamed 1
Processing LIDAR tile N4W264
Merging LIDAR tiles for Unnamed 1
Processing LIDAR for Unnamed 2
Processing LIDAR tile N4W381
Processing LIDAR tile N4W391
Merging LIDAR tiles for Unnamed 2
Processing LIDAR for Zumwinkel
Processing LIDAR tile N4W351
Merging LIDAR tiles for Zumwinkel


In [185]:
test = canopy_dict['Zumwinkel']
test.hvplot(
    height=600,
    width=600,
    geo=True,
    rasterize=True,
    aspect='equal',
    kind='image',
    tiles = 'EsriImagery',
    alpha=0.5,
    title = "LIDAR Canopy Example",
    clabel= 'Height in feet',
    crs = canopy_dem.rio.crs
)