# data processing notebook
This notebook performs basic geospatial operations to convert between data types and prepare all the materials for modeling to begin.

Steps:
* Compute average tree heights on a per-pixel basis
    * sum all tree height measurements from each location
    * divide by the total number of measurements made
* Um, *shuffles nervously*, that's it for now!

In [26]:
# import packages
import os
import ccb
import gdal
import numpy as np
import otbApplication as otb

# directory that holds every input and output file
wd = 'data'

# no-data value shared by the rasters written in this notebook
nodata = -9999

# have gdal raise python exceptions on runtime errors
gdal.UseExceptions()

In [5]:
# reference raster: its spatial extent and pixel size define the output grid
raster_path = os.path.join(wd, 'tree-cover.tif')

# load the reference raster's metadata (extent and pixel size are used later)
raster = ccb.read.raster(raster_path)

# point vector that the height values are computed from
vector_path = os.path.join(wd, 'lvis-lidar-pts-32616.shp')

## computing average tree heights
Everything here assumes the input raster and vector are in the same projection. Double-check that this is still true if you change any of the input files!

In [4]:
# first, create a temporary data directory to store intermediate outputs
# makedirs with exist_ok=True creates any missing parent directory and does not
# fail when the directory already exists, so the cell is safe to re-run
td = os.path.join(wd, 'temp')
os.makedirs(td, exist_ok=True)

In [22]:
# next, compute the sum of all height measurements
# every feature adds its 'tree-heigh' attribute to the pixel it falls in ('-add'),
# on a grid matching the extent and pixel size of the reference raster
height_sum_path = os.path.join(td, 'lvis-height-sum.tif')
rasterize_options = gdal.RasterizeOptions(
    format = 'GTiff',
    outputType = gdal.GDT_Float32,
    creationOptions = ['COMPRESS=DEFLATE', 'TILED=YES'],
    noData = nodata,
    # pixels start at 0 so the added values accumulate from zero
    initValues = 0,
    xRes = raster.xps,
    # the y pixel size is passed as an absolute value
    yRes = np.abs(raster.yps),
    # order: xmin, ymin, xmax, ymax
    outputBounds = [
        raster.xmin,
        raster.ymin,
        raster.xmax,
        raster.ymax
    ],
    attribute = 'tree-heigh',
    options = ['-add']
)

# then run the rasterization command
print('computing sum of tree height values')
print('estimated time: 30s')
ref = gdal.Rasterize(
    height_sum_path,
    vector_path,
    options = rasterize_options
)

# and write to disk: flush the cache, then drop the reference so gdal closes
# the file and it is complete before any later step reads it
ref.FlushCache()
ref = None
print('done!')

computing sum of tree height values
estimated time: 30s


In [23]:
# next, compute the count of all height measurements
# every feature burns a 1 that is added to the pixel it falls in ('-add'),
# so each pixel ends up holding the number of measurements it contains
height_count_path = os.path.join(td, 'lvis-height-count.tif')
rasterize_options = gdal.RasterizeOptions(
    format = 'GTiff',
    outputType = gdal.GDT_Float32,
    creationOptions = ['COMPRESS=DEFLATE', 'TILED=YES'],
    noData = nodata,
    # pixels start at 0 so the count accumulates from zero
    initValues = 0,
    xRes = raster.xps,
    # the y pixel size is passed as an absolute value
    yRes = np.abs(raster.yps),
    # order: xmin, ymin, xmax, ymax
    outputBounds = [
        raster.xmin,
        raster.ymin,
        raster.xmax,
        raster.ymax
    ],
    burnValues = [1],
    options = ['-add']
)

# then run the rasterization command
print('computing count of tree height values')
print('estimated time: 30s')
ref = gdal.Rasterize(
    height_count_path,
    vector_path,
    options = rasterize_options
)

# and write to disk: flush the cache, then drop the reference so gdal closes
# the file and it is complete before the band math step reads it
ref.FlushCache()
ref = None
print('done!')

computing count of tree height values
estimated time: 30s


In [33]:
# finally, divide the summed height by the number of measurements to get the average
tree_height_path = os.path.join(wd, 'tree-height-lvis.tif')

# gdal creation options are passed to OTB by appending them to the output filename
creation_options = "&gdal:co:COMPRESS=DEFLATE&gdal:co:TILED=YES"
output_file = '{file}?{options}'.format(file = tree_height_path, options = creation_options)

# create the band math expression
# im1b1 is the height sum and im2b1 the measurement count (order of the "il" list below)
# the guard tests the count, not the sum: this protects the division directly and keeps
# pixels whose measurements sum to zero as a valid 0 average instead of no-data
expression = "(im2b1 > 0 ? im1b1 / im2b1 : {nodata})".format(nodata = nodata)

# set up the orfeo toolbox command
band_math = otb.Registry.CreateApplication("BandMath")
band_math.SetParameterStringList("il", [height_sum_path, height_count_path])
band_math.SetParameterString("out", output_file)
band_math.SetParameterString("exp", expression)

# run the command
band_math.ExecuteAndWriteOutput()

2020-05-17 19:49:41 (INFO) BandMath: Default RAM limit for OTB is 256 MB
2020-05-17 19:49:41 (INFO) BandMath: GDAL maximum cache size is 801 MB
2020-05-17 19:49:41 (INFO) BandMath: OTB will use at most 8 threads
2020-05-17 19:49:41 (INFO) BandMath: Image #1 has 1 components

2020-05-17 19:49:41 (INFO) BandMath: Image #2 has 1 components

2020-05-17 19:49:41 (INFO): Estimated memory for full processing: 4903.44MB (avail.: 256 MB), optimal image partitioning: 20 blocks
2020-05-17 19:49:41 (INFO): File data/tree-height-lvis.tif will be written in 25 blocks of 2816x2560 pixels
Writing data/tree-height-lvis.tif?&gdal:co:COMPRESS=DEFLATE&gdal:co:TILED=YES...: 100% [**************************************************] (18s)


AttributeError: module 'gdal' has no attribute 'GetRasterBand'

In [34]:
# tag the averaged raster with the no-data value by hand:
# reopen it in update mode and write the value onto band 1
ref = gdal.Open(tree_height_path, gdal.GA_Update)
band = ref.GetRasterBand(1)
band.SetNoDataValue(nodata)

# push the change to disk, band first and then the dataset
band.FlushCache()
ref.FlushCache()

# drop both references, band before dataset
band = ref = None

In [28]:
# scratch check: preview the output filename with the gdal creation options appended
'{path}?&gdal:co:COMPRESS=DEFLATE&gdal:co:TILED=YES'.format(path = tree_height_path)

'data/tree-height-lvis.tif?&gdal:co:COMPRESS=DEFLATE&gdal:co:TILED=YES'