From 24615716adead8dfd30ed3b24ed60abfb5fb5836 Mon Sep 17 00:00:00 2001 From: albhasan Date: Mon, 6 Mar 2023 20:23:48 -0300 Subject: [PATCH] Lesson 4 updated by replacing calls to raster and rgdal packages with calls to terra package. #368 #363 --- episodes/04-raster-calculations-in-r.Rmd | 155 +++++++++++------------ 1 file changed, 77 insertions(+), 78 deletions(-) diff --git a/episodes/04-raster-calculations-in-r.Rmd b/episodes/04-raster-calculations-in-r.Rmd index b280a60ee..69e9639a5 100644 --- a/episodes/04-raster-calculations-in-r.Rmd +++ b/episodes/04-raster-calculations-in-r.Rmd @@ -24,8 +24,7 @@ source("setup.R") :::::::::::::::::::::::::::::::::::::::::::::::::: ```{r load-libraries, echo=FALSE, results="hide", message=FALSE, warning=FALSE} -library(raster) -library(rgdal) +library(terra) library(ggplot2) library(dplyr) ``` @@ -33,22 +32,26 @@ library(dplyr) ```{r load-data, echo=FALSE} # Learners will have these data loaded from earlier episode # DSM data for Harvard Forest -DSM_HARV <- raster("data/NEON-DS-Airborne-Remote-Sensing/HARV/DSM/HARV_dsmCrop.tif") +DSM_HARV <- + rast("data/NEON-DS-Airborne-Remote-Sensing/HARV/DSM/HARV_dsmCrop.tif") DSM_HARV_df <- as.data.frame(DSM_HARV, xy = TRUE) # DTM data for Harvard Forest -DTM_HARV <- raster("data/NEON-DS-Airborne-Remote-Sensing/HARV/DTM/HARV_dtmCrop.tif") +DTM_HARV <- + rast("data/NEON-DS-Airborne-Remote-Sensing/HARV/DTM/HARV_dtmCrop.tif") DTM_HARV_df <- as.data.frame(DTM_HARV, xy = TRUE) # DSM data for SJER -DSM_SJER <- raster("data/NEON-DS-Airborne-Remote-Sensing/SJER/DSM/SJER_dsmCrop.tif") +DSM_SJER <- + rast("data/NEON-DS-Airborne-Remote-Sensing/SJER/DSM/SJER_dsmCrop.tif") DSM_SJER_df <- as.data.frame(DSM_SJER, xy = TRUE) # DTM data for SJER -DTM_SJER <- raster("data/NEON-DS-Airborne-Remote-Sensing/SJER/DTM/SJER_dtmCrop.tif") +DTM_SJER <- + rast("data/NEON-DS-Airborne-Remote-Sensing/SJER/DTM/SJER_dtmCrop.tif") DTM_SJER_df <- as.data.frame(DTM_SJER, xy = TRUE) ``` @@ -58,26 +61,27 @@ DTM_SJER_df <- as.data.frame(DTM_SJER, xy = TRUE) ## Things You'll Need To Complete This Episode See the [lesson homepage](.) for detailed information about the software, -data, and other prerequisites you will need to work through the examples in this episode. +data, and other prerequisites you will need to work through the examples in +this episode. :::::::::::::::::::::::::::::::::::::::::::::::::: -We often want to combine values of and perform calculations on rasters to create -a new output raster. This episode covers how to subtract one raster from -another using basic raster math and the `overlay()` function. It also covers how -to extract pixel values from a set of locations - for example a buffer region -around plot locations at a field site. +We often want to combine values of and perform calculations on rasters to +create a new output raster. This episode covers how to subtract one raster from +another using basic raster math and the `lapp()` function. It also covers +how to extract pixel values from a set of locations - for example a buffer +region around plot locations at a field site. ## Raster Calculations in R We often want to perform calculations on two or more rasters to create a new -output raster. For example, if we are interested in mapping the heights of trees -across an entire field site, we might want to calculate the difference between -the Digital Surface Model (DSM, tops of trees) and the -Digital Terrain Model (DTM, ground level). The resulting dataset is referred to -as a Canopy Height Model (CHM) and represents the actual height of trees, -buildings, etc. with the influence of ground elevation removed. +output raster. For example, if we are interested in mapping the heights of +trees across an entire field site, we might want to calculate the difference +between the Digital Surface Model (DSM, tops of trees) and the Digital Terrain +Model (DTM, ground level). The resulting dataset is referred to as a Canopy +Height Model (CHM) and represents the actual height of trees, buildings, etc. +with the influence of ground elevation removed. ![](fig/dc-spatial-raster/lidarTree-height.png){alt='Source: National Ecological Observatory Network (NEON)'} @@ -93,26 +97,25 @@ buildings, etc. with the influence of ground elevation removed. ### Load the Data -For this episode, we will use the DTM and DSM from the -NEON Harvard Forest Field site and San Joaquin Experimental -Range, -which we already have loaded from previous episodes. +For this episode, we will use the DTM and DSM from the NEON Harvard Forest +Field site and San Joaquin Experimental Range, which we already have loaded +from previous episodes. ::::::::::::::::::::::::::::::::::::::: challenge ## Exercise -Use the `GDALinfo()` function to view information about the DTM and -DSM data files. Do the two rasters have the same or different CRSs -and resolutions? Do they both have defined minimum and maximum values? +Use the `describe()` function to view information about the DTM and DSM data +files. Do the two rasters have the same or different CRSs and resolutions? Do +they both have defined minimum and maximum values? ::::::::::::::: solution ## Solution ```{r} -GDALinfo("data/NEON-DS-Airborne-Remote-Sensing/HARV/DTM/HARV_dtmCrop.tif") -GDALinfo("data/NEON-DS-Airborne-Remote-Sensing/HARV/DSM/HARV_dsmCrop.tif") +describe("data/NEON-DS-Airborne-Remote-Sensing/HARV/DTM/HARV_dtmCrop.tif") +describe("data/NEON-DS-Airborne-Remote-Sensing/HARV/DSM/HARV_dsmCrop.tif") ``` ::::::::::::::::::::::::: @@ -150,7 +153,7 @@ We can calculate the difference between two rasters in two different ways: or for more efficient processing - particularly if our rasters are large and/or the calculations we are performing are complex: -- using the `overlay()` function. +- using the `lapp()` function. ## Raster Math \& Canopy Height Models @@ -172,7 +175,7 @@ We can now plot the output CHM. ```{r harv-chm-plot} ggplot() + geom_raster(data = CHM_HARV_df , - aes(x = x, y = y, fill = layer)) + + aes(x = x, y = y, fill = HARV_dsmCrop)) + scale_fill_gradientn(name = "Canopy Height", colors = terrain.colors(10)) + coord_quickmap() ``` @@ -182,17 +185,18 @@ Canopy Height Model (CHM). ```{r create-hist} ggplot(CHM_HARV_df) + - geom_histogram(aes(layer)) + geom_histogram(aes(HARV_dsmCrop)) ``` -Notice that the range of values for the output CHM is between 0 and 30 -meters. Does this make sense for trees in Harvard Forest? +Notice that the range of values for the output CHM is between 0 and 30 meters. +Does this make sense for trees in Harvard Forest? ::::::::::::::::::::::::::::::::::::::: challenge ## Challenge: Explore CHM Raster Values -It's often a good idea to explore the range of values in a raster dataset just like we might explore a dataset that we collected in the field. +It's often a good idea to explore the range of values in a raster dataset just +like we might explore a dataset that we collected in the field. 1. What is the min and maximum value for the Harvard Forest Canopy Height Model (`CHM_HARV`) that we just created? 2. What are two ways you can check this range of data for `CHM_HARV`? @@ -208,26 +212,26 @@ It's often a good idea to explore the range of values in a raster dataset just l `na.rm = TRUE`. ```{r} -min(CHM_HARV_df$layer, na.rm = TRUE) -max(CHM_HARV_df$layer, na.rm = TRUE) +min(CHM_HARV_df$HARV_dsmCrop, na.rm = TRUE) +max(CHM_HARV_df$HARV_dsmCrop, na.rm = TRUE) ``` 2) Possible ways include: - Create a histogram -- Use the `min()` and `max()` functions. +- Use the `min()`, `max()`, and `range()` functions. - Print the object and look at the `values` attribute. 3) ```{r chm-harv-hist} ggplot(CHM_HARV_df) + - geom_histogram(aes(layer)) + geom_histogram(aes(HARV_dsmCrop)) ``` 4) ```{r chm-harv-hist-green} ggplot(CHM_HARV_df) + - geom_histogram(aes(layer), colour="black", + geom_histogram(aes(HARV_dsmCrop), colour="black", fill="darkgreen", bins = 6) ``` @@ -235,7 +239,8 @@ ggplot(CHM_HARV_df) + ```{r chm-harv-raster} custom_bins <- c(0, 10, 20, 30, 40) CHM_HARV_df <- CHM_HARV_df %>% - mutate(canopy_discrete = cut(layer, breaks = custom_bins)) + mutate(canopy_discrete = cut(HARV_dsmCrop, + breaks = custom_bins)) ggplot() + geom_raster(data = CHM_HARV_df , aes(x = x, y = y, @@ -248,7 +253,7 @@ ggplot() + :::::::::::::::::::::::::::::::::::::::::::::::::: -## Efficient Raster Calculations: Overlay Function +## Efficient Raster Calculations Raster math, like we just did, is an appropriate approach to raster calculations if: @@ -258,48 +263,43 @@ if: However, raster math is a less efficient approach as computation becomes more complex or as file sizes become large. -The `overlay()` function is more efficient when: -1. The rasters we are using are larger in size. -2. The rasters are stored as individual files. -3. The computations performed are complex. - -The `overlay()` function takes two or more rasters and applies a function to +The `lapp()` function takes two or more rasters and applies a function to them using efficient processing methods. The syntax is -`outputRaster <- overlay(raster1, raster2, fun=functionName)` +`outputRaster <- lapp(x, fun=functionName)` + +In which raster can be either a SpatRaster or a SpatRasterDataset which is an +object that holds rasters. See `help(sds)`. ::::::::::::::::::::::::::::::::::::::::: callout ## Data Tip -If the rasters are stacked and stored -as `RasterStack` or `RasterBrick` objects in R, then we should use `calc()`. -`overlay()` will not work on stacked rasters. - +To create a SpatRasterDataset, we call the function `sds` which can take a list +of raster objects (each one created by calling `rast`). :::::::::::::::::::::::::::::::::::::::::::::::::: Let's perform the same subtraction calculation that we calculated above using -raster math, using the `overlay()` function. +raster math, using the `lapp()` function. ::::::::::::::::::::::::::::::::::::::::: callout ## Data Tip -A custom function consists of a defined -set of commands performed on a input object. Custom functions are particularly -useful for tasks that need to be repeated over and over in the code. A -simplified syntax for writing a custom function in R is: +A custom function consists of a defined set of commands performed on a input +object. Custom functions are particularly useful for tasks that need to be +repeated over and over in the code. A simplified syntax for writing a custom +function in R is: `function_name <- function(variable1, variable2) { WhatYouWantDone, WhatToReturn}` :::::::::::::::::::::::::::::::::::::::::::::::::: ```{r raster-overlay} -CHM_ov_HARV <- overlay(DSM_HARV, - DTM_HARV, - fun = function(r1, r2) { return( r1 - r2) }) +CHM_ov_HARV <- lapp(sds(list(DSM_HARV, DTM_HARV)), + fun = function(r1, r2) { return( r1 - r2) }) ``` Next we need to convert our new object to a data frame for plotting with @@ -314,12 +314,12 @@ Now we can plot the CHM: ```{r harv-chm-overlay} ggplot() + geom_raster(data = CHM_ov_HARV_df, - aes(x = x, y = y, fill = layer)) + + aes(x = x, y = y, fill = HARV_dsmCrop)) + scale_fill_gradientn(name = "Canopy Height", colors = terrain.colors(10)) + coord_quickmap() ``` -How do the plots of the CHM created with manual raster math and the `overlay()` +How do the plots of the CHM created with manual raster math and the `lapp()` function compare? ## Export a GeoTIFF @@ -334,12 +334,13 @@ contains (CHM data) and for where (HARVard Forest). The `writeRaster()` function by default writes the output file to your working directory unless you specify a full file path. -We will specify the output format ("GTiff"), the no data value (`NAflag = -9999`. We will also tell R to overwrite any data that is already in -a file of the same name. +We will specify the output format ("GTiff"), the no data value `NAflag = -9999`. +We will also tell R to overwrite any data that is already in a file of the same +name. ```{r write-raster, eval=FALSE} writeRaster(CHM_ov_HARV, "CHM_HARV.tiff", - format="GTiff", + filetype="GTiff", overwrite=TRUE, NAflag=-9999) ``` @@ -348,7 +349,7 @@ writeRaster(CHM_ov_HARV, "CHM_HARV.tiff", The function arguments that we used above include: -- **format:** specify that the format will be `GTiff` or GeoTIFF. +- **filetype:** specify that the format will be `GTiff` or GeoTIFF. - **overwrite:** If TRUE, R will overwrite any existing file with the same name in the specified directory. USE THIS SETTING WITH CAUTION! - **NAflag:** set the GeoTIFF tag for `NoDataValue` to -9999, the National @@ -358,9 +359,9 @@ The function arguments that we used above include: ## Challenge: Explore the NEON San Joaquin Experimental Range Field Site -Data are often more interesting and powerful when we compare them across various -locations. Let's compare some data collected over Harvard Forest to data -collected in Southern California. The +Data are often more interesting and powerful when we compare them across +various locations. Let's compare some data collected over Harvard Forest to +data collected in Southern California. The [NEON San Joaquin Experimental Range (SJER) field site](https://www.neonscience.org/field-sites/field-sites-map/SJER) located in Southern California has a very different ecosystem and climate than the @@ -387,12 +388,10 @@ keep track of data from different sites! ## Answers -1) Use the `overlay()` function to subtract the two rasters \& create - the CHM. +1) Use the `lapp()` function to subtract the two rasters \& create the CHM. ```{r} -CHM_ov_SJER <- overlay(DSM_SJER, - DTM_SJER, +CHM_ov_SJER <- lapp(sds(list(DSM_SJER, DTM_SJER)), fun = function(r1, r2){ return(r1 - r2) }) ``` @@ -406,7 +405,7 @@ Create a histogram to check that the data distribution makes sense: ```{r sjer-chm-overlay-hist} ggplot(CHM_ov_SJER_df) + - geom_histogram(aes(layer)) + geom_histogram(aes(SJER_dsmCrop)) ``` 2) Create a plot of the CHM: @@ -415,7 +414,7 @@ ggplot(CHM_ov_SJER_df) + ggplot() + geom_raster(data = CHM_ov_SJER_df, aes(x = x, y = y, - fill = layer) + fill = SJER_dsmCrop) ) + scale_fill_gradientn(name = "Canopy Height", colors = terrain.colors(10)) + @@ -426,7 +425,7 @@ ggplot(CHM_ov_SJER_df) + ```{r} writeRaster(CHM_ov_SJER, "chm_ov_SJER.tiff", - format = "GTiff", + filetype = "GTiff", overwrite = TRUE, NAflag = -9999) ``` @@ -437,10 +436,10 @@ writeRaster(CHM_ov_SJER, "chm_ov_SJER.tiff", ```{r compare-chm-harv-sjer} ggplot(CHM_HARV_df) + - geom_histogram(aes(layer)) + geom_histogram(aes(HARV_dsmCrop)) ggplot(CHM_ov_SJER_df) + - geom_histogram(aes(layer)) + geom_histogram(aes(SJER_dsmCrop)) ``` ::::::::::::::::::::::::: @@ -452,7 +451,7 @@ ggplot(CHM_ov_SJER_df) + :::::::::::::::::::::::::::::::::::::::: keypoints - Rasters can be computed on using mathematical functions. -- The `overlay()` function provides an efficient way to do raster math. +- The `lapp()` function provides an efficient way to do raster math. - The `writeRaster()` function can be used to write raster data to a file. ::::::::::::::::::::::::::::::::::::::::::::::::::