/
example_other_stream_networks.Rmd
457 lines (361 loc) · 14.2 KB
/
example_other_stream_networks.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
---
title: "Usage of hydrographr with other stream networks"
subtitle: "Examples showing how to use the hydrographr package with the NHDPlusHR dataset"
author: ""
date: "`r Sys.Date()`"
output: html_vignette
vignette: >
%\VignetteIndexEntry{Usage of hydrographr with other stream networks}
%\VignetteEncoding{UTF-8}
%\VignetteEngine{knitr::rmarkdown}
bibliography: bib/references_nhdp.bib
link-citations: yes
linkcolor: blue
# csl: copernicus.csl
nocite: "@*"
editor_options:
markdown:
wrap: 72
---
```{r, include = FALSE, eval = F}
# writes out the references for all packages
knitr::write_bib(file = "bib/references_nhdp.bib")
```
```{r setup, include=F}
knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE,
out.width='50%', fig.align='center')
knitr::opts_knit$set(root.dir = "./data_cuba")
library(hydrographr)
library(nhdplusTools)
library(rgbif)
library(terra)
library(archive)
library(tools)
library(dplyr)
library(knitr)
library(kableExtra)
library(leaflet)
library(leafem)
library(htmlwidgets)
library(mapview)
library(here)
library(data.table)
library(igraph)
```
Load required libraries:
```{r, eval=T}
library(hydrographr)
library(nhdplusTools)
library(rgbif)
library(dplyr)
library(archive)
library(terra)
library(leaflet)
library(data.table)
library(igraph)
```
Define working directory:
```{r, eval=FALSE}
wdir <- paste0(here(), "/vignettes/data_other_networks")
if(!dir.exists(paste0(wdir, "/data"))) dir.create(paste0(wdir, "/data"))
```
```{r, eval=FALSE, include=FALSE}
wdir <- "/home/afroditi/Documents/PhD/hydrographr_case_study/nhdplus"
setwd(wdir)
dir.create("data")
```
```{r, eval = FALSE, include=FALSE}
wdir <- paste0(
"/home/mueblacker/Documents/Glowabio/Code/hydrographr/vignettes/data_hawaii")
```
### Download NHDPlusHR data
```{r, eval=F}
# Define the working directory for the region Hawaii
wdir <- "my/working/directory/data_hawaii"
data_dir <- paste0(wdir, "/data")
rast_dir <- paste0(data_dir, "/raster")
vect_dir <- paste0(data_dir, "/vector")
sp_dir <- paste0(data_dir, "/species")
out_dir <- paste0(wdir, "/output")
# Create a new folder in the working directories to store all the data
dir.create(data_dir)
dir.create(rast_dir)
dir.create(vect_dir)
dir.create(sp_dir)
dir.create(out_dir)
```
Data of the NHDPlusHR dataset can be downloaded using the R package
`nhdplusTools` or from this USGS website:
<https://apps.nationalmap.gov/downloader/>
```{r download, eval=F}
# Download NHDPlus raster data for Hawaii
download_nhdplusv2(outdir = rast_dir,
url = paste0(
"https://prd-tnm.s3.amazonaws.com/StagedProducts/",
"Hydrography/NHDPlusHR/Beta/GDB/",
"NHDPLUS_H_2006_HU4_RASTER.7z"),
progress = TRUE)
# Unzip -- can also be done manually
# When called with default arguments extracts all files in the archive.
archive_extract(archive = paste0(rast_dir, "/NHDPLUS_H_2006_HU4_RASTER.7z"),
dir = paste0(rast_dir, "/NHDPLUS_H_2006_HU4_RASTER")
)
```
```{r, eval=F}
# Download NHDPlus vector data for Hawaii
download_nhdplusv2(outdir = vect_dir,
url = paste0(
"https://prd-tnm.s3.amazonaws.com/StagedProducts/",
"Hydrography/NHDPlusHR/Beta/GDB/",
"NHDPLUS_H_2006_HU4_GDB.zip"),
progress = TRUE)
# Unzip -- can also be done manually
archive_extract(archive = paste0(vect_dir, "/NHDPLUS_H_2006_HU4_GDB.zip"),
dir = paste0(vect_dir, "/NHDPLUS_H_2006_HU4_GDB")
)
```
Check directory structure
```{r, eval = F}
list.files(paste0(rast_dir, "/NHDPLUS_H_2006_HU4_RASTER"), recursive = T,
pattern = ".tif$")
```
### Species data
Download occurrence records of *Stenogobius hawaiiensis* from GBIF.org
using the R package `rgbif`.
```{r, eval = FALSE}
# Download occurrence data with coordinates from GBIF
gbif_data <- occ_data(scientificName = "Stenogobius hawaiiensis",
hasCoordinate = TRUE)
# To cite the data use:
# gbif_citation(gbif_data)
# Clean the data
spdata <- gbif_data$data %>%
select(decimalLongitude, decimalLatitude, species, occurrenceStatus,
country, stateProvince, year) %>%
filter(!is.na(year),
stateProvince == "Hawaii") %>%
distinct() %>%
mutate(occurrence_id = 1:nrow(.)) %>%
rename("longitude" = "decimalLongitude",
"latitude" = "decimalLatitude") %>%
select(8, 1:7)
# Save the data
write.csv(spdata, paste0(sp_dir, "/stenogobius_hawaiiensis.csv"), row.names = F,
quote = F)
spdata
```
### Visualise species data
```{r, include = FALSE, eval = FALSE}
m <- leaflet() %>%
addProviderTiles('Esri.WorldShadedRelief') %>%
addCircles(data = spdata, color = "purple")
saveWidget(m, file=paste0(wdir, "../../man/figures/hawaii_map.html"))
```
```{r, eval = FALSE}
m <- leaflet() %>%
addProviderTiles('Esri.WorldShadedRelief') %>%
addCircles(data = spdata, color = "purple")
m
```
![](../man/figures/hawaii_map.png)
By inspecting the NHDPlusHR data, we observe that the coordinate
reference system is not WGS84, but the EPSG:26904.
```{r, eval=F}
rast(paste0(rast_dir,
"/NHDPLUS_H_2006_HU4_RASTER/HRNHDPlusRasters2006/cat.tif"))
```
Therefore we will need to convert the species points' coordinates to
EPSG:26904, so that they match with the spatial layers.
```{r, eval=F}
# Convert the coordinate columns to a matrix
spdata_coord <- spdata %>%
select(longitude, latitude) %>%
as.matrix()
# Project to the coordinate reference system of the NHDPlus dataset
spdata_coord <- project(spdata_coord,
from = "+proj=longlat +datum=WGS84",
to = "EPSG:26904")
# Replace the WGS84 coordinates with the EPSG:26904 coordinates in the species
# points' data frame
spdata <- spdata %>%
mutate(longitude = spdata_coord[,1],
latitude = spdata_coord[,2])
```
### Crop and merge raster data
```{r, eval = FALSE}
# The coordinates of the bounding box should also be in the NAD_1983_UTM_Zone_4N projection system
bbox <- c(582638, 2361564, 594373, 2370255)
crop_to_extent(raster_layer = paste0(rast_dir, "/NHDPLUS_H_2006_HU4_RASTER/HRNHDPlusRasters2006/cat.tif"),
bounding_box = bbox,
out_dir = out_dir,
file_name = "cat_crop.tif",
compression = "high"
)
# We can crop to another bounding box, and then try to merge the two cropped
# layers
bbox2 <- c(595603, 2371564, 604373, 2380255)
crop_to_extent(raster_layer = paste0(rast_dir, "/NHDPLUS_H_2006_HU4_RASTER/HRNHDPlusRasters2006/cat.tif"),
bounding_box = bbox2,
out_dir = out_dir,
file_name = "cat_crop2.tif",
compression = "high"
)
# Merge the two cropped layers
merge_tiles(tile_dir = out_dir,
tile_names = c("cat_crop.tif", "cat_crop2.tif"),
out_dir = out_dir,
file_name = "cat_merged.tif",
compression = "high")
```
### Extract IDs, Report no data value, Set no data value
```{r, eval=F}
spdata_ids <- extract_ids(data = spdata,
lon = "longitude", lat = "latitude",
id = "occurrence_id",
basin_layer = paste0(rast_dir, "/NHDPLUS_H_2006_HU4_RASTER/HRNHDPlusRasters2006/cat.tif"))
spdata_ids
# We observe that some points obtain basin_id=NA, because they fall out of the
# extent of the downloaded raster layer. We will filter these points out,
# keeping only those which overlap with the raster.
spdata_ids <- spdata_ids %>%
filter(!is.na(basin_id))
spdata_ids
# On the other hand, some other points obtain basin_id=-32768. This should be
# the no data value of the raster layer, meaning a value attributed to the sea
# cells. We can double-check this using the report_no_data function:
no_data_val <- report_no_data(data_dir = paste0(rast_dir, "/NHDPLUS_H_2006_HU4_RASTER/HRNHDPlusRasters2006"), var_layer = "cat.tif")
no_data_val
# We can filter out these points, too, so that we only keep the points sampled
# in rivers.
spdata_ids <- spdata_ids %>%
filter(basin_id != no_data_val$NoData)
spdata_ids
```
```{r, eval = FALSE}
# Don't run:
# If we wanted to change the no data value of a layer, we could do it using the
# function:
set_no_data(data_dir = paste0(rast_dir, "/NHDPLUS_H_2006_HU4_RASTER/HRNHDPlusRasters2006"),
var_layer = "cat.tif",
no_data = -32768)
# Remember to set it back to the original value!
```
### Extract zonal statistics
To extract zonal statistics of a raster layer using the NHDPlus basins
as zones, we first need to download and crop to our extent an example
raster layer. We will use the CHELSA Bioclim 12 (BIO12) layer of
1981-2010 for this example.
For this operation, we also need to convert the projection system of the
cat.tif raster to EPSG:4326 using the 'terra' package, so that it is
compatible with the coordinate system of CHELSA Bioclim files.
```{r, eval=F}
# Download bio12 in the rast_dir
download.file(
"https://os.zhdk.cloud.switch.ch/envicloud/chelsa/chelsa_V2/GLOBAL/climatologies/1981-2010/bio/CHELSA_bio12_1981-2010_V.2.1.tif",
destfile = paste0(rast_dir, "/bio12_1981-2010.tif"), mode = "wb")
# Crop bio12 to bounding box
# We first need to convert the bounding box coordinates to WGS84, that is the
# coordinate system of the CHELSA Bioclim layers.
bbox_wgs84 <- project(ext(bbox), from = "EPSG:26904", to = "EPSG:4326")
# Now crop
crop_to_extent(raster_layer = paste0(rast_dir, "/bio12_1981-2010.tif"),
bounding_box = bbox_wgs84,
out_dir = out_dir,
file_name = "bio12_crop.tif",
compression = "high"
)
# Load the cat.tif layer
cat_rast <- rast(paste0(rast_dir, "/NHDPLUS_H_2006_HU4_RASTER/HRNHDPlusRasters2006/cat.tif"))
# Reproject to WGS84 and write out the file with a new name
cat_rast <- project(cat_rast,
y = "epsg:4326",
filename = paste0(rast_dir, "/NHDPLUS_H_2006_HU4_RASTER/HRNHDPlusRasters2006/cat_wgs84.tif"))
# Extract the zonal statistics of bio12 based on the NHDPlus basins
zonal_stats <- extract_zonal_stat(data_dir = out_dir,
subc_id = "all",
subc_layer = paste0(rast_dir, "/NHDPLUS_H_2006_HU4_RASTER/HRNHDPlusRasters2006/cat_wgs84.tif"),
var_layer = "bio12_crop.tif",
quiet = F)
# Same as before, we need to filter out the no data values to get a better
# grasp of the resulting table
zonal_stats <- zonal_stats %>%
filter(bio12_crop_data_cells !=0 )
zonal_stats
```
### Reclassify raster
```{r, eval=F}
# Create example dataframe with reclassification rules to reclassify the value
# of all basins containing species data to 1, and all the rest to -99999.
reclass_rules <- data.frame(cat = as.integer(spdata_ids$basin_id),
new_val = as.integer(1))
# Apply reclassification
reclass_raster(
data = reclass_rules,
rast_val = "cat",
new_val = "new_val",
raster_layer = paste0(rast_dir, "/NHDPLUS_H_2006_HU4_RASTER/HRNHDPlusRasters2006/cat.tif"),
recl_layer = paste0(rast_dir, "/NHDPLUS_H_2006_HU4_RASTER/HRNHDPlusRasters2006/cat_reclass.tif"),
read = FALSE,
no_data = no_data_val$NoData)
```
### <!--# Extract from .GPKG -->
<!--# See Download NHDPlus data to download NHDPlus vector data for Hawaii. -->
```{r, eval=F, include = F}
gdb_dir <- paste0(vect_dir, "/NHDPLUS_H_2006_HU4_GDB")
# Write out a .gpkg file based on the .gdb files
get_nhdplushr(gdb_dir, file.path(gdb_dir, "nhdplus_2006-HU4.gpkg"))
extract_from_gpkg(data_dir = gdb_dir, subc_id = "all", var_layer = "nhdplus_2006-HU4.gpkg", quiet = F)
# I think that stream IDs don’t correspond to catchment ids, so it’s not
# possible to filter the .gpkg using the ids of the catchment raster
```
### Read a .GPKG
See [Download NHDPlus data] to download NHDPlus vector data for Hawaii.
```{r, eval=F}
gdb_dir <- paste0(vect_dir, "/NHDPLUS_H_2006_HU4_GDB")
# Write out a .gpkg file based on the .gdb files
get_nhdplushr(gdb_dir, file.path(gdb_dir, "nhdplus_2006-HU4.gpkg"))
# Import flow line attribute table as a data.table
flow_line <- read_geopackage(gpkg = paste0(gdb_dir, "/nhdplus_2006-HU4.gpkg"),
layer_name = "NHDFlowline")
# To import flow line as a graph, first import the attribute table as a
# data.table and then transform to a graph. Note that the first two columns
# need to be the "from" and "to" columns
setcolorder(flow_line, c("FromNode", "ToNode"))
# Create the graph
flow_line_graph <- graph_from_data_frame(flow_line, directed = TRUE)
# Import the catchment layer as a spatial vector
catchment_sp <- read_geopackage(
gpkg = paste0(gdb_dir, "/nhdplus_2006-HU4.gpkg"),
layer_name = "NHDPlusCatchment",
name = "REACHCODE",
import_as = "SpatVect")
```
### Snap points to the network
```{r, eval = F}
# Define full path to the stream network raster layer
stream_rast <- paste0(rast_dir, "/NHDPLUS_H_2006_HU4_RASTER/HRNHDPlusRasters2006/swnet.tif")
# Define full path to the flow accumulation raster layer
flow_rast <- paste0(rast_dir,
"/NHDPLUS_H_2006_HU4_RASTER/HRNHDPlusRasters2006/fac.tif")
# Define thresholds for the flow accumulation of the stream segment, where
# the point location should be snapped to
accu_threshold <- 700
# Define the distance radius
dist_radius <- 20
# Snap point locations to the stream network
point_locations_snapped <- snap_to_network(data = spdata_ids,
lon = "longitude",
lat = "latitude",
id = "occurrence_id",
stream_layer = stream_rast,
accu_layer = flow_rast,
method = "both",
distance = dist_radius,
accumulation = accu_threshold,
quiet = FALSE)
```
Note: The columns "subc_id_snap_dist" and "subc_id_snap_accu" of the
table "point_locations_snapped" are only meaningful in case that the
stream and flow accumulation raster layers were derived from the
Hydrography90m.
### References