title | author | date | output |
---|---|---|---|
src_borders.R |
Dean Koch |
June 10, 2020 |
github_document |
This is the initial setup script for reproducing the rasterbc dataset. It downloads some shapefiles to define boundaries, and sets up configuration details for GIS processing.
# download again to rewrite existing files?
force.download = FALSE
# load the helper functions
library(here)
source(here('utility_functions.R'))
## [1] "data directory H:/git-MPB/rasterbc/data exists"
# create the source data directory and metadata list
collection = 'borders'
cfg = MPB_metadata(collection)
## [1] "borders source subdirectory exists"
## [1] "source data storage: H:/git-MPB/rasterbc/data/borders/source"
## [1] "borders mapsheets subdirectory exists"
## [1] "mapsheets data storage: H:/git-MPB/rasterbc/data/borders/blocks"
The metadata get filled in as we go. At the end of the script, we save this list to data.dir
cfg.filename = file.path(data.dir, paste0(collection, '.rds'))
source information
These data are from the National Topographic System (NTS) of Canada (Natural Resources Canada). The web url below points to an ftp directory containing a zip archive with shapefiles for provincial borders, and for a tiling of BC's geographical area into smaller blocks (or mapsheets, indexed by a number-letter code). The Open Government Licence - Canada applies.
# define the source metadata
cfg.src = list(
# url to download from
web = 'http://ftp.geogratis.gc.ca/pub/nrcan_rncan/vector/index/nts_snrc.zip',
# filename strings of the shapefiles for BC provincial border and SNRC mapsheets
fname.prefix = c(prov = 'prov_territo',
snrc = 'nts_snrc_250k'),
# feature names to save
feat.name = c(NTS_SNRC = 'NTS_SNRC',
NAME_ENG = 'NAME_ENG',
NOM_FRA = 'NOM_FRA')
)
# update the metadata list
cfg = MPB_metadata(collection, cfg.in=cfg, cfg.src=cfg.src)
From the provincial border polygon we will construct a binary raster indicating whether the grid cell lies
in BC (prov
); In addition we store the latitude and longitude for each cell (latitude
, longitude
)
in separate layers. Copies of the reprojected provincial border and mapsheet polygons will be saved as shapefile.
# set up the source and output filenames
varnames = setNames(nm=c(names(cfg$src$fname.prefix), lat='latitude', long='longitude'))
cfg$src$fname = setNames(paste0(file.path(cfg$src$dir, cfg$src$fname.prefix), '.shp'), varnames[c('prov', 'snrc')])
cfg$out$fname$shp = setNames(paste0(file.path(data.dir, cfg$out$name, varnames[c('prov', 'snrc')]), '_std.shp'), varnames[c('prov', 'snrc')])
prefix.tif = setNames(file.path(data.dir, cfg$out$name, varnames[!(varnames %in% 'snrc')]), varnames[!(varnames %in% 'snrc')])
cfg$out$fname$tif$full = setNames(paste0(prefix.tif, '.tif'), varnames[!(varnames %in% 'snrc')])
prefix.tif
gives the first part of the filenames that will later be assigned to each of the mapsheets, along with a suffix
of the form '_<SNRC-code>.tif'. First we need to download and open the SNRC shapefile to find out which mapsheet codes are relevant.
downloads
This is a relatively small zip archive (12.2 MB, or 55.1 MB uncompressed)
# check if we need to download it again:
if(!all(file.exists(cfg$src$fname)) | force.download)
{
#+ eval=FALSE
# download the zip to temporary file (12.2 MB), extract to disk
borders.tmp = tempfile('borders_temp', cfg$src$dir, '.zip')
download.file(url=cfg$src$web, destfile=borders.tmp, mode='wb')
borders.paths = unzip(borders.tmp, exdir=cfg$src$dir)
unlink(borders.tmp)
#' verify that the two source files listed in `cfg$src$fname` are among the extracted files
all(cfg$src$fname %in% borders.paths)
} else {
print('using existing source files:')
print(cfg$src$fname)
}
## [1] "using existing source files:"
## prov snrc
## "H:/git-MPB/rasterbc/data/borders/source/prov_territo.shp" "H:/git-MPB/rasterbc/data/borders/source/nts_snrc_250k.shp"
Once the files are downloaded/extracted, the script will use the existing files instead of downloading them again (unless
force.download
is set to TRUE
)
processing
First, prepare a mask for in-province pixels and save it to disk (20.2 MB):
# load the provincial boundaries polygons and NTS/SNRC blocks, reprojecting to crs(bc.mask.tif)
prov.sf = sf::st_transform(sf::st_read(dsn=cfg$src$fname['prov']), MPB_crs()$epsg)
snrc.sf = sf::st_transform(sf::st_read(dsn=cfg$src$fname['snrc']), MPB_crs()$epsg)
# create a mask for in-province pixels (setting reference extent and resolution)
ref.tif = raster::crop(MPB_crs()$tif, prov.sf[prov.sf$PROV_TERRI=='BC',1], snap='out')
bc.sf = prov.sf[prov.sf$PROV_TERRI=='BC',1]
# replace character column with numeric, rasterize, and write
bc.sf$PROV_TERRI = 0
bc.mask.tif = fasterize::fasterize(sf=bc.sf, raster=ref.tif, field='PROV_TERRI', fun='any')
raster::writeRaster(bc.mask.tif, cfg$out$fname$tif$full['prov'], overwrite=TRUE)
Next, build a list of NTS/SNRC codes that overlap with BC landmass
# find which SNRC codes intersect with BC, Yukon, Northwest Territories
bc.blocks.idx = sf::st_intersects(sf::st_geometry(prov.sf[prov.sf$PROV_TERRI=='BC',1]), sf::st_geometry(snrc.sf), sparse=FALSE)
yt.blocks.idx = sf::st_intersects(sf::st_geometry(prov.sf[prov.sf$PROV_TERRI=='YT',1]), sf::st_geometry(snrc.sf), sparse=FALSE)
nt.blocks.idx = sf::st_intersects(sf::st_geometry(prov.sf[prov.sf$PROV_TERRI=='NT',1]), sf::st_geometry(snrc.sf), sparse=FALSE)
omit.blocks.idx = yt.blocks.idx | nt.blocks.idx | is.na(snrc.sf$NAME_ENG)
There seem to be some overlap/imprecision issues with the provincial border polygons: some of the coastal mapsheets (see below, in blue) are missing from this intersection (in yellow). I add them back manually...
missing.blocks = c('102O', '103O', '114O', '104C', '114I', '114P', '114O')
plot(st_geometry(bc.sf), col='yellow')
plot(st_geometry(snrc.sf)[bc.blocks.idx & !omit.blocks.idx], add=TRUE)
plot(st_geometry(snrc.sf)[snrc.sf$NTS_SNRC %in% missing.blocks], add=TRUE, col='blue')
incl.blocks.idx = bc.blocks.idx & !omit.blocks.idx | snrc.sf$NTS_SNRC %in% missing.blocks
incl.features.idx = which(names(snrc.sf) %in% cfg$src$feat.name)
... and save reprojected/simplified version of the source shapefile data (1.1 MB).
sf::st_write(snrc.sf[incl.blocks.idx, incl.features.idx], cfg$out$fname$shp['snrc'], append=FALSE)
sf::st_write(bc.sf, cfg$out$fname$shp['prov'], append=FALSE)
Now reload these files to overwrite the (larger) source shapefiles in memory:
The NTS/SNRC codes in cfg$out$code
should match those in the NTS mapsheet collection for BC
(PDF link).
snrc.sf = sf::st_read(cfg$out$fname$shp['snrc'])
prov.sf = sf::st_read(cfg$out$fname$shp['prov'])
cfg$out$code = snrc.sf$NTS_SN
Create latitude, longitude layers via sp
package (takes around 5 minutes), then save to disk (622 MB)
# load the in-province mask (raster), convert to points dataframe
bc.mask.tif = raster::raster(cfg$out$fname$tif$full['prov'])
bc.coords.df = data.frame(raster::coordinates(bc.mask.tif))
sp::coordinates(bc.coords.df) = c('x', 'y')
sp::proj4string(bc.coords.df) = raster::crs(bc.mask.tif)
# reproject to lat/long, apply mask, and write
bc.coords.df = sp::spTransform(bc.coords.df, CRS=sp::CRS('+proj=longlat +ellps=GRS80'))
raster::writeRaster(raster::mask(raster::setValues(bc.mask.tif, bc.coords.df$y), bc.mask.tif), cfg$out$fname$tif$full['latitude'], overwrite=TRUE)
raster::writeRaster(raster::mask(raster::setValues(bc.mask.tif, bc.coords.df$x), bc.mask.tif), cfg$out$fname$tif$full['longitude'], overwrite=TRUE)
Finally, split these layers up into mapsheets corresponding to the NTS/SNRC codes (638 MB)
# reload the mapsheet polygons
snrc.sf = sf::st_read(cfg$out$fname$shp['snrc'])
# function call to crop and save blocks (writes )
cfg.blocks = MPB_split(cfg, snrc.sf)
# update metadata list `cfg` and save it to `data.dir`.
cfg = MPB_metadata(collection, cfg.in=cfg, cfg.out=list(fname=list(tif=list(block=cfg.blocks))))
saveRDS(cfg, file=cfg.filename)
Metadata (including file paths) can now be loaded from 'borders.rds' located in data.dir
using readRDS()
.