prepr 
An R package to repair broken GIS polygons using the
prepair C++ library.
Installation
The prepair C++ library need these two libraries to compile:
The R package prepr solves the CGAL dependencies by shipping with a
subset of the CGAL header. We also use
rwinlib to provide GDAL on Windows in
order to build this package from source. You will need the latest
version of rtools in
order to build the source code on Windows.
prepair can also use these optional libraries:
They are disabled by default on Windows but required if you want to
build the package in a Linux/OS X environment. After installing all
these libraries, you can now install the development version of the
prepr R package from Gitlab using
the remotes R package with:
# install.packages("remotes")
remotes::install_gitlab("dickoa/prepr")
remotes::install_github("dickoa/prepr") ## mirror
remotes::install_git("https://git.ahmadoudicko.com/dickoa/prepr.git") ## mirrorA quick tutorial
This is a simple tutorial which shows you how to solve common problems we can find with polygons:
A ‘bowtie’ polygon:
library(prepr)
library(sf)
p1 <- st_as_sfc("POLYGON((0 0, 0 10, 10 0, 10 10, 0 0))")
st_is_valid(p1, reason = TRUE)
#> [1] "Self-intersection[5 5]"
st_probustness(p1)
#> [1] 5
p11 <- st_prepair(p1)
st_is_valid(p11)
#> [1] TRUE
st_probustness(p11)
#> [1] 0
st_as_text(p11)
#> [1] "MULTIPOLYGON (((0 10, 0 0, 5 5, 0 10)), ((5 5, 10 0, 10 10, 5 5)))"
par(mfrow = c(1, 2))
plot(p1, main = "RAW", col = "#D7722C")
plot(p11, main = "Repaired", col = "#D7722C")Square with wrong orientation:
p2 <- st_as_sfc("POLYGON((0 0, 0 10, 10 10, 10 0, 0 0))")
st_is_valid(p2, reason = TRUE)
#> [1] "Valid Geometry"
st_probustness(p2)
#> [1] 10
plot(p2, main = "RAW", col = "#D7722C")Inner ring with one edge sharing part of an edge of the outer ring:
p3 <- st_as_sfc("POLYGON((0 0, 10 0, 10 10, 0 10, 0 0),(5 2, 5 7, 10 7, 10 2, 5 2))")
st_is_valid(p3, reason = TRUE)
#> [1] "Self-intersection[10 2]"
st_probustness(p3)
#> [1] 2
p33 <- st_prepair(p3)
st_is_valid(p33)
#> [1] TRUE
st_probustness(p33)
#> [1] 2
st_as_text(p33)
#> [1] "MULTIPOLYGON (((10 2, 10 7, 10 10, 0 10, 0 0, 10 0, 10 2)))"
par(mfrow = c(1, 2))
plot(p3, main = "RAW", col = "#D7722C")
plot(p33, main = "Repaired", col = "#D7722C")Dangling edge:
p4 <- st_as_sfc("POLYGON((0 0, 10 0, 15 5, 10 0, 10 10, 0 10, 0 0))")
st_is_valid(p4, reason = TRUE)
#> [1] "Self-intersection[10 0]"
st_probustness(p4)
#> [1] 5
p44 <- st_prepair(p4)
st_is_valid(p44)
#> [1] TRUE
st_probustness(p44)
#> [1] 10
st_as_text(p44)
#> [1] "MULTIPOLYGON (((0 0, 10 0, 10 10, 0 10, 0 0)))"
par(mfrow = c(1, 2))
plot(p4, main = "RAW", col = "#D7722C")
plot(p44, main = "Repaired", col = "#D7722C")Two adjacent inner rings:
p6 <- st_as_sfc("POLYGON((0 0, 10 0, 10 10, 0 10, 0 0), (1 1, 1 8, 3 8, 3 1, 1 1), (3 1, 3 8, 5 8, 5 1, 3 1))")
st_is_valid(p6, reason = TRUE)
#> [1] "Self-intersection[3 1]"
st_probustness(p6)
#> [1] 1
p66 <- st_prepair(p6)
st_is_valid(p66)
#> [1] TRUE
st_probustness(p66)
#> [1] 1
st_as_text(p66)
#> [1] "MULTIPOLYGON (((10 0, 10 10, 0 10, 0 0, 10 0), (1 1, 1 8, 3 8, 5 8, 5 1, 3 1, 1 1)))"
par(mfrow = c(1, 2))
plot(p6, main = "RAW", col = "#D7722C")
plot(p66, main = "Repaired", col = "#D7722C")Polygon with an inner ring inside another inner ring:
p7 <- st_as_sfc("POLYGON((0 0, 10 0, 10 10, 0 10, 0 0), (2 8, 5 8, 5 2, 2 2, 2 8), (3 3, 4 3, 3 4, 3 3))")
st_is_valid(p7, reason = TRUE)
#> [1] "Holes are nested[3 3]"
st_probustness(p7)
#> [1] 0.7071068
p77 <- st_prepair(p7)
st_is_valid(p77)
#> [1] TRUE
st_probustness(p77)
#> [1] 0.7071068
st_as_text(p77)
#> [1] "MULTIPOLYGON (((10 0, 10 10, 0 10, 0 0, 10 0), (5 2, 2 2, 2 8, 5 8, 5 2)), ((3 4, 3 3, 4 3, 3 4)))"
par(mfrow = c(1, 2))
plot(p7, main = "RAW", col = "#D7722C")
plot(p77, main = "Repaired", col = "#D7722C")A exemple with a real dataset
Reading the data
(clc1 <- read_sf("https://github.com/tudelft3d/prepair/raw/master/data/CLC2006_180927.geojson"))
#> Simple feature collection with 1 feature and 0 fields
#> geometry type: POLYGON
#> dimension: XY
#> bbox: xmin: 17.25904 ymin: 67.98536 xmax: 21.11988 ymax: 69.54275
#> geographic CRS: WGS 84
#> # A tibble: 1 x 1
#> geometry
#> <POLYGON [°]>
#> 1 ((20.33495 69.31206, 20.32738 69.31229, 20.32749 69.31274, 20.…
(clc2 <- read_sf("https://github.com/tudelft3d/prepair/raw/master/data/CLC2006_2018418.geojson"))
#> Simple feature collection with 1 feature and 0 fields
#> geometry type: POLYGON
#> dimension: XY
#> bbox: xmin: 22.2989 ymin: 41.44139 xmax: 22.96658 ymax: 41.86164
#> geographic CRS: WGS 84
#> # A tibble: 1 x 1
#> geometry
#> <POLYGON [°]>
#> 1 ((22.71665 41.73939, 22.71759 41.73931, 22.71858 41.73926, 22.…
par(mfrow = c(1, 2))
plot(st_geometry(clc1), main = "CLC2006_180927", col = 'lightblue', axes = TRUE, graticule = TRUE, lwd = 0.2, cex.axis = 0.5)
plot(st_geometry(clc2), main = "CLC2006_2018418", col = "lightblue", axes = TRUE, graticule = TRUE, lwd = 0.2, cex.axis = 0.5)Check if it’s valid and repair it
st_is_valid(clc1, reason = TRUE)
#> [1] "Ring Self-intersection[20.1372785154605 68.32204464423]"
(clc1_rpr <- st_prepair(clc1))
#> Simple feature collection with 1 feature and 0 fields
#> geometry type: MULTIPOLYGON
#> dimension: XY
#> bbox: xmin: 17.25904 ymin: 67.98536 xmax: 21.11988 ymax: 69.54275
#> geographic CRS: WGS 84
#> # A tibble: 1 x 1
#> geometry
#> <MULTIPOLYGON [°]>
#> 1 (((20.33495 69.31206, 20.33491 69.31319, 20.33508 69.31385, 20…
st_is_valid(clc1_rpr)
#> [1] TRUE
st_is_valid(clc2, reason = TRUE)
#> [1] "Ring Self-intersection[22.7166526165073 41.7393917772103]"
(clc2_rpr <- st_prepair(clc2))
#> Simple feature collection with 1 feature and 0 fields
#> geometry type: MULTIPOLYGON
#> dimension: XY
#> bbox: xmin: 22.2989 ymin: 41.44139 xmax: 22.96658 ymax: 41.86164
#> geographic CRS: WGS 84
#> # A tibble: 1 x 1
#> geometry
#> <MULTIPOLYGON [°]>
#> 1 (((22.71665 41.73939, 22.71663 41.73884, 22.71634 41.73816, 22…
st_is_valid(clc2_rpr)
#> [1] TRUEHow fast is st_prepair ?
prepr::st_prepair is fast and can be in some cases faster than
sf::st_make_valid
(bnch1 <- bench::mark(st_make_valid(clc1), st_prepair(clc1), check = FALSE))
#> # A tibble: 2 x 6
#> expression min median `itr/sec` mem_alloc
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt>
#> 1 st_make_valid(clc1) 2.19s 2.19s 0.456 NA
#> 2 st_prepair(clc1) 459.44ms 463.18ms 2.16 NA
#> # … with 1 more variable: `gc/sec` <dbl>
summary(bnch1, relative = TRUE)
#> # A tibble: 2 x 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 st_make_valid(clc1) 4.77 4.73 1 NA NaN
#> 2 st_prepair(clc1) 1 1 4.73 NA NaN
(bnch2 <- bench::mark(st_make_valid(clc2), st_prepair(clc2), check = FALSE))
#> # A tibble: 2 x 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:t> <bch:> <dbl> <bch:byt> <dbl>
#> 1 st_make_valid(clc2) 747ms 747ms 1.34 NA 0
#> 2 st_prepair(clc2) 180ms 182ms 5.51 NA 0
summary(bnch2, relative = TRUE)
#> # A tibble: 2 x 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 st_make_valid(clc2) 4.15 4.11 1 NA NaN
#> 2 st_prepair(clc2) 1 1 4.12 NA NaNYou also have cases where it’s slower to sf::st_make_valid, let’s use
this data from a closed
issue in the sf R
package.
## need vsicurl
(agb <- read_sf("/vsicurl/http://files.hawaii.gov/dbedt/op/gis/data/2015AgBaseline.shp.zip"))
#> Simple feature collection with 5024 features and 6 fields
#> geometry type: MULTIPOLYGON
#> dimension: XY
#> bbox: xmin: 419564.3 ymin: 2095890 xmax: 938457.6 ymax: 2457686
#> projected CRS: NAD83 / UTM zone 4N
#> # A tibble: 5,024 x 7
#> OBJECTID CropCatego Island Acrage Shape_Leng Shape_Area
#> <dbl> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 4529 Commercia… Kauai 2.73e2 6301. 1104891.
#> 2 4530 Coffee Kauai 7.44e0 1254. 30103.
#> 3 4531 Coffee Kauai 4.65e0 772. 18813.
#> 4 4532 Coffee Kauai 1.13e3 16248. 4572534.
#> 5 4533 Coffee Kauai 3.49e0 858. 14106.
#> 6 4534 Coffee Kauai 5.21e2 7868. 2106938.
#> 7 4535 Coffee Kauai 7.16e2 11290. 2896255.
#> 8 4536 Coffee Kauai 1.33e2 3705. 539524.
#> 9 4537 Flowers /… Kauai 3.52e1 1871. 142623.
#> 10 4538 Coffee Kauai 9.69e1 2930. 391962.
#> # … with 5,014 more rows, and 1 more variable:
#> # geometry <MULTIPOLYGON [m]>
all(st_is_valid(agb))
#> [1] FALSE
all(st_is_valid(st_make_valid(agb)))
#> [1] TRUE
all(st_is_valid(st_prepair(agb)))
#> [1] TRUE
plot(st_geometry(agb), main = "2015 Agriculture baseline", col = 'lightblue', axes = TRUE, graticule = TRUE, lwd = 0.2, cex.axis = 0.5)sf::st_make_valid is faster with this data.
(bnch3 <- bench::mark(st_make_valid(agb), st_prepair(agb), check = FALSE))
#> Warning: Some expressions had a GC in every iteration; so
#> filtering is disabled.
#> # A tibble: 2 x 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:t> <bch:t> <dbl> <bch:byt> <dbl>
#> 1 st_make_valid(agb) 1.84s 1.84s 0.544 NA 2.18
#> 2 st_prepair(agb) 4.27s 4.27s 0.234 NA 0.234
summary(bnch3, relative = TRUE)
#> Warning: Some expressions had a GC in every iteration; so
#> filtering is disabled.
#> # A tibble: 2 x 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 st_make_valid(agb) 1 1 2.32 NA 9.30
#> 2 st_prepair(agb) 2.32 2.32 1 NA 1Details and how to cite
Details of how we automatically repair broken polygons, and what results
you can expect, are available in this scientific article by the original
authors of prepair:
Ledoux, H., Arroyo Ohori, K., and Meijers, M. (2014). A triangulation-based approach to automatically repair GIS polygons. Computers & Geosciences 66:121–131. [DOI] [PDF]
If you use the R package prepr for a scientific project, please cite
their original work.
License
This package is released under the GPL-3 license.