version 1.0-3

cran · Jul 13, 2015 · 7d8c17a · 7d8c17a
commit 7d8c17a
Show file tree

Hide file tree

Showing 66 changed files with 5,162 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,19 @@
+Package: speciesgeocodeR
+Type: Package
+Title: Prepare Species Distributions for the Use in Phylogenetic
+        Analyses
+Version: 1.0-3
+Date: 2015-07-13
+Author: Alexander Zizka [aut, cre]
+Maintainer: Alexander Zizka <alexander.zizka@bioenv.gu.se>
+Description: Preparation of species occurrences data in relation to geographical areas for the use in phylogenetic analyses. SpeciesgeocodeR is built for data cleaning, data exploration and data analysis and especially suited for biogeographical and ecological questions on large datasets. The package includes the easy creation of summary-tables and -graphs and geographical maps, the automatic cleaning of geographic occurrence data, the calculating of coexistence matrices and species ranges (EOO) as well as mapping diversity in geographic areas.
+License: GPL-3
+Depends: R (>= 3.0.0), maps
+Imports: stats, methods, utils, grDevices, graphics, maptools, sp,
+        raster
+Suggests: rgbif, geosphere
+LazyData: true
+NeedsCompilation: no
+Packaged: 2015-07-13 09:22:49 UTC; xzizal
+Repository: CRAN
+Date/Publication: 2015-07-14 15:58:09
diff --git a/MD5 b/MD5
@@ -0,0 +1,65 @@
+5f57bdeeba77c0fc8b0d616b91e5004d *DESCRIPTION
+06c4b085b6a24e317c66a08be2d9830f *NAMESPACE
+63d13e22f0423f5afec442ebbcbb0a26 *R/CalcRange.R
+bd7288b59b38a06a0e8e216bae9ad0fa *R/CoExClass.R
+1bae3cfa0b404bbe296100891e2ff467 *R/GeoClean.R
+fdc3e565903a1b82d6351a52b0902946 *R/GetElevation.R
+365b8a7882f8ea034e7fefbffe2d1cfd *R/MapGrid.R
+b4adacceb7516b5b31d6ab1fb7198058 *R/MapRichness.R
+fbdae9119717d77765e9dc9bc435aef2 *R/PlotHull.R
+2bd2ccb17954aab824aae3fc1719fdda *R/RangeRichness.R
+a9f3e425844d2bd2510d5d1f90704afd *R/ReadPoints.R
+27f22abda7ae87abfbfb3f17b3b4cdc4 *R/RichnessGrid.R
+c0fb165c779fee891a1a66511916cc5e *R/SpGeoCod.R
+4b91d40915a8bf3b52f21ebca28724d4 *R/SpGeoCodH.R
+063235cf155877be4fe9719b5ce6fb71 *R/SpeciesGeoCoder.R
+a1d621329a6cc96478b1c1c2f18f3efc *R/WriteOut.R
+dd3b739e69aa73e4323573a4ef14ceef *R/WwfLoad.R
+0b92b1d736b1f1171ae10c735fa14dd9 *R/plot.spgeoIN.R
+18ae061af1787ee29b851402c33ed59a *R/plot.spgeoOUT.R
+563781115ec6327ff8f9a0aee36a3604 *R/speciesgeocodeR-internal.R
+8d24d8b190be480a282c0a6b501b3f89 *R/summary.spgeoIN.R
+762d8fa8c2e61216dab23f011066a86d *R/summary.spgeoOUT.R
+4434774c6f31b00b7f481da327e63f54 *data/countryref.rda
+e12c02647c26b06f29a2f6efdb6a3312 *data/lemurs.rda
+6c0176054905a1662051ecd9f4189244 *data/lemurs_in.rda
+b5f256275ca22234aa1ceabc19fdb4f3 *data/lemurs_test.rda
+f6f7d528fa9d83c8d37e97ea073549fe *data/mdg_biomes.rda
+3dc935633061362929977229b542ef53 *data/mdg_poly.rda
+04ccca11a2affcc0ae93c4d21af77124 *inst/doc/data_cleaning_and_exploration.Rmd
+733d2fefbf42b9a9cbf2ea7099171bb9 *inst/doc/data_cleaning_and_exploration.html
+7c761f5c73e1947eaceff7d217e3389f *inst/doc/data_cleaning_and_exploration.pdf
+39e9a9d9d48bca7bdacd4ea5dacba975 *inst/extdata/lemur_test.txt
+385c81507c63ba673e591d48329607ae *inst/extdata/lemurs.txt
+025495e6289e7c275708ad9fb0c6d950 *inst/extdata/mdg_biomes_simple.dbf
+e729936bf5360b37a15365fc295a1901 *inst/extdata/mdg_biomes_simple.prj
+f34dee5ac9bad1bc567ccb2246dc228e *inst/extdata/mdg_biomes_simple.qpj
+57433fe9a439f3a25ef37683e74aaab8 *inst/extdata/mdg_biomes_simple.shp
+e3b562be94d4fd3a6c78681df29de6d9 *inst/extdata/mdg_biomes_simple.shx
+4b48a3f7b8f851a96f4a1edb4dd9c888 *inst/extdata/mdg_biomes_simple.txt
+2a84b4bae4ded77a1e164b79b45185e9 *man/CalcRange.Rd
+a1bf764966dad266fab670f1ad55ad28 *man/CoExClass.Rd
+2fdb8b7290f71d1d7d629e16b28ac8a8 *man/GeoClean.Rd
+d1bab67bd5a25e9a3c8ebce0dbd00c34 *man/GetElevation.Rd
+8a67a8cf33a48ea68054f8e0ac31e29b *man/MapGrid.Rd
+4b1f9c5a098372220f78dd762eb6bcf2 *man/MapRichness.Rd
+b6c34efad5a7a79d00839af444eb7180 *man/PlotHull.Rd
+234e0aa76878bcb2d17049f848cd62f3 *man/RangeRichness.Rd
+8909b3c856e24fb7b4b30d9eb0b61be9 *man/ReadPoints.Rd
+c0aaeb8a08af3f9d6da5fdcfcd75b275 *man/RichnessGrid.Rd
+2d15f70b0bcff9fbac2c8bd12199f893 *man/SpGeoCod.Rd
+a92149fa919355bb0c314a3ec1890bd5 *man/SpGeoCodH.Rd
+284bd3f27535ab4183c76494afc42f34 *man/SpeciesGeoCoder.Rd
+cd26e2ba5f231274df01c02dc93ce92b *man/WriteOut.Rd
+3d58ec39940a861a0fe10d3d2ca432f6 *man/WwfLoad.Rd
+1fac18a7f8b1e04d3f7f62370c8ad358 *man/countryref.Rd
+e1ef516e62fdd06a550ce0fbfb7d4f1a *man/lemurs.Rd
+f25317ba944d1916ae5761a1fd6cb481 *man/lemurs_in.Rd
+80430b58eab90d23fe1757bb26b42b43 *man/lemurs_test.Rd
+4a71350d8990458df31b1c1a2ecac03d *man/mdg_biomes.Rd
+961fc00191a04c3e3f11ed0216870844 *man/mdg_poly.Rd
+783ff6395dd095a06c12fef5157f4f59 *man/plot.spgeoIN.Rd
+bad3c3d4ee39b818788ac7bb9015a610 *man/plot.spgeoOUT.Rd
+04299263926e738ccd835cc27506821d *man/speciesgeocodeR-package.Rd
+456e18baa0025bc13394725a230d648b *man/summary.spgeoIN.Rd
+ac1e3f65b73635c386449fe517aee05f *man/summary.spgeoOUT.Rd
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,13 @@
+S3method(plot, spgeoIN)
+S3method(plot, spgeoOUT)
+S3method(summary, spgeoIN)
+S3method(summary, spgeoOUT)
+exportPattern("^[^\\.]")
+import(graphics)
+importFrom("grDevices", chull, dev.off, heat.colors, pdf, rainbow, rgb)
+importFrom("methods", slot, is)
+importFrom("stats", complete.cases, median, aggregate)
+importFrom("utils", read.table, write.table, download.file, unzip)
+import(raster)
+importFrom("maps", map)
+import(sp)
diff --git a/R/CalcRange.R b/R/CalcRange.R
@@ -0,0 +1,66 @@
+# Calculate the range of each species from its occurrence records.
+#
+# Arguments:
+#   x      an object of class "spgeoOUT", or a data.frame whose first three
+#          columns are species name (factor or character), longitude and
+#          latitude (both numeric, in decimal degrees).
+#   mode   only "EOO" is handled by the "area" output.
+#   value  "area" (default) returns a one-column data.frame (EOO) with one row
+#          per species; "shape" returns the list produced by applying
+#          .ConvHull() to the records of each species.
+#
+# Duplicated records are dropped with a warning. Species with fewer than three
+# unique records are skipped with a warning; in the "area" output they are
+# appended as rows whose EOO is the string "NA".
+CalcRange <- function(x, mode = "EOO", value = c("area", "shape")) {
+    # the default of 'value' has length two; reduce it to a single valid choice
+    value <- match.arg(value)
+
+    if (inherits(x, "spgeoOUT")) {
+        dat1 <- data.frame(identifier = x$identifier_in, x$species_coordinates_in)
+    } else {
+        dat1 <- x[, 1:3]
+        names(dat1) <- c("identifier", "XCOOR", "YCOOR")
+    }
+    dat <- unique(dat1)
+
+    id.ok <- is.factor(dat[, 1]) || is.character(dat[, 1])
+    if (!id.ok || !is.numeric(dat[, 2]) || !is.numeric(dat[, 3])) {
+        stop("wrong input format, x must be a data.frame with three columns: speciesname, longitude, latitude.\n")
+    }
+    if (max(dat[, 2]) > 180 || min(dat[, 2]) < -180 || max(dat[, 3]) > 90 || min(dat[, 3]) < -90) {
+        stop("invallid input coordinates. Check for column order and valid coordinates.")
+    }
+    if (nrow(dat) != nrow(dat1)) {
+        warning((nrow(dat1) - nrow(dat)), " points were excluded due to duplicated coordinate")
+    }
+
+    # number of unique records per species; unused factor levels come back as
+    # NA from tapply() and are dropped so they cannot end up in the output
+    filt <- tapply(dat$XCOOR, dat$identifier, length)
+    filt <- filt[!is.na(filt)]
+    enough <- names(filt)[filt > 2]
+    sortout <- filt[filt <= 2]
+    if (length(sortout) > 0) {
+        warning("the following species have less than 3 occurrence, values set to NA:", paste("\n", names(sortout)))
+    }
+
+    dat$identifier <- as.character(dat$identifier)
+    dat.filt <- dat[dat$identifier %in% enough, ]
+    inp <- split(dat.filt, f = dat.filt$identifier)
+
+    if (mode == "EOO" && value == "area") {
+        # NOTE(review): the placeholder is the string "NA", not a missing value
+        skipped <- data.frame(row.names = names(sortout), EOO = rep("NA", length(sortout)))
+        if (length(inp) == 0) {
+            # no species has three records, so there is nothing to hand to .eoo()
+            return(skipped)
+        }
+        out <- data.frame(do.call("rbind", lapply(inp, .eoo)))
+        names(out) <- "EOO"
+        return(rbind(out, skipped))
+    }
+    if (value == "shape") {
+        return(lapply(inp, .ConvHull))
+    }
+}
diff --git a/R/CoExClass.R b/R/CoExClass.R
@@ -0,0 +1,13 @@
+# Attach the output of .CoExClassH() to a "spgeoOUT" object.
+#
+# x must inherit from class "spgeoOUT". Its spec_table element is passed to
+# .CoExClassH(), the result is stored in x$coexistence_classified and the
+# modified object is returned. Any other input raises an error.
+CoExClass <- function(x) {
+    # inherits() also copes with objects that carry more than one class
+    if (!inherits(x, "spgeoOUT")) {
+        stop("function is only defined for class \"SpgeoOUT\". \n  See .CoExClassH() for single \"data.frames\"")
+    }
+    x$coexistence_classified <- .CoExClassH(x$spec_table)
+    x
+}
diff --git a/R/GeoClean.R b/R/GeoClean.R
@@ -0,0 +1,161 @@
+# Flag occurrence records whose coordinates fail a set of automatic checks.
+#
+# x is a data.frame that provides the columns identifier, XCOOR, YCOOR and
+# country, either directly, through the coordinate aliases lon/lat or
+# longitude/latitude, or as a table with 25 or 225 columns (GBIF layouts).
+# Each logical argument switches one check on or off:
+#   isna                a coordinate is missing
+#   isnumeric           a coordinate cannot be converted to a number
+#   coordinatevalidity  longitude outside [-180, 180] or latitude outside [-90, 90]
+#   containszero        longitude or latitude is exactly 0
+#   zerozero            both coordinates lie within zerozerothresh of 0
+#   latequallong        longitude equals latitude
+#   GBIFhead            record lies in a fixed box around Copenhagen
+#   countrycentroid     result of .testcordcountr() (referencecountries, contthresh)
+#   capitalcoords       result of .testcordcap() (referencecountries, capthresh)
+#   countrycheck        country column differs from the area that
+#                       ReadPoints()/SpGeoCodH() find for the point in 'polygons'
+#
+# Returns a logical vector with one element per record (TRUE = no selected
+# check flagged it) or, if verbose is TRUE, the four input columns plus one
+# logical column per selected check.
+GeoClean <- function(x, isna = TRUE, isnumeric = TRUE, coordinatevalidity = TRUE, containszero = TRUE, zerozero = TRUE, zerozerothresh = 1,
+                     latequallong = TRUE, GBIFhead = FALSE, countrycentroid = FALSE, contthresh = 0.5, capitalcoords = FALSE, capthresh = 0.5,
+                     countrycheck = FALSE, polygons, referencecountries = countryref, verbose = FALSE) {
+
+    dat <- x
+
+    # the coordinate columns may also be called lon/lat or longitude/latitude
+    if ("lon" %in% names(dat)) {
+        dat$XCOOR <- unlist(dat["lon"])
+    }
+    if ("lat" %in% names(dat)) {
+        dat$YCOOR <- unlist(dat["lat"])
+    }
+    if ("longitude" %in% names(dat)) {
+        dat$XCOOR <- unlist(dat["longitude"])
+    }
+    if ("latitude" %in% names(dat)) {
+        dat$YCOOR <- unlist(dat["latitude"])
+    }
+
+    # old GBIF format
+    if (dim(x)[2] == 25) {
+        dat$identifier <- dat$species
+        dat$country <- dat$ISO2
+    }
+    # NOTE(review): presumably the full GBIF download with 225 columns - confirm
+    if (dim(x)[2] == 225) {
+        dat$identifier <- dat$species
+        dat$XCOOR <- dat$decimalLongitude
+        dat$YCOOR <- dat$decimalLatitude
+        dat$country <- dat$countryCode
+        dat <- dat[c("identifier", "XCOOR", "YCOOR", "country")]
+        dat <- data.frame(unlist(apply(dat, 2, function(x) gsub("^$|^ $", NA, x))))
+    }
+
+    dat <- dat[c("identifier", "XCOOR", "YCOOR", "country")]
+    verb <- dat
+    # rep() instead of a scalar so that a table without rows is accepted too
+    dat$clean <- rep(TRUE, nrow(dat))
+
+    # numeric view of the coordinates, converted once; values that cannot be
+    # converted become NA and are not flagged by the comparisons below
+    lon <- suppressWarnings(as.numeric(as.character(dat$XCOOR)))
+    lat <- suppressWarnings(as.numeric(as.character(dat$YCOOR)))
+
+    # TRUE where a check flags the record; a comparison that yields NA does not
+    flagged <- function(bad) unname(!is.na(bad) & bad)
+
+    if (isna == TRUE) {
+        bad <- flagged(is.na(dat$XCOOR) | is.na(dat$YCOOR))
+        dat$clean[bad] <- FALSE
+        verb$isna <- !bad
+    }
+
+    if (isnumeric == TRUE) {
+        bad <- flagged(is.na(lon) | is.na(lat))
+        dat$clean[bad] <- FALSE
+        verb$isnumeric <- !bad
+    }
+
+    if (coordinatevalidity == TRUE) {
+        bad <- flagged(lon > 180 | lon < -180 | lat > 90 | lat < -90)
+        dat$clean[bad] <- FALSE
+        verb$coordinatevalidity <- !bad
+    }
+
+    if (containszero == TRUE) {
+        bad <- flagged(lon == 0 | lat == 0)
+        dat$clean[bad] <- FALSE
+        verb$haszero <- !bad
+    }
+
+    if (zerozero == TRUE) {
+        loncap <- lon > (0 - zerozerothresh) & lon < (0 + zerozerothresh)
+        latcap <- lat > (0 - zerozerothresh) & lat < (0 + zerozerothresh)
+        bad <- flagged(loncap & latcap)
+        dat$clean[bad] <- FALSE
+        verb$zerozero <- !bad
+    }
+
+    if (latequallong == TRUE) {
+        bad <- flagged(lon == lat)
+        dat$clean[bad] <- FALSE
+        verb$latequallong <- !bad
+    }
+
+    if (GBIFhead == TRUE) {
+        # box around Copenhagen
+        loncop <- lon > 12.1 & lon < 12.8
+        latcop <- lat > 55.5 & lat < 55.8
+        bad <- flagged(loncop & latcop)
+        dat$clean[bad] <- FALSE
+        verb$GBIFhead <- !bad
+    }
+
+    if (countrycentroid == TRUE) {
+        # each record is handed to the helper as one row of 'dat'
+        conttest <- apply(dat, 1, function(rec) .testcordcountr(rec, referencecountries, contthresh))
+        dat$clean[which(conttest == FALSE)] <- FALSE
+        verb$countrycentroid <- conttest
+    }
+    if (capitalcoords == TRUE) {
+        captest <- apply(dat, 1, function(rec) .testcordcap(rec, referencecountries, capthresh))
+        dat$clean[which(captest == FALSE)] <- FALSE
+        verb$capitalcoordinates <- captest
+    }
+
+    if (countrycheck == TRUE) {
+        dat$XCOOR <- as.numeric(as.character(dat$XCOOR))
+        dat$YCOOR <- as.numeric(as.character(dat$YCOOR))
+
+        inp <- ReadPoints(dat[c("identifier", "XCOOR", "YCOOR")], polygons)
+
+        # pick the polygon attribute by the length of the country codes
+        country <- as.character(dat$country)
+        code.len <- nchar(country)
+        if (all(code.len <= 2, na.rm = TRUE)) {
+            contest <- SpGeoCodH(inp, areanames = "ISO2")
+        } else if (all(code.len <= 3, na.rm = TRUE)) {
+            contest <- SpGeoCodH(inp, areanames = "ISO3")
+        } else {
+            contest <- SpGeoCodH(inp, areanames = "NAME")
+            warning("found country information with more than 3 letters; Country information should be ISO2 or ISO3")
+        }
+
+        found <- as.character(contest$sample_table[, 2])
+        if (length(country) != length(found)) {
+            stop("coordinates include non-numerical or invalid elements; please check this before using the countrycheck argument")
+        }
+        verb$country.check <- country == found
+        # a mismatch and an undecidable comparison (NA) both fail the record
+        dat$clean[which(verb$country.check == FALSE)] <- FALSE
+        dat$clean[is.na(verb$country.check)] <- FALSE
+    }
+
+    if (verbose == TRUE) {
+        return(verb)
+    }
+    dat$clean
+}
diff --git a/R/GetElevation.R b/R/GetElevation.R
@@ -0,0 +1,50 @@
+# Look up the elevation of occurrence records; each record is passed to .getEle().
+#
+# x may be
+#   - a data.frame, used as it is,
+#   - an object of class "spgeoIN" or "spgeoOUT",
+#   - the path of a tab-separated text file with a header line (recognised by
+#     ".txt" in the name); its columns are renamed to identifier, XCOOR, YCOOR,
+#   - one or more species names, for which geo-referenced records are
+#     requested from GBIF through the suggested package rgbif.
+#
+# Returns a numeric vector with one value per record. For species names the
+# downloaded records are returned with the elevations bound on as a column.
+GetElevation <- function(x) {
+    # character input is a file path if it contains ".txt", else species names
+    from.gbif <- is.character(x) && !any(grepl(".txt", x, fixed = TRUE))
+
+    if (inherits(x, "data.frame")) {
+        inp <- x
+    } else if (inherits(x, c("spgeoIN", "spgeoOUT"))) {
+        inp <- data.frame(identifier = x$identifier, XCOOR = x$species_coordinates[, 1], YCOOR = x$species_coordinates[, 2])
+    } else if (from.gbif) {
+        if (!requireNamespace("rgbif", quietly = TRUE)) {
+            stop("rgbif needed for species name option. Please install it.",
+                 call. = FALSE)
+        }
+        # NOTE(review): assumes occ_search() returns one table per name - confirm
+        coords <- rgbif::occ_search(scientificName = x, return = "data",
+                                    limit = 200000, hasCoordinate = TRUE, spatialIssues = FALSE,
+                                    fields = c("species", "decimalLongitude", "decimalLatitude"))
+        coords <- do.call("rbind", coords)
+        names(coords) <- c("identifier", "XCOOR", "YCOOR")
+        # the records have to end up in 'inp', which is what the lookup below reads
+        inp <- data.frame(coords[complete.cases(coords), ])
+        warning(paste(dim(inp)[1], "geo-referenced records found in GBIF; no data cleaning was performed", sep = " "))
+    } else if (is.character(x)) {
+        inp <- read.table(x, sep = "\t", header = TRUE)
+        names(inp) <- c("identifier", "XCOOR", "YCOOR")
+    } else {
+        stop("x must be a data.frame, an object of class \"spgeoIN\" or \"spgeoOUT\", or a character vector", call. = FALSE)
+    }
+
+    # seq_len() keeps the lookup from running on a table without rows
+    tt <- lapply(seq_len(nrow(inp)), function(i) .getEle(inp[i, ]))
+    ele.vector <- suppressWarnings(as.numeric(unlist(tt)))
+
+    if (from.gbif) {
+        return(cbind(inp, ele.vector))
+    }
+    ele.vector
+}
diff --git a/R/MapGrid.R b/R/MapGrid.R
@@ -0,0 +1,10 @@
+# Plot a raster and add the outlines of the "world" map from the maps package.
+#
+# rast and any further arguments in '...' are passed on to plot(). The raster
+# namespace is loaded before plotting.
+# NOTE(review): presumably this makes the plot method for raster objects available - confirm.
+MapGrid <- function(rast, ...) {
+    loadNamespace("raster")
+    plot(rast, ...)
+    maps::map("world", add = TRUE, col = "grey")
+}