-
Notifications
You must be signed in to change notification settings - Fork 0
/
gbif_demo.R
103 lines (79 loc) · 4.08 KB
/
gbif_demo.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
##rgbif demo
library(rgbif)
# Look up the machine-readable GBIF backbone key for a taxon.
# Every assignment to `key` in this section overwrites the previous one:
# the first two are alternative examples (a species, then a higher taxon),
# and the last assignment is the value the queries further down will see.
key <- name_backbone(name = "Pinus sylvestris")$usageKey
key <- name_backbone(name = "Tracheophyta")$usageKey

# If naming issues are likely, resolve the accepted name first through
# taxize (POW, IPNI, etc.) and feed that name to the GBIF backbone lookup.
library(taxize)
ppow <- get_pow(
  "Pinus sylvestris",
  messages = FALSE,
  rows = 1,
  accepted = TRUE
)
ppow_data <- pow_lookup(ppow[1])
key <- name_backbone(name = as.character(ppow_data$meta$name))$usageKey
# Retrieve occurrence records from GBIF. For all options see:
# https://www.rdocumentation.org/packages/rgbif/versions/3.4.0/topics/occ_search
# Here: records for `key` in Sweden ("SE"), year range "1000,2021", all
# fields, only records with coordinates and with hasGeospatialIssue = FALSE,
# limited to 100 records.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
occ <- occ_search(
  taxonKey = key,
  country = "SE",
  year = "1000,2021",
  fields = "all",
  hasCoordinate = TRUE,
  hasGeospatialIssue = FALSE,
  limit = 100
)

# Access the data: list the available columns, then show two of them.
names(occ$data)
occ$data$country
occ$data$year
# Download a study extent, here: Sweden / Norrbotten.
library(raster)
# NOTE(review): recent raster releases flag getData() as deprecated in favour
# of the geodata package -- confirm it still works with the installed version.
ext <- getData("GADM", country = "SWE", level = 1)
# Keep only the level-1 unit whose NAME_1 is "Norrbotten".
ext1 <- subset(ext, ext$NAME_1 == "Norrbotten")

# Query GBIF by the bounding box of the study extent instead of by country.
# c() flattens the matrix returned by bbox(); presumably this yields
# xmin, ymin, xmax, ymax as expected by `geometry` -- verify against sp docs.
# TRUE/FALSE are spelled out because T and F can be reassigned.
occ <- occ_search(
  taxonKey = key,
  geometry = c(bbox(ext1)),
  year = "1000,2021",
  fields = "all",
  hasCoordinate = TRUE,
  hasGeospatialIssue = FALSE,
  limit = 100
)

# Read the data.
occ$data
# E.g. year of observation.
occ$data$year
# Basis of record.
occ$data$basisOfRecord
# And all other available columns.
names(occ$data)
# Plot the data in space: start from a plain coordinate table
# (x = longitude, y = latitude) built from the occurrence records.
occ_points <- data.frame(
  x = occ$data$decimalLongitude,
  y = occ$data$decimalLatitude
)
# Convert to spatial points (geographic coordinates, WGS84).
occ_points <- SpatialPoints(
  occ_points,
  proj4string = CRS("+proj=longlat +datum=WGS84")
)
# Or to a spatial points data frame that carries the record attributes.
# occ$data is coerced with as.data.frame() so the attribute table is a plain
# data.frame, which is the class SpatialPointsDataFrame documents for `data`.
occ_points <- SpatialPointsDataFrame(
  occ_points,
  as.data.frame(occ$data),
  proj4string = CRS("+proj=longlat +datum=WGS84")
)

# Plot the extent and all retrieved data.
plot(ext1)
plot(occ_points, add = TRUE)

# Remove points outside the original polygon. The polygon is first brought
# to the same CRS as the points; over() gives NA for points that fall in no
# polygon, and those rows are dropped.
ext1 <- spTransform(ext1, CRS("+proj=longlat +datum=WGS84"))
inside <- !is.na(sp::over(occ_points, sp::geometry(ext1)))
occ_points <- occ_points[inside, ]

# Plot the extent again, now with only the points inside the polygon.
plot(ext1)
plot(occ_points, add = TRUE)
# Basis of record, as defined in the GBIF BasisOfRecord enum:
# https://gbif.github.io/gbif-api/apidocs/org/gbif/api/vocabulary/BasisOfRecord.html
# Acceptable values are:
#   FOSSIL_SPECIMEN     An occurrence record describing a fossilized specimen.
#   HUMAN_OBSERVATION   An occurrence record describing an observation made
#                       by one or more people.
#   LITERATURE          An occurrence record based on literature alone.
#   LIVING_SPECIMEN     An occurrence record describing a living specimen.
#   MACHINE_OBSERVATION An occurrence record describing an observation made
#                       by a machine.
#   OBSERVATION         An occurrence record describing an observation.
#   PRESERVED_SPECIMEN  An occurrence record describing a preserved specimen.
#   UNKNOWN             Unknown basis for the record.
# NOTE(review): this list may be out of date for newer GBIF API versions;
# check the linked enum before relying on it.
# Same bounding-box query as before, restricted to preserved specimens.
# TRUE/FALSE are spelled out because T and F can be reassigned.
occ <- occ_search(
  taxonKey = key,
  geometry = c(bbox(ext1)),
  year = "1000,2021",
  fields = "all",
  basisOfRecord = "PRESERVED_SPECIMEN",
  hasCoordinate = TRUE,
  hasGeospatialIssue = FALSE,
  limit = 100
)
occ$data$basisOfRecord
# Count the number of observations matching a set of filters, see:
# https://www.rdocumentation.org/packages/rgbif/versions/3.3.0/topics/occ_count
# The count is stored so the same request is not sent a second time when
# computing the paging offset below.
n_obs <- occ_count(
  taxonKey = key,
  country = "SE",
  basisOfRecord = "HUMAN_OBSERVATION",
  georeferenced = TRUE
)
n_obs

# Use the total count as a paging offset (`start`) to request the last 500
# records of the result set; max(0, ...) keeps the offset valid when fewer
# than 500 records exist.
# NOTE(review): the count above is not restricted by year = "1500,2000" or
# hasGeospatialIssue, so the offset can lie past the end of the filtered
# result set and `occ$data` may come back empty. Nothing in this query sorts
# by year either, so "oldest" depends on the API's result order -- confirm.
# NOTE(review): the GBIF search API presumably caps how deep `start` may go;
# verify for taxa with very many records.
occ <- occ_search(
  taxonKey = key,
  country = "SE",
  fields = c('name','latitude','longitude','year'),
  basisOfRecord = "HUMAN_OBSERVATION",
  hasCoordinate = TRUE,
  hasGeospatialIssue = FALSE,
  limit = 500,
  year = "1500,2000",
  start = max(0, as.numeric(n_obs) - 500)
)
occ
nrow(occ$data)
occ$data$year
# Earliest year among the retrieved records, ignoring missing years.
min(occ$data$year, na.rm = TRUE)
# Exemplary workaround for large datasets: as long as the current `occ` has
# no `year` column (e.g. the previous query returned nothing), step the
# offset back from the end of the result set in pages of 500 records until a
# page with data comes back.
page_size <- 500
# The count does not change between iterations, so it is requested once
# instead of on every pass through the loop.
n_obs <- as.numeric(occ_count(
  taxonKey = key,
  country = "SE",
  basisOfRecord = "HUMAN_OBSERVATION",
  georeferenced = TRUE
))
i <- 1
# The second condition bounds the loop: once the previous page already
# started at offset 0 there is nothing left to request, so the loop ends
# instead of sending a negative `start`.
while (is.null(occ$data$year) && page_size * (i - 1) < n_obs) {
  occ <- occ_search(
    taxonKey = key,
    country = "SE",
    fields = "all",
    basisOfRecord = "HUMAN_OBSERVATION",
    hasCoordinate = TRUE,
    hasGeospatialIssue = FALSE,
    limit = page_size,
    start = max(0, n_obs - page_size * i)
  )
  # Progress indicator: number of pages tried so far.
  print(i)
  i <- i + 1
}
occ
nrow(occ$data)
occ$data$year
# Earliest year among the retrieved records, ignoring missing years.
min(occ$data$year, na.rm = TRUE)
# For other databases, check out: https://docs.ropensci.org/spocc/