Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Verbatim extensions #141

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = 'en'
language = 'en'

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
Expand Down
48 changes: 42 additions & 6 deletions pygbif/occurrences/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,15 @@
# how to parse arguments/predicates
def _parse_args(x):
x = x.replace("'", '"')
tmp = re.split("\s", x)
tmp = re.split(r"\s", x)
key = key_lkup.get(tmp[0])
# check special predicates
if re.search(r"verbatimExtensions", x):
if len(tmp) != 3:
raise ValueError("Please use form 'verbatimExtensions = [...]'")
if "[" not in tmp[2] or "]" not in tmp[2]:
raise ValueError("Please use bracekts for your verbatimExtensions [...]'")
return {"verbatimExtensions" : tmp[2]}
if re.search(r"Null|NULL|null", x):
pred_type = "isNull"
if re.search(r"not|\!", x):
Expand All @@ -45,6 +51,7 @@ def _parse_args(x):
else:
return {"type": "in", "key": key, "values": json.loads(value_list.group(0))}
pred_type = operator_lkup.get(tmp[1])

return {
"type": pred_type,
"key": key,
Expand Down Expand Up @@ -74,7 +81,7 @@ def _check_environ(variable, value):

# download function
def download(
queries, format="SIMPLE_CSV", user=None, pwd=None, email=None, pred_type="and"
queries, format="SIMPLE_CSV", user=None, pwd=None, email=None, pred_type="and", prep=False
):
"""
Spin up a download request for GBIF occurrence data.
Expand All @@ -93,6 +100,7 @@ def download(
Set in your env vars with the option ``GBIF_PWD``
:param email: (character) Email address to receive download notice done
email. Required. Set in your env vars with the option ``GBIF_EMAIL``
:param prep: (logical) If True, the function will only prepare the download, but not execute it. Default: False.

Arguments have to be passed as characters (e.g., ``country = US``),
with a space between key (``country``), operator (``=``), and value (``US``).
Expand Down Expand Up @@ -261,17 +269,19 @@ def download(
if isinstance(queries, str):
queries = [queries]

keyval = [_parse_args(z) for z in queries]
keyval = req.chk_vrb_ext([_parse_args(z) for z in queries])

# USE GBIFDownload class to set up the predicates
req.main_pred_type = pred_type
for predicate in keyval:
req.add_predicate_dict(predicate)

out = req.post_download(user, pwd)
if(prep) :
out = "download request prepared, but not executed."
else :
out = req.post_download(user, pwd)
return out, req.payload


class GbifDownload(object):
def __init__(self, creator, email, polygon=None):
"""class to setup a JSON doc with the query and POST a request
Expand Down Expand Up @@ -396,6 +406,32 @@ def add_predicate(self, key, value, predicate_type="equals"):
else:
raise Exception("predicate type not a valid operator")

def chk_vrb_ext(self, keyval):
    """
    Split the ``verbatimExtensions`` entry off the parsed predicates.

    Dictionaries carrying a ``verbatimExtensions`` key are not real
    predicates. That key is separated from the rest and, when present, its
    JSON-decoded value is stored in ``self.payload["verbatimExtensions"]``
    and the raw string in ``self.verbatimExtensions``. All validation runs
    before any state on ``self`` is changed, and the dictionaries in
    ``keyval`` are left unmodified.

    :param keyval: list of dictionaries with the predicates
    :return: list with the remaining (non-empty) predicate dictionaries
    :raises ValueError: if more than one verbatimExtensions expression is
        given, if ``self.format`` is not ``"DWCA"``, or if the expression
        does not decode to a JSON list
    """
    extensions = []
    predicates = []
    for entry in keyval:
        if "verbatimExtensions" not in entry:
            if entry:
                predicates.append(entry)
            continue
        extensions.append(entry["verbatimExtensions"])
        # Copy instead of pop() so the caller's dictionaries stay intact.
        rest = {k: v for k, v in entry.items() if k != "verbatimExtensions"}
        if rest:
            predicates.append(rest)

    if not extensions:
        return predicates
    if len(extensions) != 1:
        raise ValueError("Only one verbatimExtensions expression is allowed")
    if self.format != "DWCA":
        raise ValueError("verbatimExtensions are only allowed for DWCA format")

    raw = extensions[0]
    if raw is not None:
        parsed = json.loads(raw)
        # A bracket check alone also lets JSON objects through; only a list
        # is accepted as the extensions payload.
        if not isinstance(parsed, list):
            raise ValueError("Please use form 'verbatimExtensions = [...]'")
        self.payload["verbatimExtensions"] = parsed
    self.verbatimExtensions = raw
    return predicates

def add_predicate_dict(self, predicate_dictionary):
"""
allows for nested queries and will take a predicate and add it to a list of predicates
Expand Down Expand Up @@ -471,7 +507,6 @@ def post_download(self, user=None, pwd=None):
user = _check_environ("GBIF_USER", user)
pwd = _check_environ("GBIF_PWD", pwd)

# pprint.pprint(self.payload)
r = requests.post(
self.url,
auth=requests.auth.HTTPBasicAuth(user, pwd),
Expand Down Expand Up @@ -733,6 +768,7 @@ def download_get(key, path=".", **kwargs):
"userCountry": "USER_COUNTRY",
"verbatimScientificName": "VERBATIM_SCIENTIFIC_NAME",
"waterBody": "WATER_BODY",
"verbatimExtensions" : "verbatimExtensions"
}

formats = ["SIMPLE_CSV", "SIMPLE_PARQUET", "DWCA", "SPECIES_LIST", "SIMPLE_AVRO"]
10 changes: 10 additions & 0 deletions test/test-occurrences-download_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,3 +195,13 @@ def test_geometry_predicate(self):
payload["predicate"]["predicates"][0],
{"type": "within", "geometry": "POLYGON((-82.7 36.9, -85.0 35.6, -81.0 33.5, -79.4 36.3, -79.4 36.3, -82.7 36.9))"},
)
def test_verbatim_extensions(self):
    """
    a verbatimExtensions query ends up as a list in the request payload
    """
    extensions = [
        "http://rs.gbif.org/terms/1.0/DNADerivedData",
        "http://rs.tdwg.org/dwc/terms/MeasurementOrFact",
    ]
    # No whitespace inside the brackets: the query is split on whitespace
    # into exactly key, operator and value.
    query = "verbatimExtensions = ['{}','{}']".format(*extensions)

    # prep=True builds the payload without POSTing it, so the test needs
    # neither network access nor valid credentials.
    _, payload = download(
        [query],
        user="dummy",
        email="dummy",
        pwd="dummy",
        format="DWCA",
        prep=True,
    )

    self.assertListEqual(payload["verbatimExtensions"], extensions)
Loading