# A demo of data.biodiversity.aq webservices 

In [21]:
import os
from urllib.parse import urljoin

import requests

# Root of the data.biodiversity.aq REST API; endpoint paths are joined onto it.
BASE_URL = 'https://data.biodiversity.aq/api/v1.0/'
# Account credentials are read from the environment so that real values never
# have to be saved inside the notebook. Set BIODIVERSITY_AQ_USERNAME and
# BIODIVERSITY_AQ_PASSWORD before starting the kernel; the placeholder values
# below are only used when those variables are unset.
USERNAME = os.environ.get('BIODIVERSITY_AQ_USERNAME', 'username@email.com')
PASSWORD = os.environ.get('BIODIVERSITY_AQ_PASSWORD', 'my_password')

### Data Type

What are the types of data available in the database?

In [2]:
# Resolve the data-type listing endpoint against the API root, then fetch it.
data_type_url = urljoin(BASE_URL, "data-type")
r = requests.get(url=data_type_url)
r.json()

{'count': 4,
 'next': None,
 'previous': None,
 'results': [{'id': 4, 'dataType': 'Checklist'},
  {'id': 2, 'dataType': 'Metadata'},
  {'id': 1, 'dataType': 'Occurrence'},
  {'id': 3, 'dataType': 'Sampling Event'}]}

### Project

Which projects have the word `porifera` in the title?

In [9]:
# Query the project endpoint, passing the term through the `search` parameter.
project_url = urljoin(BASE_URL, "project")
r = requests.get(
    url=project_url,
    params={"search": "porifera"},
)
r.json()

{'count': 2,
 'next': None,
 'previous': None,
 'results': [{'id': 25,
   'title': 'Antarctic Porifera in the collection of the Italian National Antarctic Museum (MNA)',
   'funding': "The specimens were collected during different Antarctic expeditions funded by the Italian National Antarctic Research Program (PNRA). The complete list of research projects is reported here (in brackets is the project name or category under the PNRA program):\n•\t2.1.4.6 (Necton e risorse da pesca), III expedition (1987/1988)\n•\t2.1.4.3 (Oceanografia & Benthos), III expedition (1987/1988)\n•\t3.2.1.2.5 (Benthos), V expedition (1989/1990)\n•\t3.2.1.4 (Oceanografia geologica), V expedition (1989/1990)\n•\t2d.2 (Ecologia e biogeochimica dell'Oceano Meridionale), IX expedition (1993/1994)\n•\t2d.2 (Ecologia e biogeochimica dell'Oceano Meridionale), X expedition (1994/1995)\n•\t2d.2 (Ecologia e biogeochimica dell'Oceano Meridionale - ROSSMIZE), XI expedition (1995/1996)\n•\t2b.3 (Ecologia e biogeochimica del

What are the datasets associated with the projects returned from the query above?

In [10]:
# `r` still holds the project search response from the previous cell.
results = r.json().get('results')
# Collect the project ids so that we can search for the datasets associated
# with these projects.
project_ids = [project.get('id') for project in results]
dataset_url = urljoin(BASE_URL, 'dataset')
# The dataset response gets its own name: rebinding `r` here would make this
# cell read dataset results as if they were projects when it is run a second
# time.
dataset_response = requests.get(dataset_url, params={'project': project_ids})
dataset_response.json()

{'count': 2,
 'next': None,
 'previous': None,
 'results': [{'id': 455,
   'title': 'Porifera collection of the Italian National Antarctic Museum (MNA) - Data',
   'abstract': 'This new dataset presents occurrence data for Porifera collected in the Ross Sea, mainly in the Terra Nova Bay area, and curated at the Italian National Antarctic Museum (MNA, section of Genoa). Specimens were collected in 331 different sampling stations at depths ranging from 17 to 1100 meters in the framework of 17 different Italian Antarctic expeditions funded by the Italian National Antarctic Research Program (PNRA). A total of 807 specimens, belonging to 144 morphospecies (i.e. 95 taxa identified at species level and 49 classified at least at the genus level), are included in the dataset. Nearly half (45%) of the species reported here correspond to species already known for Terra Nova Bay. Out of the remaining 55% previously unknown records, under a third (~29%) was classified at the specific level, while o

### Keyword

Which keywords contain the words `EARTH SCIENCE`?

In [11]:
# Query the keyword endpoint, passing the phrase through the `search` parameter.
keyword_url = urljoin(BASE_URL, "keyword")
r = requests.get(
    url=keyword_url,
    params={"search": "EARTH SCIENCE"},
)
r.json()

{'count': 9,
 'next': None,
 'previous': None,
 'results': [{'id': 390,
   'keyword': 'AIRCRAFT\nCAMERAS\nEARTH SCIENCE > BIOLOGICAL CLASSIFICATION > ANIMALS/VERTEBRATES > MAMMALS > CETACEANS',
   'thesaurus': 'Global Change Master Directory (GCMD). 2018. GCMD Keywords, Version 8.6. Greenbelt, MD: Global Change Data Center, Science and Exploration Directorate, Goddard Space Flight Center (GSFC) National Aeronautics and Space Administration (NASA). URL (GCMD Keyword Forum Page): https://earthdata.nasa.gov/gcmd-forum'},
  {'id': 262,
   'keyword': 'EARTH SCIENCE',
   'thesaurus': 'Global Change Master Directory'},
  {'id': 494,
   'keyword': 'EARTH SCIENCE',
   'thesaurus': 'GBIF Dataset Type Vocabulary: http://rs.gbif.org/vocabulary/gbif/dataset_type.xml'},
  {'id': 186,
   'keyword': 'EARTH SCIENCE',
   'thesaurus': 'GCMD Earth Science and Earth Science Services'},
  {'id': 338,
   'keyword': 'EARTH SCIENCE > BIOLOGICAL CLASSIFICATION > ANIMALS/VERTEBRATES > MAMMALS > CETACEANS\nEARTH 

In [12]:
# Pull the id out of every keyword in the response above; these ids are used
# next to find the datasets flagged with those keywords.
results = r.json().get("results")
keyword_ids = [entry.get("id") for entry in results]
keyword_ids

[390, 262, 494, 186, 338, 324, 402, 437, 391]

Which datasets are associated with the keywords returned from the query above `AND` are `Occurrence` datasets?

In [13]:
# Filter the dataset listing by the keyword ids collected above and by data type.
dataset_url = urljoin(BASE_URL, "dataset")
r = requests.get(
    dataset_url,
    params={
        "keyword": keyword_ids,
        "dataType": 1,  # id 1 is 'Occurrence' in the data-type listing
    },
)
r.json()

{'count': 9,
 'next': None,
 'previous': None,
 'results': [{'id': 481,
   'title': 'Seabird Atlas of South-eastern Australian Waters 1975-1993',
   'abstract': 'Seabird Atlas of South-eastern Australian Waters derived from multiple sources. Data from paper by Reid, T.A., Hindell, M.A., Eades, D.W. and Newman, M. 2002 Seabird Atlas of South-eastern Australian Waters, Birds Australia Monograph 4, Birds Australia, Melbourne. The following surveys/voyages have been removed as they exist in the existing OBIS dataset ANARE WoV - WOCE91, geosci , 1988/89 v4, 1989/90 v3, fishog and Thirst',
   'pubDate': '2018-11-13',
   'intellectualRight': 'Creative Commons Attribution Non Commercial (CC-BY-NC) 4.0 License',
   'doi': '10.15468/zpijum',
   'alternateIdentifiers': ['10.15468/zpijum',
    'http://ogc-act.csiro.au/ipt/resource?r=seabird_atlas_southeast_australia',
    '8c748691-6253-43fc-a851-86da40fa0021',
    'http://ogc-act.csiro.au/ipt/resource.do?r=seabird_atlas_southeast_australia'],
   

### Basis of record

What are all the `basisOfRecord` values of the Occurrences in the database?

In [14]:
# Resolve the basis-of-record listing endpoint against the API root, then fetch it.
basis_of_record_url = urljoin(BASE_URL, "basis-of-record")
r = requests.get(url=basis_of_record_url)
r.json()

{'count': 8,
 'next': None,
 'previous': None,
 'results': [{'id': 8, 'basisOfRecord': 'FOSSIL_SPECIMEN'},
  {'id': 2, 'basisOfRecord': 'HUMAN_OBSERVATION'},
  {'id': 7, 'basisOfRecord': 'LIVING_SPECIMEN'},
  {'id': 1, 'basisOfRecord': 'MACHINE_OBSERVATION'},
  {'id': 6, 'basisOfRecord': 'MATERIAL_SAMPLE'},
  {'id': 3, 'basisOfRecord': 'OBSERVATION'},
  {'id': 5, 'basisOfRecord': 'PRESERVED_SPECIMEN'},
  {'id': 4, 'basisOfRecord': 'UNKNOWN'}]}

### Occurrence

Which Occurrences have a `depth between 6000 - 6200 meters`?

In [15]:
# Occurrence query bounded by a minimum and maximum depth; `limit` is set to 1
# so that only a single record is returned per page.
occurrence_url = urljoin(BASE_URL, "occurrence")
r = requests.get(
    occurrence_url,
    params={
        "depth_min": 6000,
        "depth_max": 6200,
        "limit": 1,
    },
)
r.json()

{'count': 32,
 'next': 'http://data.biodiversity.aq/api/v1.0/occurrence/?depth_max=6200&depth_min=6000&limit=1&offset=1',
 'previous': None,
 'results': [{'id': 10980900,
   'license': 'CC0_1_0',
   'rightsHolder': '',
   'accessRights': '',
   'bibliographicCitation': '',
   'references': '',
   'institutionCode': 'NHMUK',
   'collectionCode': 'ZOO',
   'datasetName': 'Natural History Museum (London) Collection Specimens',
   'dynamicProperties': '{"gbifissue": ["COORDINATE_ROUNDED", "GEODETIC_DATUM_ASSUMED_WGS84"], "created": 1314122435000, "registrationcode": "CR07", "barcode": "010996440", "donorname": "M H. Thurston", "associatedmediacount": 1, "determinationnames": "Euandania gigantea Stebbing, 1888", "subdepartment": "Crustacea", "vessel": "Walter Herwig", "gbifid": 1056282003}',
   'recordedBy': 'M H. Thurston',
   'individualCount': '1',
   'organismQuantity': '',
   'organismQuantityType': '',
   'sex': '',
   'lifeStage': '',
   'occurrenceStatus': '',
   'reproductiveCondit

Which Occurrences have a `depth between 6000 - 6200 meters` AND are `preserved specimen` records?

In [20]:
# Same depth-bounded occurrence query, additionally restricted by basis of record.
occurrence_url = urljoin(BASE_URL, "occurrence")
r = requests.get(
    occurrence_url,
    params={
        "depth_min": 6000,
        "depth_max": 6200,
        "basis_of_record": 5,  # id 5 is 'PRESERVED_SPECIMEN' in the basis-of-record listing
        "limit": 1,
    },
)
r.json()

{'count': 4,
 'next': 'http://data.biodiversity.aq/api/v1.0/occurrence/?basis_of_record=5&depth_max=6200&depth_min=6000&limit=1&offset=1',
 'previous': None,
 'results': [{'id': 8386536,
   'license': 'CC_BY_4_0',
   'rightsHolder': '',
   'accessRights': '',
   'bibliographicCitation': '',
   'references': '',
   'institutionCode': '',
   'collectionCode': '',
   'datasetName': 'Antarctic Echinoids: an interactive database - David et al., 2005a',
   'dynamicProperties': '',
   'recordedBy': '',
   'individualCount': '',
   'organismQuantity': '',
   'organismQuantityType': '',
   'sex': '',
   'lifeStage': '',
   'occurrenceStatus': '',
   'reproductiveCondition': '',
   'behavior': '',
   'occurrenceRemarks': '',
   'eventDate': '1968-01-01T00:00:00Z',
   'eventTime': '',
   'year': 1968,
   'month': None,
   'day': None,
   'verbatimEventDate': '',
   'samplingProtocol': '',
   'sampleSizeValue': '',
   'sampleSizeUnit': '',
   'samplingEffort': '',
   'fieldNotes': '',
   'locality

### Download

A zip file containing a CSV text file of the queried occurrence records is generated asynchronously when a Download
request is sent. An email will be sent to you once the file is ready for download. A valid user account is required
for this request.

#### Create a Download request

In [19]:
download_url = urljoin(BASE_URL, "download/")
# The request body is a dict whose "query" key holds the Occurrence query
# parameters and their values.
data = {
    "query": {
        "depth_min": 6000,
        "depth_max": 6200,
        "basis_of_record": 5,
    },
}
download_request = requests.post(
    download_url,
    json=data,
    auth=(USERNAME, PASSWORD),
)
download_request.json()

{'id': 415,
 'status': 'PENDING',
 'downloadLink': '',
 'created': '2020-02-11T07:06:42.335575',
 'recordCount': None,
 'query': {'depthMin': 6000, 'depthMax': 6200, 'basisOfRecord': 5}}

#### Check the details of a particular Download request

In [18]:
# Look up the status of one specific download, using the id reported in the
# response to the download request created above.
download_id = download_request.json().get("id")
download_url = urljoin(BASE_URL, "download/{id}/".format(id=download_id))
r = requests.get(
    download_url,
    auth=(USERNAME, PASSWORD),
)
r.json()

{'id': 413,
 'status': 'SUCCESS',
 'downloadLink': 'http://data.biodiversity.aq/download/request/413/',
 'created': '2020-02-11T06:54:40.614741',
 'recordCount': 4,
 'query': {'depthMin': 6000, 'depthMax': 6200, 'basisOfRecord': 5}}