In [95]:
import json
import os
import requests
import time
import urllib
import yaml

### Helper functions

In [96]:
def get_nb_hits(json_response):
    return len(json_response['hits']['hits'])

def get_next_link(json_response):
    return json_response['links'].get('next', None)

def reget(url, params=None, **kwargs):
    """
    Sends a GET request and resends it with increasing delays
    when status code 429 (too many requests) is received.

    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary, list of tuples or bytes to send
        in the query string for the :class:`Request`.
    :param **kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """
    if params is None:
        print(f"URL: {url}")
    else:
        print(f"URL: {url}, params:")
        redacted_params = params
        del redacted_params['access_token'] # don't want to leak the token
        print(json.dumps(redacted_params, indent = 4))
    delay = 0.0
    while True:  
        response = requests.get(url, params=params, **kwargs)
        if response.status_code != 429: # not too many requests
            return response
        delay += 2
        print(f"delay: {delay}s to circumvent rate limiting...")
        time.sleep(delay)

### Query with one page per hit

The query string uses [elastic search syntax](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax).

In [97]:
query = ""
params = {
    'q': urllib.parse.quote(query),
    'type': 'publication',
    #'type': 'dataset',
    'communities': "coacch-co-designing-the-assessment-of-climate-change-costs-h2020-project",
    'size': 1, # hits per page
    'page': 1,
    'access_token': os.environ['ZENODO_API_TOKEN']
}
response = reget("https://zenodo.org/api/records", params = params)
assert response.status_code == 200 # success
j = response.json()
assert len(j['hits']['hits']) == 1 # yup, one hit on the first page

URL: https://zenodo.org/api/records, params:
{
    "q": "",
    "type": "publication",
    "communities": "coacch-co-designing-the-assessment-of-climate-change-costs-h2020-project",
    "size": 1,
    "page": 1
}


### List root-level keys and value types of the JSON result dictionary

In [98]:
for key in j:
    print(f"'{key}' {type(j[key])}")

'aggregations' <class 'dict'>
'hits' <class 'dict'>
'links' <class 'dict'>


### Show aggregations (aggregate data over all query hits, presumably)

In [99]:
print(yaml.dump(j['aggregations']))

access_right:
  buckets:
  - doc_count: 21
    key: open
  - doc_count: 1
    key: restricted
  doc_count_error_upper_bound: 0
  sum_other_doc_count: 0
file_type:
  buckets:
  - doc_count: 19
    key: pdf
  - doc_count: 1
    key: csv
  - doc_count: 1
    key: docx
  - doc_count: 1
    key: xlsx
  - doc_count: 1
    key: zip
  doc_count_error_upper_bound: 0
  sum_other_doc_count: 0
keywords:
  buckets:
  - doc_count: 1
    key: COACCH
  - doc_count: 1
    key: Climate change mitigation; Electricity; Europe; Risk; Capital costs
  - doc_count: 1
    key: Climate-change impacts, Geology
  - doc_count: 1
    key: Coastal cities
  - doc_count: 1
    key: Damage risk
  - doc_count: 1
    key: ERA-5
  - doc_count: 1
    key: Ice-sheet melting risk
  - doc_count: 1
    key: Probabilistic projections
  - doc_count: 1
    key: Regional sea-level rise
  - doc_count: 1
    key: Regional sea-level rise; Coastal cities; Damage risk; Probabilistic projections;
      Ice-sheet melting risk; Unmitigate

### Show links (navigation links on this page, presumably)

In [100]:
print(yaml.dump(j['links']))

next: https://zenodo.org/api/records/?sort=mostrecent&q=&communities=coacch-co-designing-the-assessment-of-climate-change-costs-h2020-project&type=publication&page=2&size=1
self: https://zenodo.org/api/records/?sort=mostrecent&q=&communities=coacch-co-designing-the-assessment-of-climate-change-costs-h2020-project&type=publication&page=1&size=1



### List root-level keys and value types of the 'hits' sub dictionary

Presumably, this is a list of hits returned on the current page as well as a total number of hits, which could have been classified as an 'aggregation' entry.

In [101]:
for key in j['hits']:
    print(f"'{key}' {type(j['hits'][key])}")

'hits' <class 'list'>
'total' <class 'int'>


### Show hit (present on this page, presumably)

Since the page size is 1, we can zero-index the single hit on this page.

In [102]:
print(yaml.dump(j['hits']['hits'][0]))

conceptrecid: '4627388'
created: '2021-03-22T14:49:58.807034+00:00'
doi: 10.5194/nhess-21-1011-2021
files:
- bucket: ddafa051-80e7-4075-bd90-004dacd76e89
  checksum: md5:95b81a41673954a8ae351ab0a01b5cf9
  key: nhess-21-1011-2021.pdf
  links:
    self: https://zenodo.org/api/files/ddafa051-80e7-4075-bd90-004dacd76e89/nhess-21-1011-2021.pdf
  size: 5622111
  type: pdf
id: 4627389
links:
  badge: https://zenodo.org/badge/doi/10.5194/nhess-21-1011-2021.svg
  bucket: https://zenodo.org/api/files/ddafa051-80e7-4075-bd90-004dacd76e89
  doi: https://doi.org/10.5194/nhess-21-1011-2021
  html: https://zenodo.org/record/4627389
  latest: https://zenodo.org/api/records/4627389
  latest_html: https://zenodo.org/record/4627389
  self: https://zenodo.org/api/records/4627389
metadata:
  access_right: open
  access_right_category: success
  communities:
  - id: coacch-co-designing-the-assessment-of-climate-change-costs-h2020-project
  creators:
  - affiliation: Deltares, VU University
    name: van Gin

### Define a function to custom-process each hit

In [127]:
def process_hit(hit):
    print(f"Title: {hit['metadata']['title']}")
    # Define a templatedReST page for  the hit,
    page = f"""
`{hit['metadata']['title']} <{hit['links']['html']}>`_
{'=' * (len(hit['metadata']['title']) + len(hit['links']['html']) + 6)}

.. image:: {hit['links']['badge']}
   :target: {hit['links']['doi']}

Description:
------------

.. raw:: html

    <embed>
        {hit['metadata']['description']}
    </embed>
    
Socio-economic and environmental impacts of gradual climate change on agriculture, forestry and fisheries calculated using the GLOBIOM model

Authors: Boere, Esther

.. meta::
   :keywords: gradual climate change, agriculture, forestry, partial-equilibrium, socio-economic, COACCH
    """
    # Write ReST page, basing the filename on the Zenodo ID
    with open(f"../docs/publications/{hit['id']}.rst", "w") as rst:
        rst.write(page)

### Process each hit, getting the next page/hit every iteration

In [128]:
# Process initial hit
process_hit(j['hits']['hits'][0])
processed_hits = get_nb_hits(j)
# Process next hits/pages until they exhaust
jn = j
while get_next_link(jn): 
    next_response = reget(get_next_link(jn))
    assert next_response.status_code == 200 # success
    jn = next_response.json()
    process_hit(jn['hits']['hits'][0])
    processed_hits += get_nb_hits(jn)
# Check that processed hits equals total hits
assert processed_hits == j['hits']['total']

Title: Flood risk assessment of the European road network
URL: https://zenodo.org/api/records/?sort=mostrecent&q=&communities=coacch-co-designing-the-assessment-of-climate-change-costs-h2020-project&type=publication&page=2&size=1
Title: A global analysis of subsidence, relative sea-level change and coastal flood exposure
URL: https://zenodo.org/api/records/?sort=mostrecent&q=&communities=coacch-co-designing-the-assessment-of-climate-change-costs-h2020-project&type=publication&page=3&size=1
Title: Regional Inequalities in Flood Insurance Affordability and Uptake under Climate Change
URL: https://zenodo.org/api/records/?sort=mostrecent&q=&communities=coacch-co-designing-the-assessment-of-climate-change-costs-h2020-project&type=publication&page=4&size=1
Title: The ongoing nutrition transition thwarts long-term targets for food security, public health and environmental protection
URL: https://zenodo.org/api/records/?sort=mostrecent&q=&communities=coacch-co-designing-the-assessment-of-clima

UnicodeEncodeError: 'charmap' codec can't encode character '\u2010' in position 1097: character maps to <undefined>

In [118]:
len("foo")

3