In [137]:
import json
import os
import requests
import time
import urllib
import yaml

### Helper functions

In [138]:
def get_nb_hits(json_response):
    """
    Count the hits listed in a search response.

    :param json_response: decoded JSON response holding a list under
        ``['hits']['hits']``.
    :return: length of that list.
    """
    listed_hits = json_response['hits']['hits']
    return len(listed_hits)

def get_next_link(json_response):
    """
    Look up the 'next' entry among the links of a search response.

    :param json_response: decoded JSON response holding a dictionary under
        ``['links']``.
    :return: the value stored under 'next', or None when there is no such key.
    """
    links = json_response['links']
    return links.get('next')

def reget(url, params=None, **kwargs):
    """
    Sends a GET request and resends it with increasing delays
    when status code 429 (too many requests) is received.

    Before the first attempt the URL and the query parameters are printed,
    leaving out any ``access_token`` parameter so the token does not end up
    in the output. The printout is built from a filtered copy: ``params``
    itself is never modified, so the token is still sent with the request
    and is still present in the caller's object afterwards.

    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary, list of tuples or bytes to send
        in the query string for the :class:`Request`.
    :param **kwargs: Optional arguments that ``request`` takes.
    :return: the first :class:`Response <Response>` whose status code
        is not 429
    :rtype: requests.Response
    """
    if params is None:
        print(f"URL: {url}")
    else:
        print(f"URL: {url}, params:")
        # Build a separate, token-free view for printing only.
        if isinstance(params, dict):
            redacted_params = {
                key: value for key, value in params.items()
                if key != 'access_token'
            }
        elif isinstance(params, (list, tuple)):
            redacted_params = [
                list(pair) for pair in params if pair[0] != 'access_token'
            ]
        else:
            # Raw bytes/str cannot be redacted reliably, so show nothing.
            redacted_params = None
        if redacted_params is None:
            print("(raw query string not shown)")
        else:
            # default=str keeps the printout from failing on values that
            # are not JSON serializable.
            print(json.dumps(redacted_params, indent = 4, default = str))
    delay = 0.0
    while True:
        response = requests.get(url, params=params, **kwargs)
        if response.status_code != 429: # not too many requests
            return response
        # Rate limited: wait 2 seconds longer than the previous time, then retry.
        delay += 2
        print(f"delay: {delay}s to circumvent rate limiting...")
        time.sleep(delay)

### Query with one hit per page

The query string uses the [Elasticsearch query string syntax](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax).

In [139]:
# Search the community's records, requesting a single hit per page.
query = ""
params = {
    # Pass the query as-is: requests URL-encodes parameter values itself, so
    # quoting it here as well would encode the query twice.
    'q': query,
    'type': 'publication',
    #'type': 'dataset',
    'communities': "coacch-co-designing-the-assessment-of-climate-change-costs-h2020-project",
    'size': 1, # hits per page
    'page': 1,
    # Read the token from the environment so it is not stored in the notebook.
    'access_token': os.environ['ZENODO_API_TOKEN']
}
response = reget("https://zenodo.org/api/records", params = params)
assert response.status_code == 200 # success
j = response.json()
assert len(j['hits']['hits']) == 1 # yup, one hit on the first page

URL: https://zenodo.org/api/records, params:
{
    "q": "COACCH",
    "type": "dataset",
    "size": 1,
    "page": 1
}


### List root-level keys and value types of the JSON result dictionary

In [140]:
# One line per root-level entry of the response: its key and the type of its value.
for key, value in j.items():
    print(f"'{key}' {type(value)}")

'aggregations' <class 'dict'>
'hits' <class 'dict'>
'links' <class 'dict'>


### Show aggregations (aggregate data over all query hits, presumably)

In [141]:
# Render the 'aggregations' entry as YAML for readability.
aggregations_yaml = yaml.dump(j['aggregations'])
print(aggregations_yaml)

access_right:
  buckets:
  - doc_count: 21
    key: open
  - doc_count: 1
    key: restricted
  doc_count_error_upper_bound: 0
  sum_other_doc_count: 0
file_type:
  buckets:
  - doc_count: 19
    key: pdf
  - doc_count: 1
    key: csv
  - doc_count: 1
    key: docx
  - doc_count: 1
    key: xlsx
  - doc_count: 1
    key: zip
  doc_count_error_upper_bound: 0
  sum_other_doc_count: 0
keywords:
  buckets:
  - doc_count: 1
    key: COACCH
  - doc_count: 1
    key: Climate change mitigation; Electricity; Europe; Risk; Capital costs
  - doc_count: 1
    key: Climate-change impacts, Geology
  - doc_count: 1
    key: Coastal cities
  - doc_count: 1
    key: Damage risk
  - doc_count: 1
    key: ERA-5
  - doc_count: 1
    key: Ice-sheet melting risk
  - doc_count: 1
    key: Probabilistic projections
  - doc_count: 1
    key: Regional sea-level rise
  - doc_count: 1
    key: Regional sea-level rise; Coastal cities; Damage risk; Probabilistic projections;
      Ice-sheet melting risk; Unmitigate

### Show links (navigation links on this page, presumably)

In [142]:
# Render the 'links' entry as YAML for readability.
links_yaml = yaml.dump(j['links'])
print(links_yaml)

next: https://zenodo.org/api/records/?sort=bestmatch&q=COACCH&type=dataset&page=2&size=1
self: https://zenodo.org/api/records/?sort=bestmatch&q=COACCH&type=dataset&page=1&size=1



### List root-level keys and value types of the 'hits' sub dictionary

Presumably, this holds the list of hits returned on the current page together with the total number of hits across all pages; the total could arguably have been classified as an 'aggregations' entry instead.

In [143]:
# One line per entry of the 'hits' sub dictionary: its key and the type of its value.
for key, value in j['hits'].items():
    print(f"'{key}' {type(value)}")

'hits' <class 'list'>
'total' <class 'int'>


### Show hit (present on this page, presumably)

Since the page size is 1, we can zero-index the single hit on this page.

In [144]:
# Render the first hit of this page as YAML for readability.
first_hit_yaml = yaml.dump(j['hits']['hits'][0])
print(first_hit_yaml)

conceptdoi: 10.5281/zenodo.5513870
conceptrecid: '5513870'
created: '2021-09-17T14:34:15.149343+00:00'
doi: 10.5281/zenodo.5513871
files:
- bucket: 4406e796-3221-4985-925e-ea43cd89ac2f
  checksum: md5:e0bca547aed38047522dfb6f5d48e409
  key: IIASA_ImpactChains_GLOBIOM_Data.csv
  links:
    self: https://zenodo.org/api/files/4406e796-3221-4985-925e-ea43cd89ac2f/IIASA_ImpactChains_GLOBIOM_Data.csv
  size: 45626665
  type: csv
- bucket: 4406e796-3221-4985-925e-ea43cd89ac2f
  checksum: md5:0c6a86f2c0bacaa674db1eb18b3fe2db
  key: ImpactChains_GLOBIOM_MetaData.csv
  links:
    self: https://zenodo.org/api/files/4406e796-3221-4985-925e-ea43cd89ac2f/ImpactChains_GLOBIOM_MetaData.csv
  size: 1705
  type: csv
id: 5513871
links:
  badge: https://zenodo.org/badge/doi/10.5281/zenodo.5513871.svg
  bucket: https://zenodo.org/api/files/4406e796-3221-4985-925e-ea43cd89ac2f
  conceptbadge: https://zenodo.org/badge/doi/10.5281/zenodo.5513870.svg
  conceptdoi: https://doi.org/10.5281/zenodo.5513870
  doi: 

### Define a function to custom-process each hit

In [129]:
def process_hit(hit, output_dir="../docs/publications"):
    """
    Write a reStructuredText page describing a single hit.

    The page holds a linked title, the DOI badge, the HTML description
    (embedded through a ``raw`` directive), the list of creators and a
    ``meta`` directive carrying the keywords. The title is also printed.

    :param hit: dictionary with an 'id', a 'links' dictionary (keys 'html',
        'badge' and 'doi') and a 'metadata' dictionary (keys 'title' and
        'creators', optionally 'description' and 'keywords').
    :param output_dir: (optional) existing directory to write the page to.
    :return: None; the page is written to ``<output_dir>/<id>.rst``.
    :raises KeyError: when a required key is missing from ``hit``.
    """
    metadata = hit['metadata']
    links = hit['links']
    title = metadata['title']
    print(f"Title: {title}")

    # The page title is a hyperlink; its underline must span the whole markup.
    heading = f"`{title} <{links['html']}>`_"

    # Every line of the description has to be indented to stay inside the
    # body of the raw directive; blank lines are kept empty.
    description = metadata.get('description') or ''
    description_lines = [
        f"        {line}" if line.strip() else ""
        for line in description.splitlines()
    ]

    authors = '; '.join(creator['name'] for creator in metadata['creators'])
    keywords = ', '.join(metadata.get('keywords', []))

    # Define a templated ReST page for the hit
    page = "\n".join([
        "",
        heading,
        "=" * len(heading),
        "",
        f".. image:: {links['badge']}",
        f"   :target: {links['doi']}",
        "",
        "Description:",
        "------------",
        "",
        ".. raw:: html",
        "",
        "    <embed>",
        *description_lines,
        "    </embed>",
        "",
        "Authors:",
        "--------",
        authors,
        "",
        ".. meta::",
        f"   :keywords: {keywords}",
        "",
    ])

    # Write ReST page, basing the filename on the Zenodo ID
    rst_path = os.path.join(output_dir, f"{hit['id']}.rst")
    with open(rst_path, "w", encoding = 'utf-8') as rst:
        rst.write(page)

### Process each hit, getting the next page/hit every iteration

In [130]:
# Walk through the result pages, starting with the page already fetched, and
# process every hit listed on each page (not just the first one), so that the
# count checked below matches the hits that were actually processed.
processed_hits = 0
jn = j
while True:
    for page_hit in jn['hits']['hits']:
        process_hit(page_hit)
    processed_hits += get_nb_hits(jn)
    # Follow the 'next' link until the pages exhaust
    next_link = get_next_link(jn)
    if not next_link:
        break
    next_response = reget(next_link)
    assert next_response.status_code == 200 # success
    jn = next_response.json()
# Check that processed hits equals total hits
assert processed_hits == j['hits']['total']

Title: Flood risk assessment of the European road network
URL: https://zenodo.org/api/records/?sort=mostrecent&q=&communities=coacch-co-designing-the-assessment-of-climate-change-costs-h2020-project&type=publication&page=2&size=1
Title: A global analysis of subsidence, relative sea-level change and coastal flood exposure
URL: https://zenodo.org/api/records/?sort=mostrecent&q=&communities=coacch-co-designing-the-assessment-of-climate-change-costs-h2020-project&type=publication&page=3&size=1
Title: Regional Inequalities in Flood Insurance Affordability and Uptake under Climate Change
URL: https://zenodo.org/api/records/?sort=mostrecent&q=&communities=coacch-co-designing-the-assessment-of-climate-change-costs-h2020-project&type=publication&page=4&size=1
Title: The ongoing nutrition transition thwarts long-term targets for food security, public health and environmental protection
URL: https://zenodo.org/api/records/?sort=mostrecent&q=&communities=coacch-co-designing-the-assessment-of-clima

In [162]:
# Keep the first hit of the initial page around and list its metadata fields.
first_page_hits = j['hits']['hits']
hit = first_page_hits[0]
hit['metadata'].keys()

dict_keys(['access_right', 'access_right_category', 'communities', 'creators', 'description', 'doi', 'grants', 'keywords', 'license', 'publication_date', 'related_identifiers', 'relations', 'resource_type', 'title', 'version'])

In [163]:
# Comma-separated keywords of the hit, or an empty string when it has none.
', '.join(hit['metadata']['keywords']) if 'keywords' in hit['metadata'] else ''

'gradual climate change, agriculture, forestry, partial-equilibrium, socio-economic, COACCH'