In [1]:
import json
import os
import requests
import time
import urllib

### Helper functions

In [3]:
def get_nb_hits(json_response):
    """
    Count the hits contained in a decoded search response.

    :param json_response: Decoded JSON body; must provide the nested
        ``['hits']['hits']`` collection.
    :return: Length of that collection.
    """
    hits_on_page = json_response['hits']['hits']
    return len(hits_on_page)

def get_next_link(json_response):
    """
    Look up the ``next`` entry under ``links`` in a decoded response.

    :param json_response: Decoded JSON body; must provide a ``links`` mapping.
    :return: The value stored under ``'next'``, or ``None`` when absent.
    """
    links = json_response['links']
    return links.get('next')

def _redact_params(params):
    """Return a loggable copy of ``params`` with any ``access_token`` removed."""
    if isinstance(params, dict):
        return {key: value for key, value in params.items() if key != 'access_token'}
    if isinstance(params, (bytes, str)):
        # A raw query string could embed the token; do not echo it at all.
        return "<raw query string not shown>"
    # Otherwise treat it as an iterable of (key, value) pairs.
    return [(key, value) for key, value in params if key != 'access_token']


def reget(url, params=None, **kwargs):
    """
    Sends a GET request and resends it with increasing delays
    when status code 429 (too many requests) is received.

    The URL and parameters are printed before the first attempt. Any
    ``access_token`` parameter is left out of the printed copy only; the
    caller's ``params`` object is never modified and is sent unchanged.

    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary, list of tuples or bytes to send
        in the query string for the :class:`Request`.
    :param **kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """
    if params is None:
        print(f"URL: {url}")
    else:
        print(f"URL: {url}, params:")
        # Log a redacted *copy*: deleting the token from `params` itself
        # would also strip it from the request sent below.
        print(json.dumps(_redact_params(params), indent = 4))
    delay = 0.0
    while True:
        response = requests.get(url, params=params, **kwargs)
        if response.status_code != 429: # not too many requests
            return response
        # Back off linearly: 2s, 4s, 6s, ... between successive attempts.
        delay += 2
        print(f"delay: {delay}s to circumvent rate limiting...")
        time.sleep(delay)

### Query and display the hit count on page 1

The query string uses the [Elasticsearch query string syntax](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax).

In [9]:
# Search expression sent as the `q` parameter (left empty here).
query = ""
params = {
    # Pass the raw query: requests URL-encodes parameter values itself, so
    # quoting it here as well would double-encode (a space would arrive as
    # the literal text "%20").
    'q': query,
    #'type': 'publication',
    'type': 'dataset',
    'communities': "coacch-co-designing-the-assessment-of-climate-change-costs-h2020-project",
    'size': 1, # hits per page
    'page': 1,
    # Read the token from the environment so it is never stored in the notebook.
    'access_token': os.environ['ZENODO_API_TOKEN']
}
response = reget("https://zenodo.org/api/records", params = params)
assert response.status_code == 200 # success
j = response.json()
assert len(j['hits']['hits']) == 1 # yup, one hit on the first page

URL: https://zenodo.org/api/records, params:
{
    "q": "",
    "type": "dataset",
    "communities": "coacch-co-designing-the-assessment-of-climate-change-costs-h2020-project",
    "size": 1,
    "page": 1
}


### Pretty print JSON result

In [10]:
# Show the decoded response `j` as JSON indented by two spaces.
print(json.dumps(j, indent=2))

{
  "aggregations": {
    "access_right": {
      "buckets": [
        {
          "doc_count": 31,
          "key": "open"
        },
        {
          "doc_count": 1,
          "key": "restricted"
        }
      ],
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0
    },
    "file_type": {
      "buckets": [
        {
          "doc_count": 19,
          "key": "pdf"
        },
        {
          "doc_count": 11,
          "key": "csv"
        },
        {
          "doc_count": 5,
          "key": "xlsx"
        },
        {
          "doc_count": 1,
          "key": "7z"
        },
        {
          "doc_count": 1,
          "key": "docx"
        },
        {
          "doc_count": 1,
          "key": "zip"
        }
      ],
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0
    },
    "keywords": {
      "buckets": [
        {
          "doc_count": 10,
          "key": "COACCH"
        },
        {
          "doc_count": 2,
        

### List URLs of files in the dataset and guess the metadata file

In [48]:
# Print the URL of every file attached to the first hit and remember the one
# that looks like a metadata file. A URL qualifies when its lower-cased form
# contains "metadata" or "meta%20data" (the latter is "meta data" with a
# URL-encoded space). If several URLs qualify, the last one is kept in `meta`.
files = j['hits']['hits'][0]['files']
meta = None
for f in files:
    link = f['links']['self']
    if not any(marker in link.lower() for marker in ("metadata", "meta%20data")):
        print(f"{link}")
        continue
    meta = link
    print(f"{link} <-- metadata?")

https://zenodo.org/api/files/cd6c851e-1d5a-4cce-93b6-fdb73d3f9e4f/damage_coefficients-COACCH-WP4.xlsx
https://zenodo.org/api/files/cd6c851e-1d5a-4cce-93b6-fdb73d3f9e4f/damage_coefficients-ICES-COACCH-WP4.xlsx
https://zenodo.org/api/files/cd6c851e-1d5a-4cce-93b6-fdb73d3f9e4f/DFs_All_impacts.xlsx
https://zenodo.org/api/files/cd6c851e-1d5a-4cce-93b6-fdb73d3f9e4f/Metadata%20damage%20functions%20T4.3.csv <-- metadata?


#### Download the metadata file

In [51]:
# Stop here with a clear message when no metadata file was identified,
# instead of failing on `None.split` below.
assert meta is not None, "no metadata file was identified among the dataset's files"
# Last path segment of the URL; it is kept URL-encoded (e.g. "%20" for spaces).
meta_name = meta.split("/")[-1]
meta_name

'Metadata%20damage%20functions%20T4.3.csv'