# Trial of the Zenodo API and processing of COACCH metadata

In [1]:
import io
import json
import os
import urllib
import urllib.parse
from pprint import pprint

import pandas as pd
import unicodecsv as csv
import yaml

from zenodo_helpers import *

### Query and check that we got all hits on the first page. List the IDs.

The query string uses the [Elasticsearch query string syntax](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax).

In [2]:
# Search the Zenodo records API for datasets of the COACCH community that match
# the free-text query, then check that the whole result set fits on one page.
query = "Climate change impacts on energy demand"
hits_per_page = 100
params = {
    # NOTE(review): the query is percent-encoded here. If `reget` forwards
    # `params` to an HTTP client that encodes query parameters itself, the value
    # ends up encoded twice -- confirm against zenodo_helpers.reget.
    'q': urllib.parse.quote(query),
    'type': 'dataset',  # use 'publication' to search publications instead
    'communities': "coacch-co-designing-the-assessment-of-climate-change-costs-h2020-project",
    'size': hits_per_page,
    'page': 1,
    # The token is read from the environment rather than hard-coded.
    'access_token': os.environ['ZENODO_API_TOKEN']
}
response = reget("https://zenodo.org/api/records", params=params)
assert response.status_code == 200, (
    f"Zenodo query failed with HTTP status {response.status_code}"
)
j = response.json()
hits = j['hits']['hits']
# A completely filled page could mean that more hits wait on later pages.
assert len(hits) < hits_per_page, (
    f"Got {len(hits)} hits with a page size of {hits_per_page}; "
    "further result pages may exist."
)
print(f"{len(hits)} hits on the page.")
pprint([hit['id'] for hit in hits])

URL: https://zenodo.org/api/records, params:
{
    "q": "Climate%20change%20impacts%20on%20energy%20demand",
    "type": "dataset",
    "communities": "coacch-co-designing-the-assessment-of-climate-change-costs-h2020-project",
    "size": 100,
    "page": 1
}
13 hits on the page.
[4733499,
 5530146,
 5546248,
 5546264,
 5541894,
 5513871,
 5530237,
 5541337,
 5529732,
 5534190,
 5529888,
 5549953,
 5541327]


### Pretty-print the first hit

In [3]:
# Dump the complete first record as indented JSON to inspect its structure.
print(json.dumps(j['hits']['hits'][0], indent=2))

{
  "conceptdoi": "10.5281/zenodo.4733498",
  "conceptrecid": "4733498",
  "created": "2021-05-03T15:18:08.786942+00:00",
  "doi": "10.5281/zenodo.4733499",
  "files": [
    {
      "bucket": "cca08a1a-ad35-489a-8049-bfe951fda371",
      "checksum": "md5:bf691c1dd7eae01fdfbaa63f5b5c028b",
      "key": "Abadie et al_2020_Additional scenarios.xlsx",
      "links": {
        "self": "https://zenodo.org/api/files/cca08a1a-ad35-489a-8049-bfe951fda371/Abadie%20et%20al_2020_Additional%20scenarios.xlsx"
      },
      "size": 549054,
      "type": "xlsx"
    }
  ],
  "id": 4733499,
  "links": {
    "badge": "https://zenodo.org/badge/doi/10.5281/zenodo.4733499.svg",
    "bucket": "https://zenodo.org/api/files/cca08a1a-ad35-489a-8049-bfe951fda371",
    "conceptbadge": "https://zenodo.org/badge/doi/10.5281/zenodo.4733498.svg",
    "conceptdoi": "https://doi.org/10.5281/zenodo.4733498",
    "doi": "https://doi.org/10.5281/zenodo.4733499",
    "html": "https://zenodo.org/record/4733499",
    "lates

### Get the URL of the DOI badge of the first hit

In [6]:
# Keep the first search hit in `hit`; the download cell further down reads it.
hit = j['hits']['hits'][0]
print(hit['id'])
# Bare last expression: the notebook displays the badge URL as the cell result.
hit['links']['badge']

4733499


'https://zenodo.org/badge/doi/10.5281/zenodo.4733499.svg'

### Download the DOI badge (SVG) as an in-memory binary file-like object

In [7]:
# Fetch the URL stored under the hit's 'badge' link via the `reget` helper.
r = reget(hit['links']['badge'])
# Wrap the raw response bytes in an in-memory binary stream.
# NOTE(review): despite the name `meta_data`, the recorded output shows that
# this is the badge SVG, not record metadata -- confirm the intended URL.
meta_data = io.BytesIO(r.content)
# Bare last expression: display the raw bytes that were downloaded.
r.content

URL: https://zenodo.org/badge/doi/10.5281/zenodo.4733499.svg


b'<svg xmlns="http://www.w3.org/2000/svg"\n     width="186" height="20">\n        <linearGradient id="b" x2="0" y2="100%">\n            <stop offset="0" stop-color="#bbb" stop-opacity=".1"/>\n            <stop offset="1" stop-opacity=".1"/>\n        </linearGradient>\n        <mask id="a" width="186" height="20">\n            <rect width="186" height="20" rx="3"\n            fill="#fff"/>\n        </mask>\n        <g mask="url(#a)">\n            <path fill="#555" d="M0 0h31v20H0z" />\n            <path fill="#007ec6"\n            d="M31 0h155v20H31z"\n            />\n            <path fill="url(#b)" d="M0 0h186v20H0z" />\n        </g>\n        <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,\n        Verdana,Geneva,sans-serif" font-size="11">\n            <text x="16" y="15" fill="#010101"\n            fill-opacity=".3">\n                DOI\n            </text>\n            <text x="16" y="14">\n                DOI\n            </text>\n            <text x="108"\n        