# Trial Zenodo API and processing of COACCH metadata

In [1]:
import unicodecsv as csv
import io
import os
import pandas as pd
import urllib
import yaml

from pprint import pprint
from zenodo_helpers import *

### Query and check that we got all hits on the first page. List the IDs.

The query string uses [elastic search syntax](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax).

In [2]:
query = "Climate change impacts on energy demand"
hits_per_page = 100
params = {
    'q': urllib.parse.quote(query),
    #'type': 'publication',
    'type': 'dataset',
    'communities': "coacch-co-designing-the-assessment-of-climate-change-costs-h2020-project",
    'size': hits_per_page,
    'page': 1,
    'access_token': os.environ['ZENODO_API_TOKEN']
}
response = reget("https://zenodo.org/api/records", params = params)
assert response.status_code == 200 # success
j = response.json()
hits = j['hits']['hits']
assert len(hits) < hits_per_page
print(f"{len(hits)} hits on the page.")
pprint([hit['id'] for hit in hits])

URL: https://zenodo.org/api/records, params:
{
    "q": "Climate%20change%20impacts%20on%20energy%20demand",
    "type": "dataset",
    "communities": "coacch-co-designing-the-assessment-of-climate-change-costs-h2020-project",
    "size": 100,
    "page": 1
}
13 hits on the page.
[4733499,
 5530146,
 5541894,
 5546248,
 5546264,
 5530237,
 5513871,
 5541337,
 5529732,
 5534190,
 5549953,
 5541327,
 5529888]


### Pretty print first hit

In [3]:
print(json.dumps(j['hits']['hits'][0], indent = 2))

{
  "conceptdoi": "10.5281/zenodo.4733498",
  "conceptrecid": "4733498",
  "created": "2021-05-03T15:18:08.786942+00:00",
  "doi": "10.5281/zenodo.4733499",
  "files": [
    {
      "bucket": "cca08a1a-ad35-489a-8049-bfe951fda371",
      "checksum": "md5:bf691c1dd7eae01fdfbaa63f5b5c028b",
      "key": "Abadie et al_2020_Additional scenarios.xlsx",
      "links": {
        "self": "https://zenodo.org/api/files/cca08a1a-ad35-489a-8049-bfe951fda371/Abadie%20et%20al_2020_Additional%20scenarios.xlsx"
      },
      "size": 549054,
      "type": "xlsx"
    }
  ],
  "id": 4733499,
  "links": {
    "badge": "https://zenodo.org/badge/doi/10.5281/zenodo.4733499.svg",
    "bucket": "https://zenodo.org/api/files/cca08a1a-ad35-489a-8049-bfe951fda371",
    "conceptbadge": "https://zenodo.org/badge/doi/10.5281/zenodo.4733498.svg",
    "conceptdoi": "https://doi.org/10.5281/zenodo.4733498",
    "doi": "https://doi.org/10.5281/zenodo.4733499",
    "html": "https://zenodo.org/record/4733499",
    "lates

### List URLs of files in the data-set of a hit, and guess the COACCH-specific metadata file

In [4]:
meta_urls = []
for hit in j['hits']['hits']:
    print(f"-------- ID: {hit['id']}")
    files = hit['files']
    meta_url = None
    for f in files:
        link = f['links']['self']
        if link.lower().find("metadata") >= 0 or link.lower().find("meta%20data") >= 0:
            assert meta_url is None
            meta_url = link
            print(f"{link} <-- metadata?")
        else:
            print(f"{link}")
    if meta_url is None:
        print(f"WARNING: data set {hit['id']} includes no obvious metadata file!")
    meta_urls.append(meta_url)

-------- ID: 4733499
https://zenodo.org/api/files/cca08a1a-ad35-489a-8049-bfe951fda371/Abadie%20et%20al_2020_Additional%20scenarios.xlsx
-------- ID: 5530146
https://zenodo.org/api/files/22c9648d-617e-4cd7-8a3b-fd85bb567dac/absolute_risk.xlsx
https://zenodo.org/api/files/22c9648d-617e-4cd7-8a3b-fd85bb567dac/COACCH_MetaData.csv <-- metadata?
https://zenodo.org/api/files/22c9648d-617e-4cd7-8a3b-fd85bb567dac/mvri.xlsx
https://zenodo.org/api/files/22c9648d-617e-4cd7-8a3b-fd85bb567dac/relative_risk.xlsx
-------- ID: 5541894
https://zenodo.org/api/files/47e4e7fc-084d-4e22-83ef-59337a2f86a9/COACCH_MetaData_energy_demand.csv <-- metadata?
https://zenodo.org/api/files/47e4e7fc-084d-4e22-83ef-59337a2f86a9/energy_demand_all_rcp.xlsx
-------- ID: 5546248
https://zenodo.org/api/files/c5de69df-aafb-46ca-8e57-a8a2aeafa8ba/COACCH_ICES-CMCC_data.7z
https://zenodo.org/api/files/c5de69df-aafb-46ca-8e57-a8a2aeafa8ba/COACCH_ICES_MetaData-CMCC.csv <-- metadata?
-------- ID: 5546264
https://zenodo.org/api/fi

### Download meta data as an in-memory binary-file-like object

In [5]:
meta_urls

[None,
 'https://zenodo.org/api/files/22c9648d-617e-4cd7-8a3b-fd85bb567dac/COACCH_MetaData.csv',
 'https://zenodo.org/api/files/47e4e7fc-084d-4e22-83ef-59337a2f86a9/COACCH_MetaData_energy_demand.csv',
 'https://zenodo.org/api/files/c5de69df-aafb-46ca-8e57-a8a2aeafa8ba/COACCH_ICES_MetaData-CMCC.csv',
 'https://zenodo.org/api/files/cd6c851e-1d5a-4cce-93b6-fdb73d3f9e4f/Metadata%20damage%20functions%20T4.3.csv',
 'https://zenodo.org/api/files/d69d64c8-affa-4166-9635-d14660bdcd60/metadata.csv',
 'https://zenodo.org/api/files/4406e796-3221-4985-925e-ea43cd89ac2f/ImpactChains_GLOBIOM_MetaData.csv',
 'https://zenodo.org/api/files/ace61379-f532-4208-b241-091201337012/COACCH_MetaData_T2.6_CUNI.csv',
 'https://zenodo.org/api/files/0beb8030-0676-45fe-9134-3406d6116a73/T3.4_SETP_Food_MetaData.csv',
 'https://zenodo.org/api/files/bbbc0e7e-dff8-4737-bd93-67da222b1478/metadata.csv',
 'https://zenodo.org/api/files/b049c94c-88b1-4c04-9ed6-6c46566eb1f5/COACCH_MetaData_T3.2_CT2.csv',
 'https://zenodo.org/

In [6]:
meta_url = meta_urls[1]
r = reget(meta_url)
meta_data = io.BytesIO(r.content)
r.content

URL: https://zenodo.org/api/files/22c9648d-617e-4cd7-8a3b-fd85bb567dac/COACCH_MetaData.csv


b'Name,Entry date,Dataset version,Author/Contact person,Short description,Partner,Model type/method,Model,Model version,Documentation,Sector,Keywords,SSP,RCP,GCM,Variables and units,Time start,Time end,Time resolution,Spatial coverage,Spatial resolution unit Europe,Spatial resolution Rest of World,Spatial projection,Data type,File format,Recommended citation,Other comments\r\nClimate induced economic shocks using CLIMRISK,9082021,1,Predrag Ignjacevic,"Various risk measures of climate induced economic shocks using CLIMRISK. Metrics include year of exceeding 1 billion in climate damages, year of exceeding 5% annual GDP lost and a multivariate risk index.",IVM - VU,integrated assessment model (IAM),CLIMRISK,CLIMRISK-RIVER,"Ignjacevic, P., Botzen, W. W., Estrada, F., Kuik, O., Ward, P., & Tiggeloven, T. (2020). CLIMRISK-RIVER: Accounting for local river flood risk in estimating the economic cost of climate change.\xa0Environmental Modelling & Software,\xa0132, 104784.",Direct impacts,"CLIM