In this exercise, we will query the Zenodo API for the most recent version of 10 published, open access metadata records that include the 'contributors' field. 

Prerequisites:
1. [Sign up for a Zenodo account](https://zenodo.org/signup/). There is an option to use an existing GitHub or ORCiD account if you like.
2. Create an [access token for the Zenodo API](https://zenodo.org/account/settings/applications/tokens/new/#). It is stored in the `ACCESS_TOKEN` variable below and sent with all of your API requests as the `access_token` query parameter.

References:
* [Zenodo API documentation](https://developers.zenodo.org/)
* [Zenodo search guide (Elasticsearch query syntax)](https://help.zenodo.org/guides/search/)

In [10]:
import requests

In [11]:
# Personal access token for the Zenodo API; it is sent with the request as the
# `access_token` query parameter.
ACCESS_TOKEN = 'Your key here'

# Number of records to return
size = 10

# Search for published records that have a `contributors` field, returning
# only the first page of `size` results and hiding older versions.
search_params = {
    'q': '_exists_:contributors',
    'size': size,
    'page': 1,
    'status': 'published',
    'sort': 'bestmatch',
    'all_versions': 'false',
    'access_token': ACCESS_TOKEN,
}

# The timeout keeps the cell from hanging indefinitely if the server does not
# respond.
response = requests.get('https://zenodo.org/api/records',
                        params=search_params,
                        timeout=60)

# Stop here with an explicit HTTP error on a 4xx/5xx response, rather than
# failing later when the parsed body is indexed as a search result.
response.raise_for_status()

records = response.json()

In [12]:
from collections import Counter

In [13]:
# Collect one row per record: ID, title, resource type, and contributor role(s).
# Missing fields are recorded as the string "none".
results = []

for hit in records['hits']['hits']:
    metadata = hit['metadata']

    record_id = hit['id']
    title = metadata.get('title', "none")
    if 'resource_type' in metadata:
        resource_type = metadata['resource_type']['title']
    else:
        resource_type = "none"

    # Count how many contributors hold each role; Counter keeps roles in the
    # order they first appear in the record.
    role_counts = Counter(
        person['type'] for person in (metadata.get('contributors') or [])
    )

    if role_counts:
        # Formatted as comma-separated role=count pairs, e.g. "RoleA=2,RoleB=1"
        contributors = ','.join(
            role + "=" + str(n) for role, n in role_counts.items()
        )
    else:
        # No 'contributors' key, or an empty list: both mean no contributors.
        contributors = 'none'

    results.append([record_id, title, resource_type, contributors])

In [14]:
import json
import time
import os.path
import csv

In [15]:
# Local-time timestamp (to the second) used to distinguish output files
# between runs
timestr = time.strftime("%Y%m%d-%H%M%S")

# Print all records to a file in JSON format
records_path = "./contrib_records"
record_dump_fn = "zenodo_contrib_records_" + str(size) + "_" + timestr + ".json"
complete_rd_fn = os.path.join(records_path, record_dump_fn)
# Create the output directory on first use so open() does not fail
os.makedirs(records_path, exist_ok=True)
with open(complete_rd_fn, 'w', encoding='utf-8') as f:
    json.dump(records, f, indent=4, sort_keys=True)

# Print selected output from records
parsed_path = "parsed_output"
parsed_records_fn = "zenodo_parsed_records" + "_" + timestr + ".csv"
complete_pr_fn = os.path.join(parsed_path, parsed_records_fn)
os.makedirs(parsed_path, exist_ok=True)

header = ['id','title','resource_type','contributor_role']

# newline='' lets the csv module control line endings (otherwise blank rows
# can appear on Windows); utf-8 keeps non-ASCII titles writable regardless of
# the platform's default encoding.
with open(complete_pr_fn, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(header)
    writer.writerows(results)

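The cells below are for reference: they explore the structure of the `records` response. The outputs shown may come from an earlier run with a different `size` (for example, 100 hits rather than 10).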

In [4]:
type(records)

dict

In [5]:
records.keys()

dict_keys(['aggregations', 'hits', 'links'])

In [6]:
type(records['hits'])

dict

In [7]:
records['hits'].keys()

dict_keys(['hits', 'total'])

In [8]:
type(records['hits']['hits'])

list

In [9]:
len(records['hits']['hits'])

100

In [10]:
type(records['hits']['hits'][0])

dict

In [11]:
records['hits']['hits'][3]['metadata'].keys()

dict_keys(['access_right', 'access_right_category', 'creators', 'description', 'doi', 'keywords', 'language', 'license', 'publication_date', 'related_identifiers', 'relations', 'resource_type', 'title'])