# POST an EAMENA dataset on Zenodo

Post an EAMENA dataset on the Zenodo sandbox (~~https://zenodo.org/~~, https://sandbox.zenodo.org) from an EAMENA Search URL (e.g. the Sistan dataset)

---

documentation: https://developers.zenodo.org/#quickstart-upload

## Libraries

In [1]:
import os
import requests
import json
import zipfile
import pandas as pd
import numpy as np

# needed to export as JSON
class NpEncoder(json.JSONEncoder):
    """JSON encoder that also accepts NumPy scalars and arrays.

    Pass it as ``cls=NpEncoder`` to ``json.dump`` / ``json.dumps``. NumPy
    booleans, integers and floats are converted to the matching Python
    built-in, and arrays to (nested) lists.
    """

    def default(self, obj):
        """Return a JSON-serializable equivalent of *obj*.

        Raises:
            TypeError: if *obj* is neither a supported NumPy value nor a
                type the base encoder can serialize.
        """
        # np.bool_ is not a subclass of bool, so the base encoder rejects it.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # Anything else: let the base class raise the usual TypeError.
        return super().default(obj)

## Variables/metadata fields

Adapt these metadata to your dataset

In [4]:
# data/filename
# FILENAME is the base name of the exported files (FILENAME + ".geojson" and
# FILENAME + ".zip") and is also sent to Zenodo as the record title.
FILENAME = "data2" # this is also the title
# EAMENA search-export URL that returns the selection as GeoJSON. The
# percent-encoded query string carries the search filters: here an advanced
# search on the value "Sistan", the "Heritage Place" resource type and a
# polygon map filter.
GEOJSON_URL = r"https://database.eamena.org/api/search/export_results?paging-filter=1&tiles=true&format=geojson&reportlink=false&precision=6&total=326&language=en&advanced-search=%5B%7B%22op%22%3A%22and%22%2C%2234cfea78-c2c0-11ea-9026-02e7594ce0a0%22%3A%7B%22op%22%3A%22~%22%2C%22lang%22%3A%22en%22%2C%22val%22%3A%22Sistan%22%7D%2C%2234cfea87-c2c0-11ea-9026-02e7594ce0a0%22%3A%7B%22op%22%3A%22%22%2C%22val%22%3A%22%22%7D%7D%2C%7B%22op%22%3A%22or%22%2C%2234cfea69-c2c0-11ea-9026-02e7594ce0a0%22%3A%7B%22op%22%3A%22%22%2C%22val%22%3A%22%22%7D%2C%2234cfea73-c2c0-11ea-9026-02e7594ce0a0%22%3A%7B%22op%22%3A%22%22%2C%22val%22%3A%22%22%7D%2C%2234cfea43-c2c0-11ea-9026-02e7594ce0a0%22%3A%7B%22op%22%3A%22%22%2C%22val%22%3A%224ed99706-2d90-449a-9a70-700fc5326fb1%22%7D%2C%2234cfea5d-c2c0-11ea-9026-02e7594ce0a0%22%3A%7B%22op%22%3A%22%22%2C%22val%22%3A%22%22%7D%2C%2234cfea95-c2c0-11ea-9026-02e7594ce0a0%22%3A%7B%22op%22%3A%22~%22%2C%22lang%22%3A%22en%22%2C%22val%22%3A%22%22%7D%7D%5D&resource-type-filter=%5B%7B%22graphid%22%3A%2234cfe98e-c2c0-11ea-9026-02e7594ce0a0%22%2C%22name%22%3A%22Heritage%20Place%22%2C%22inverted%22%3Afalse%7D%5D&map-filter=%7B%22type%22%3A%22FeatureCollection%22%2C%22features%22%3A%5B%7B%22id%22%3A%22e84886109295dcb2d515f9ab792832bf%22%2C%22type%22%3A%22Feature%22%2C%22properties%22%3A%7B%22buffer%22%3A%7B%22width%22%3A10%2C%22unit%22%3A%22m%22%7D%2C%22inverted%22%3Afalse%7D%2C%22geometry%22%3A%7B%22coordinates%22%3A%5B%5B%5B61.5629662657594%2C31.341070427554456%5D%2C%5B61.39269902363566%2C31.226740239181964%5D%2C%5B61.52316353383432%2C30.977760218239922%5D%2C%5B61.773036239808164%2C30.92940344148805%5D%2C%5B61.89244443558445%2C31.037461248216815%5D%2C%5B61.933352798951745%2C31.22484931983834%5D%2C%5B61.5629662657594%2C31.341070427554456%5D%5D%5D%2C%22type%22%3A%22Polygon%22%7D%7D%5D%7D"
# Zenodo metadata: these values are sent as-is in the 'metadata' section of
# the deposition (upload type, description and a single creator entry).
UPLOAD_TYPE = "dataset"
DESCRIPTION = "This is my first upload"
# The one creator listed on the record: a name and its affiliation.
CREATORS_NAMES = 'EAMENA database'
CREATORS_AFFILIATION = 'University of Oxford, University of Southampton'

Request the database using the `GEOJSON_URL` selection. This GeoJSON URL comes from a Search query on Heritage Places (HP), see: https://github.com/eamena-project/eamena-arches-dev/blob/main/projects/sistan/README.md#dataset. The response is stored in the `data` JSON object

In [3]:
# Download the search results as GeoJSON.
resp = requests.get(GEOJSON_URL)
# Stop here on an HTTP error (4xx/5xx) instead of parsing an error page as data.
resp.raise_for_status()
# Decoded GeoJSON payload; this is what gets written to disk afterwards.
data = resp.json()

Write the GeoJSON file, the ZIP archive, etc., in your working directory

If you are running this script on your local machine, uncomment the first lines

In [5]:
# Optional local setup: uncomment to work inside <mydir>/exports/<FILENAME>.
# mydir = "C:\\Rprojects\\eamena-arches-dev\\dev\\citations"
# os.chdir(mydir)
# os.chdir(os.getcwd() + "/exports")
# from pathlib import Path
# Path(os.getcwd() + "/" + FILENAME).mkdir(parents = True, exist_ok = True)
# os.chdir(os.getcwd() + "/" + FILENAME)

# JSON file name and ZIP file
json_file_name = FILENAME + ".geojson"
zip_file_name = FILENAME + ".zip"

# Write the GeoJSON straight to disk; NpEncoder converts any NumPy value on
# the fly, so no intermediate dumps()/loads() round trip is needed.
with open(json_file_name, 'w', encoding='utf-8') as json_file:
    json.dump(data, json_file, indent=4, cls=NpEncoder)
# Report only once the file has been closed, i.e. fully written.
print(json_file_name + " has been exported in " + os.getcwd())

# Create a ZIP file and add the JSON file to it
with zipfile.ZipFile(zip_file_name, "w", zipfile.ZIP_DEFLATED) as zipf:
    zipf.write(json_file_name)
print(zip_file_name + " has been exported in " + os.getcwd())



data2.geojson has been exported in C:\Rprojects\eamena-arches-dev\dev\citations\exports\data2
data2.zip has been exported in C:\Rprojects\eamena-arches-dev\dev\citations\exports\data2


ℹ️ Further data can be created and files added into the ZIP

## Create an empty bucket

Provide the secret `ACCESS_TOKEN` of the Zenodo account as a string here below

In [None]:
# Zenodo personal access token. Prefer exporting it as the ZENODO_ACCESS_TOKEN
# environment variable so the secret is never saved inside the notebook;
# otherwise replace the empty default below with the token string.
ACCESS_TOKEN = os.environ.get("ZENODO_ACCESS_TOKEN", "")

Create the bucket

In [10]:
# Query parameters reused by the upload request that follows.
params = {'access_token': ACCESS_TOKEN}
# Create a new, empty deposition on the Zenodo sandbox.
r = requests.post('https://sandbox.zenodo.org/api/deposit/depositions',
                  params=params,
                  json={})
# Fail with the HTTP error (e.g. a rejected token) rather than with a
# KeyError on 'id' further down.
r.raise_for_status()
# collect the deposition id
deposition_id = r.json()['id']
print("The deposition_id is: " + str(deposition_id))

The deposition_id is: 10000737


## Add data

In [11]:
# `r` still holds the deposition-creation response at this point; it is
# rebound to the upload response below, so re-run the creation cell before
# re-running this one.
bucket_url = r.json()["links"]["bucket"]
# Stream the ZIP archive into the deposition's file bucket.
with open(zip_file_name, "rb") as fp:
    r = requests.put(
        f"{bucket_url}/{zip_file_name}",
        data=fp,
        params=params,
    )
# Stop here if the upload was rejected.
r.raise_for_status()
r.json()

{'created': '2023-10-15T10:21:32.304612+00:00',
 'updated': '2023-10-15T10:21:33.798183+00:00',
 'version_id': 'e5100fed-2a8d-4d02-8fe8-8a847f968f4e',
 'key': 'data2.zip',
 'size': 59915,
 'mimetype': 'application/zip',
 'checksum': 'md5:2f6ac1e147d45716a489aa9f9c19fc79',
 'is_head': True,
 'delete_marker': False,
 'links': {'self': 'https://zenodo-rdm-qa.web.cern.ch/api/files/c89e4186-7803-4a8b-ac18-16d5aff46d37/files/data2.zip',
  'version': 'https://zenodo-rdm-qa.web.cern.ch/api/files/c89e4186-7803-4a8b-ac18-16d5aff46d37/files/data2.zip?versionId=e5100fed-2a8d-4d02-8fe8-8a847f968f4e',
  'uploads': 'https://zenodo-rdm-qa.web.cern.ch/api/files/c89e4186-7803-4a8b-ac18-16d5aff46d37/files/data2.zip?uploads'}}

## Add metadata

In [12]:
# Metadata payload built from the constants defined at the top of the notebook.
# It gets its own name so the GeoJSON held in `data` is not overwritten
# (re-running the export cell would otherwise write this dict to disk).
metadata_payload = {
    'metadata': {
        'title': FILENAME,
        'upload_type': UPLOAD_TYPE,
        'description': DESCRIPTION,
        'creators': [{'name': CREATORS_NAMES,
                      'affiliation': CREATORS_AFFILIATION}],
    }
}
# `json=` serializes the payload and sets the application/json Content-Type.
r = requests.put(
    f'https://sandbox.zenodo.org/api/deposit/depositions/{deposition_id}',
    params={'access_token': ACCESS_TOKEN},
    json=metadata_payload,
)
# Stop here if Zenodo rejected the metadata.
r.raise_for_status()
r.status_code
# 200
# 200

200

## Publish

In [13]:
# Ask Zenodo to publish the deposition. The status is only displayed, not
# enforced: the recorded run returned 504 and the notebook carried on to the
# check step.
publish_url = f'https://sandbox.zenodo.org/api/deposit/depositions/{deposition_id}/actions/publish'
r = requests.post(publish_url, params={'access_token': ACCESS_TOKEN})
r.status_code
# 504

504

## Check

Have a look at the last deposit (`r.json()[0]`)

In [14]:
# List the depositions visible to this access token and show the first entry.
depositions_url = 'https://sandbox.zenodo.org/api/deposit/depositions'
auth_params = {'access_token': ACCESS_TOKEN}
r = requests.get(depositions_url, params=auth_params)
r.status_code
# 200
r.json()[0]

[{'created': '2023-10-15T10:22:23.131953+00:00',
  'modified': '2023-10-15T10:22:23.841097+00:00',
  'id': 10000737,
  'conceptrecid': '10000736',
  'doi': '10.5281/zenodo.10000737',
  'conceptdoi': '10.5281/zenodo.10000736',
  'doi_url': 'https://doi.org/10.5281/zenodo.10000737',
  'metadata': {'title': 'data2',
   'doi': '10.5281/zenodo.10000737',
   'publication_date': '2023-10-15',
   'description': 'This is my first upload',
   'access_right': 'open',
   'creators': [{'name': 'EAMENA database',
     'affiliation': 'University of Oxford, University of Southampton'}],
   'license': 'cc-zero',
   'imprint_publisher': 'Zenodo',
   'upload_type': 'dataset',
   'prereserve_doi': {'doi': '10.5281/zenodo.10000737', 'recid': 10000737}},
  'title': 'data2',
  'links': {'self': 'https://zenodo-rdm-qa.web.cern.ch/api/records/10000737',
   'html': 'https://zenodo-rdm-qa.web.cern.ch/records/10000737',
   'doi': 'https://doi.org/10.5281/zenodo.10000737',
   'parent_doi': 'https://zenodo-rdm-qa.w

ℹ️ The Zenodo link to the record is recorded in `r.json()[0]['links']['html']`

In [None]:
# First entry of the deposition listing held in `r`; its 'html' link is the
# record's web page.
first_deposit = r.json()[0]
zen_url = first_deposit['links']['html']
zen_url