In [None]:
#hide
from nbdev_template.core import *

# Check Metadata and Upload to Zenodo

> This notebook will check the metadata according to certain norms and if they are correct, it will upload the data to Zenodo.

## Packages

In [28]:
import yaml
from pathlib import Path
import pandas as pd
import sys
import requests

## Definitions

In [29]:
def openfile(f):
    ending = str(f).split('.')[-1]
    path = str(f)
    if ending == 'json':
        try:
            d = pd.read_json(path, orient = 'table')
        except:
            d = pd.read_json(path)
    if ending == 'csv':
        try:
            d = pd.read_csv(path, delimiter = ',')
        except:
            d = pd.read_csv(path, delimiter = '\t')
    if ending == 'yaml' or ending == 'cite':
        with open(path) as yml:
            d = yaml.load(yml, Loader=yaml.FullLoader)
    return d

def findkeys(c):
    ck = list(c.keys())
    mk = list(c['Metadata'].keys())
    prk = [list(i.keys()) for i in c['Resources']]
    rk = [list(i.keys()) for i in c['Resources']][0]
    ak = [list(i['Attributes'].keys()) for i in c['Resources']]
    return ck, mk, prk, rk, ak

def checkVal(c, mk, ak):
    mke = []
    for i in mk:
        if c['Metadata'][i].strip() == '':
            print('Key {} may not be empty!'.format(i))
            mke.append(i)
    ake = []
    for i in ak:
        a = []
        for j in range(len(i)):
            if i[j].strip() == '':
                print('Key {} may not be empty!'.format(i[j]))
                mke.append(i[j])
    if not (mke or ake):
        message = None
        print('All metadata keys and attributes are filled, wonderful!')
    else:
        message = 'bad'
    return mke, ake, message

def checkkeys(pk, k):
    ke = []
    for i in pk:
        if i not in k:
            message = 'bad'
            print('The key {} does not exist in your metadata, please add it!'.format(i))
            ke.append(i)
        else:
            message = None
    return ke, message

def congrat(c1, c2, c3):
    if not (c1 or c2 or c3):
        message = None
        print('Congrats, all keys are set!')
    else:
        message = 'bad'
    return message

def comparekeys(allfn, cf, caks, dataDFkeys):
    if allfn != cf:
        missf = cf - allfn
        addedf = allfn - cf
        message = 'bad'
        print('You did not commented the file {} in your cite! Please to so!'.format(missf))
        print('The file {} is not in the directory, please remove it from the cite!'.format(addedf))
    else:
        missf = None
        addedf = None
        missk = []
        addedk = []
        allfn = list(allfn)
        messages = []
        for i in range(len(allfn)):
            print(allfn[i])
            print(i)
            if caks[i] != dataDFkeys[i]:
                misski = dataDFkeys[i] - caks[i]
                addedki = caks[i] - dataDFkeys[i]
                messages.append('bad')
                print('You did not commented the key {} in your cite! Please to so!'.format(misski))
                print('The key {} is not your data, please remove it from the cite!'.format(addedki))
                missk.append(misski)
                addedk.append(addedki)
        if len(messages) == 0:
            message = None
        else:
            message = 'bad'
                
    return missf, addedf, missk, addedk, message

In [30]:
def makeEmptyUpload(sandbox, ACCESS_TOKEN):
    
    # Create empty upload first to get the bucket_url
    headers = {"Content-Type": "application/json"}
    params = {'access_token': ACCESS_TOKEN}
    if sandbox:
        r = requests.post('https://sandbox.zenodo.org/api/deposit/depositions', params=params, json={}, headers=headers)
    else:
        r = requests.post('https://zenodo.org/api/deposit/depositions', params=params, json={}, headers=headers)
    print(r.status_code)
    bucket_url = r.json()["links"]["bucket"]
    return bucket_url, params

def uploadOneFile(bucket_url, params, filepath):

    # Give file
    p = Path(filepath)
    file = p.open("rb")
    filename = p.name
    
    # Upload file
    r = requests.put("%s/%s" % (bucket_url, filename), data=file, params=params)
    
    return r.json()
    
def uploadDirectory(bucket_url, params, dirpath):

    # Find all files in directory
    
    p = Path(dirpath)
    allfil = list(p.glob('*'))
    allfiles = [i.open("rb") for i in allfil]
    allfilenames = [i.name for i in allfil]
    
    # Upload files
    rs = []
    for i in range(len(allfiles)):
        r = requests.put("%s/%s" % (bucket_url, allfilenames[i]), data=allfiles[i], params=params)
        rs.append(r.json())
    return rs

## publprofil

In [31]:
with open('./publprofil.yaml') as yml:
    pp = yaml.load(yml, Loader=yaml.FullLoader)
pp = pp['ResearchObject']

In [32]:
pck, pmk, pprk, prk, pak = findkeys(pp)

## Dataset

In [33]:
p = Path('./data/Parapegmata')

In [34]:
allfile = list(p.glob('*'))

In [35]:
allfn = set([i.name for i in allfile if not i.suffix == '.cite'])

### Cite

In [36]:
c = list(sorted([i for i in allfile if str(i).split('.')[-1] == 'cite']))[0]

In [37]:
cite = openfile(c)['ResearchObject']

In [38]:
cck, cmk, cprk, crk, cak = findkeys(cite)

In [39]:
caks = [set(i) for i in cak]

In [40]:
cf = set([i['File'.lower()] for i in cite['Resources']])

### Data

In [41]:
d = list(sorted([i for i in allfile if not i.suffix == '.cite']))

In [42]:
dataDFkeys = [set(openfile(f).keys()) for f in d]

#### Check Metadata keys & Attributes are not empty:

In [43]:
cmke, cake, message1 = checkVal(cite, pmk, pak)

All metadata keys and attributes are filled, wonderful!


#### Check existence of all keys:

In [44]:
c1 = checkkeys(pck, cck)
c2 = checkkeys(pmk, cmk)
c3 = []
for i in cprk:
    c3.append(checkkeys(prk, i))
message2 = congrat(c1, c2, c3)

The key File does not exist in your metadata, please add it!
The key File does not exist in your metadata, please add it!
The key File does not exist in your metadata, please add it!
The key File does not exist in your metadata, please add it!
The key File does not exist in your metadata, please add it!
The key File does not exist in your metadata, please add it!
The key File does not exist in your metadata, please add it!
The key File does not exist in your metadata, please add it!


#### If data is identical to cite:

In [45]:
missf, addedf, missk, addedk, message3 = comparekeys(allfn, cf, caks, dataDFkeys)

Oxford.json
0
You did not commented the key {'addition_ID', 'addition_greek', 'feast', 'feast_greek', 'zodiac_part', 'ID', 'feast_ID', 'text_string', 'zodiac_part_ID', 'addition'} in your cite! Please to so!
The key {'meteo_event_class', 'parallel', 'authority', 'meteo_event_class_ID', 'authority_ID', 'parallel_ID', 'record_ID'} is not your data, please remove it from the cite!
Paris.json
1
Milet.json
2
You did not commented the key {'month_ID', 'night_length_greek', 'meteo_statement', 'day', 'text_passage', 'day_length_footnote', 'night_length_footnote', 'column', 'addition_text_string_greek', 'addition_text_string', 'night_length', 'day_length', 'zodiac_part', 'fragment', 'night_length_fractions', 'season', 'month', 'day_length_greek', 'season_ID', 'season_greek', 'day_length_fractions', 'meteo_addition_text_string_greek', 'meteo_addition_text_string', 'length_month', 'zodiac_part_ID'} in your cite! Please to so!
The key {'authority_ID_Meteo', 'fragment_ID', 'meteo_event_class', 'hol

## Upload on Zenodo

If there is any error in the cite, the process is terminated; otherwise one proceed with upload.

In [46]:
if (message1 or message2 or message3):
    sys.exit()

SystemExit: 

sandbox (for testing purposes) = sandbox ACCESS_TOKEN from https://sandbox.zenodo.org/account/settings/applications/tokens/new/

real data (for real upload) = ACCESS_TOKEN from https://zenodo.org/account/settings/applications/tokens/new/

In [47]:
''' 
    Register for a Zenodo sandbox account if you don’t already have one.
    Go to https://sandbox.zenodo.org/account/settings/applications/tokens/new/.
    Select the OAuth scopes you need (for the quick start tutorial you need deposit:write and deposit:actions).
    Please insert your just generated token for ... below.
'''

ACCESS_TOKEN = '...'

ONLY CHANGE SANDBOX TO FALSE IF YOU KNOW WHAT YOU ARE DOING, THIS WILL BE A REAL UPLOAD THEN! If you want to test something always use sandbox!

In [48]:
bucket_url, params = makeEmptyUpload(True, ACCESS_TOKEN)

201


If you get a number lower than 400 everything is fine!

In [49]:
uploadDirectory(bucket_url, params, str(p.resolve()))

[{'mimetype': 'application/json',
  'updated': '2020-10-14T14:47:30.838604+00:00',
  'links': {'self': 'https://sandbox.zenodo.org/api/files/125bd150-85df-4c93-9ca5-d8ca03b926e7/Antiochos.json',
   'version': 'https://sandbox.zenodo.org/api/files/125bd150-85df-4c93-9ca5-d8ca03b926e7/Antiochos.json?versionId=6d0fb48e-5af2-4a83-a130-7bbfb81afc57',
   'uploads': 'https://sandbox.zenodo.org/api/files/125bd150-85df-4c93-9ca5-d8ca03b926e7/Antiochos.json?uploads'},
  'is_head': True,
  'created': '2020-10-14T14:47:30.832807+00:00',
  'checksum': 'md5:9f8b8b88c310dc7256f23ae114eecb10',
  'version_id': '6d0fb48e-5af2-4a83-a130-7bbfb81afc57',
  'delete_marker': False,
  'key': 'Antiochos.json',
  'size': 78439},
 {'mimetype': 'application/json',
  'updated': '2020-10-14T14:47:31.132815+00:00',
  'links': {'self': 'https://sandbox.zenodo.org/api/files/125bd150-85df-4c93-9ca5-d8ca03b926e7/Geminos.json',
   'version': 'https://sandbox.zenodo.org/api/files/125bd150-85df-4c93-9ca5-d8ca03b926e7/Gemino