Commit

Merge 2ff1213 into 64b0f1d
Aidavhd committed Aug 27, 2020
Parents: 64b0f1d + 2ff1213
Commit: f9a6d18
Showing 11 changed files with 517 additions and 239 deletions.
41 changes: 25 additions & 16 deletions boutiques/bosh.py 100755 → 100644
@@ -389,23 +389,28 @@ def data(*params):
parser.parse_known_args(params + ('--help',))
raise_error(DataHandlerError,
"Missing data mode {delete, inspect, publish}.")
elif results.mode == "inspect":
from boutiques.dataHandler import DataHandler
dataHandler = DataHandler()
return dataHandler.inspect(results.example)
elif results.mode == "publish":
from boutiques.dataHandler import DataHandler
dataHandler = DataHandler()
return dataHandler.publish(results.file, results.zenodo_token,
results.author, results.nexus_token,
results.nexus_org, results.nexus_project,
results.individually, results.sandbox,
results.no_int, results.verbose,
results.nexus)
elif results.mode == "delete":
else:
from boutiques.dataHandler import DataHandler
dataHandler = DataHandler()
return dataHandler.delete(results.file, results.no_int)

if results.mode == "inspect":
return dataHandler.inspect(results.example)
elif results.mode == "publish":
return dataHandler.publish(results.file, results.zenodo_token,
results.author, results.nexus_token,
results.nexus_org, results.nexus_project,
results.individually, results.sandbox,
results.no_int, results.verbose,
results.nexus)
elif results.mode == "delete":
return dataHandler.delete(results.file, results.no_int)

elif results.mode == "search":
return dataHandler.search(results.verbose, results.sandbox)

elif results.mode == "pull":
return dataHandler.pull(results.zids, results.verbose,
results.sandbox)


def deprecate(*params):
@@ -487,6 +492,9 @@ def bosh_return(val, code=0, hide=False, formatted=None):
return bosh_return(out, hide=True)
elif func == "data":
out = data(*params)
if "search" in params:
return bosh_return(out, formatted=tabulate(out, headers='keys',
tablefmt='plain'))
return bosh_return(out)
elif func == "version":
from boutiques.__version__ import VERSION
@@ -506,7 +514,8 @@ def bosh_return(val, code=0, hide=False, formatted=None):
ValidationError,
ExportError,
ImportError,
ExecutorError) as e:
ExecutorError,
DataHandlerError) as e:
# We don't want to raise an exception when function is called
# from CLI.'
if runs_as_cli():
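For reference, the tabulate call above expects the search results to be an iterable of mappings: headers='keys' takes the column names from the dict keys and tablefmt='plain' prints borderless aligned columns. A minimal, self-contained sketch of that formatting, with hypothetical record fields (not the actual shape returned by bosh data search):

from tabulate import tabulate

# Hypothetical search output: one dict per published execution-data record.
records = [
    {"ID": "zenodo.123456", "TITLE": "bet-execution-records", "DOWNLOADS": 12},
    {"ID": "zenodo.123457", "TITLE": "flirt-execution-records", "DOWNLOADS": 3},
]

# headers='keys' -> column names from dict keys; tablefmt='plain' -> no borders.
print(tabulate(records, headers="keys", tablefmt="plain"))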
38 changes: 33 additions & 5 deletions boutiques/boshParsers.py
@@ -60,15 +60,17 @@ def add_subparser_create(subparsers):

def add_subparser_data(subparsers):
parser_data = subparsers.add_parser(
"data", description="Manage execution data collection.")
"data", description="Manage execution data collection.",
formatter_class=RawTextHelpFormatter)
parser_data.set_defaults(function='data')
data_subparsers = parser_data.add_subparsers(
help="Manage execution data records. Inspect: displays "
"the unpublished records currently in the cache. "
help="Delete: remove one or more records from the cache.\n"
"Inspect: displays the unpublished records currently in the cache.\n"
"Publish: publishes contents of cache to Zenodo as "
"a public data set. Requires a Zenodo access token, "
"see http://developers.zenodo.org/#authentication. "
"Delete: remove one or more records from the cache.")
"see http://developers.zenodo.org/#authentication.\n"
"Pull: pull one or more execution data records from Zenodo.\n"
"Search: search for published execution data records on Zenodo.\n")

parser_data_delete = data_subparsers.add_parser(
"delete", description="Delete data record(s) in cache.")
@@ -134,6 +136,32 @@ def add_subparser_data(subparsers):
parser_data_publish.add_argument("--nexus-project", action="store",
help="Nexus project to publish to. ")

parser_data_pull = data_subparsers.add_parser(
"pull", description="Ensures that execution data records from Zenodo"
"are locally cached, downloading them if needed.")
parser_data_pull.set_defaults(mode='pull')
parser_data_pull.add_argument("zids", nargs="+", action="store",
help="One or more Zenodo IDs for the excution"
" record(s) to pull, prefixed by 'zenodo.',"
" e.g. zenodo.123456 zenodo.123457")
parser_data_pull.add_argument("-v", "--verbose", action="store_true",
help="Print information messages")
parser_data_pull.add_argument("--sandbox", action="store_true",
help="pull from Zenodo's sandbox instead of "
"production server. Recommended for tests.")

parser_data_search = data_subparsers.add_parser(
"search", description="Search on Zenodo for"
" execution data records. When no term is"
" supplied, will search for all execution"
" data records.")
parser_data_search.set_defaults(mode='search')
parser_data_search.add_argument("-v", "--verbose", action="store_true",
help="Print information messages")
parser_data_search.add_argument("--sandbox", action="store_true",
help="search Zenodo's sandbox instead of "
"production server. Recommended for tests.")


def add_subparser_deprecate(subparsers):
parser_deprecate = subparsers.add_parser(
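Taken together, the new subparsers give bosh data two additional modes, pull and search. Below is a stand-alone argparse sketch of the same interface, useful for seeing how the arguments parse; it is only an illustration, the real definitions are the ones in boshParsers.py above:

import argparse

# Stand-alone sketch mirroring the new "data pull" / "data search" modes.
parser = argparse.ArgumentParser(prog="bosh data")
subparsers = parser.add_subparsers(dest="mode")

pull = subparsers.add_parser("pull")
pull.add_argument("zids", nargs="+", help="Zenodo IDs, e.g. zenodo.123456")
pull.add_argument("-v", "--verbose", action="store_true")
pull.add_argument("--sandbox", action="store_true")

search = subparsers.add_parser("search")
search.add_argument("-v", "--verbose", action="store_true")
search.add_argument("--sandbox", action="store_true")

# Example invocation, roughly equivalent to: bosh data pull zenodo.123456 --sandbox
args = parser.parse_args(["pull", "zenodo.123456", "--sandbox"])
print(args.mode, args.zids, args.sandbox)  # pull ['zenodo.123456'] True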
40 changes: 32 additions & 8 deletions boutiques/dataHandler.py
@@ -36,13 +36,17 @@ def inspect(self, example=False):
else:
print("No records in the cache at the moment.")
# Print information about files in cache
# and the directory of caching data
else:
print("There are {} unpublished records in the cache"
.format(len(self.record_files)))
print("There are {} unpublished descriptors in the cache"
.format(len(self.descriptor_files)))
for i in range(len(self.cache_files)):
print(self.cache_files[i])
print("Execution records are stored in: " +
os.path.join(os.path.expanduser('~'),
".cache", "boutiques", "data"))

# Private function to print a file to console
def _display_file(self, file_path):
@@ -82,10 +86,7 @@ def publish(self, file, zenodo_token, author, nexus_token,
# Verify publishing
if not self.no_int:
prompt = self._get_publishing_prompt()
try:
ret = raw_input(prompt) # Python 2
except NameError:
ret = input(prompt) # Python 3
ret = input(prompt)
if ret.upper() != "Y":
return

@@ -199,6 +200,7 @@ def _create_metadata(self, records_dict):
# Add tool name(s) to keywords
data['metadata']['keywords'] = [v for v in unique_names]
data['metadata']['keywords'].insert(0, 'Boutiques')
data['metadata']['keywords'].insert(1, 'Boutiques-execution-record')
# Add descriptor link(s) to related identifiers
data['metadata']['related_identifiers'] = \
[{'identifier': url.format(v.split('.')[2]),
@@ -250,10 +252,7 @@ def delete(self, file=None, no_int=False):
# Verify deletion
if not self.no_int:
prompt = self._get_delete_prompt()
try:
ret = raw_input(prompt) # Python 2
except NameError:
ret = input(prompt) # Python 3
ret = input(prompt)
if ret.upper() != "Y":
return

@@ -272,6 +271,31 @@
for f in self.cache_files]
print_info("All files have been removed from the data cache")

def search(self, verbose=False, sandbox=False):
firstKeyWord = "Boutiques"
secondKeyWord = "boutiques-execution-record"
searchType = "dataset"
query = ''
query_line = ''

from boutiques.zenodoHelper import ZenodoHelper
zenodoHelper = ZenodoHelper(verbose=verbose, sandbox=sandbox)

return zenodoHelper.search(query, query_line, firstKeyWord,
secondKeyWord, searchType)

def pull(self, zids, verbose=False, sandbox=False):
dataPull = True
firstKeyWord = "Boutiques"
secondKeyWord = "boutiques-execution-record"
searchType = "dataset"

from boutiques.zenodoHelper import ZenodoHelper
zenodoHelper = ZenodoHelper(verbose=verbose, sandbox=sandbox)

return zenodoHelper.zenodo_pull(zids, firstKeyWord,
secondKeyWord, searchType, dataPull)

def _file_exists_in_cache(self, filename):
file_path = os.path.join(self.cache_dir, filename)
# Incorrect filename input
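Both search() and pull() delegate the Zenodo interaction to ZenodoHelper, filtering on the Boutiques and boutiques-execution-record keywords and the dataset record type. The request below is only a rough illustration of that kind of keyword-filtered query against Zenodo's public records API; the exact parameters and query syntax ZenodoHelper uses are assumptions here, not its implementation:

import requests

# Query the public records API (sandbox.zenodo.org when --sandbox is used).
base = "https://zenodo.org/api/records"
params = {
    # Assumed Elasticsearch-style keyword filter; ZenodoHelper may build this differently.
    "q": 'keywords:"Boutiques" AND keywords:"boutiques-execution-record"',
    "size": 10,
}
r = requests.get(base, params=params)
r.raise_for_status()
for hit in r.json()["hits"]["hits"]:
    print(hit["id"], hit["metadata"]["title"])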
7 changes: 5 additions & 2 deletions boutiques/publisher.py
@@ -93,8 +93,11 @@ def publish(self):
from boutiques.searcher import Searcher
searcher = Searcher(self.descriptor.get("name"), self.verbose,
self.sandbox, exact_match=True)
r = self.zenodo_helper.zenodo_search(searcher.query,
searcher.query_line)
zenodoHelper = ZenodoHelper(sandbox=self.sandbox,
verbose=self.verbose)
r = zenodoHelper.zenodo_search(searcher.query, searcher.query_line,
"Boutiques", "schema-version.*",
"software")

publish_update = False
for hit in r.json()["hits"]["hits"]:
92 changes: 12 additions & 80 deletions boutiques/puller.py 100755 → 100644
@@ -1,90 +1,22 @@
import requests
import urllib
import os
from boutiques.logger import raise_error, print_info
from boutiques.searcher import Searcher
from boutiques.zenodoHelper import ZenodoError, ZenodoHelper

try:
# Python 3
from urllib.request import urlopen
from urllib.request import urlretrieve
except ImportError:
# Python 2
from urllib2 import urlopen
from urllib import urlretrieve
from boutiques.zenodoHelper import ZenodoError
from urllib.request import urlopen
from urllib.request import urlretrieve


class Puller():

def __init__(self, zids, verbose=False, sandbox=False):
# remove zenodo prefix
self.zenodo_entries = []
self.cache_dir = os.path.join(
os.path.expanduser('~'), ".cache", "boutiques",
"sandbox" if sandbox else "production")
discarded_zids = zids
# This removes duplicates, should maintain order
zids = list(dict.fromkeys(zids))
for zid in zids:
discarded_zids.remove(zid)
try:
# Zenodo returns the full DOI, but for the purposes of
# Boutiques we just use the Zenodo-specific portion (as its the
# unique part). If the API updates on Zenodo to no longer
# provide the full DOI, this still works because it just grabs
# the last thing after the split.
zid = zid.split('/')[-1]
newzid = zid.split(".", 1)[1]
newfname = os.path.join(self.cache_dir,
"zenodo-{0}.json".format(newzid))
self.zenodo_entries.append({"zid": newzid, "fname": newfname})
except IndexError:
raise_error(ZenodoError, "Zenodo ID must be prefixed by "
"'zenodo', e.g. zenodo.123456")
self.verbose = verbose
self.sandbox = sandbox
if(self.verbose):
for zid in discarded_zids:
print_info("Discarded duplicate id {0}".format(zid))
self.zenodo_helper = ZenodoHelper(sandbox=self.sandbox,
verbose=self.verbose)
self.zids = zids

def pull(self):
# return cached file if it exists
json_files = []
for entry in self.zenodo_entries:
if os.path.isfile(entry["fname"]):
if(self.verbose):
print_info("Found cached file at %s"
% entry["fname"])
json_files.append(entry["fname"])
continue

searcher = Searcher(entry["zid"], self.verbose, self.sandbox,
exact_match=True)
r = self.zenodo_helper.zenodo_search(searcher.query,
searcher.query_line)
if not len(r.json()["hits"]["hits"]):
raise_error(ZenodoError, "Descriptor \"{0}\" "
"not found".format(entry["zid"]))
for hit in r.json()["hits"]["hits"]:
file_path = hit["files"][0]["links"]["self"]
file_name = file_path.split(os.sep)[-1]
if hit["id"] == int(entry["zid"]):
if not os.path.exists(self.cache_dir):
os.makedirs(self.cache_dir)
if(self.verbose):
print_info("Downloading descriptor %s"
% file_name)
downloaded = urlretrieve(file_path, entry["fname"])
if(self.verbose):
print_info("Downloaded descriptor to "
+ downloaded[0])
json_files.append(downloaded[0])
else:
raise_error(ZenodoError, "Searched-for descriptor \"{0}\" "
"does not match descriptor \"{1}\" returned "
"from Zenodo".format(entry["zid"], hit["id"]))
dataPull = False
firstKeyWord = "Boutiques"
secondKeyWord = "schema-version.*"
searchType = "software"
from boutiques.zenodoHelper import ZenodoHelper
zenodoHelper = ZenodoHelper(verbose=self.verbose, sandbox=self.sandbox)

return json_files
return zenodoHelper.zenodo_pull(self.zids, firstKeyWord,
secondKeyWord, searchType, dataPull)
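The Zenodo-ID normalisation and cache-file naming that the deleted Puller code handled (and that zenodo_pull now takes over) boils down to the following; this is a sketch of that logic for reference, not the ZenodoHelper implementation:

import os

def cache_path_for(zid, sandbox=False):
    # "10.5281/zenodo.123456" or "zenodo.123456" -> "123456"
    short = zid.split("/")[-1]         # keep only the Zenodo-specific part of a DOI
    numeric = short.split(".", 1)[1]   # drop the "zenodo." prefix
    cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "boutiques",
                             "sandbox" if sandbox else "production")
    return os.path.join(cache_dir, "zenodo-{0}.json".format(numeric))

print(cache_path_for("zenodo.123456"))
# -> ~/.cache/boutiques/production/zenodo-123456.json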
