diff --git a/cwlkernel/CWLKernel.py b/cwlkernel/CWLKernel.py
index 7206284..05ffe28 100644
--- a/cwlkernel/CWLKernel.py
+++ b/cwlkernel/CWLKernel.py
@@ -2,6 +2,7 @@
import logging
import os
import re
+import shutil
import traceback
from io import StringIO
from pathlib import Path
@@ -22,7 +23,7 @@
from .git.CWLGitResolver import CWLGitResolver
version = "0.0.2"
-
+BOOT_DIRECTORY = Path(os.getcwd()).absolute()
CONF = CWLExecuteConfigurator()
@@ -43,10 +44,12 @@ class CWLKernel(Kernel):
def __init__(self, **kwargs):
super().__init__(**kwargs)
+ self._session_dir = os.path.join(CONF.CWLKERNEL_BOOT_DIRECTORY, self.ident)
+ self._boot_directory: Path = BOOT_DIRECTORY
self._yaml_input_data: Optional[str] = None
- self._results_manager = ResultsManager(os.sep.join([CONF.CWLKERNEL_BOOT_DIRECTORY, self.ident, 'results']))
- runtime_file_manager = IOFileManager(os.sep.join([CONF.CWLKERNEL_BOOT_DIRECTORY, self.ident, 'runtime_data']))
- self._cwl_executor = CoreExecutor(runtime_file_manager)
+ self._results_manager = ResultsManager(os.path.join(self._session_dir, 'results'))
+ runtime_file_manager = IOFileManager(os.path.join(self._session_dir, 'runtime_data'))
+ self._cwl_executor = CoreExecutor(runtime_file_manager, self._boot_directory)
self._pid = (os.getpid(), os.getppid())
self._cwl_logger = CWLLogger(os.path.join(CONF.CWLKERNEL_BOOT_DIRECTORY, self.ident, 'logs'))
self._set_process_ids()
@@ -60,6 +63,10 @@ def __init__(self, **kwargs):
if self.log is None: # pylint: disable=access-member-before-definition
self.log = logging.getLogger()
+ @property
+ def runtime_directory(self) -> Path:
+ return Path(self._cwl_executor.file_manager.ROOT_DIRECTORY).absolute()
+
@property
def workflow_repository(self) -> WorkflowRepository:
return self._workflow_repository
@@ -232,12 +239,12 @@ def _preprocess_data(self, data: Dict) -> Dict:
def _clear_data(self):
self._yaml_input_data = None
- def _execute_workflow(self, code_path: Path) -> Optional[Exception]:
+ def _execute_workflow(self, code_path: Path, provenance: bool = False) -> Optional[Exception]:
input_data = [self._yaml_input_data] if self._yaml_input_data is not None else []
self._cwl_executor.set_data(input_data)
self._cwl_executor.set_workflow_path(str(code_path))
self.log.debug('starting executing workflow ...')
- run_id, results, exception = self._cwl_executor.execute()
+ run_id, results, exception, research_object = self._cwl_executor.execute(provenance)
self.log.debug(f'\texecution results: {run_id}, {results}, {exception}')
output_directory_for_that_run = str(run_id)
for output in results:
@@ -259,6 +266,22 @@ def _execute_workflow(self, code_path: Path) -> Optional[Exception]:
metadata=results[output]
)
self.send_json_response(results)
+ if research_object is not None:
+ self.send_text_to_stdout(f'\nProvenance stored in directory {research_object.folder}')
+ for path, _, files in os.walk(research_object.folder):
+ for name in files:
+ file = os.path.relpath(os.path.join(path, name), self._boot_directory.as_posix())
+ self.send_response(
+ self.iopub_socket,
+ 'display_data',
+ {
+ 'data': {
+ "text/html": f'{file}',
+ "text/plain": f"{file}"
+ },
+ 'metadata': {},
+ },
+ )
if exception is not None:
self.log.debug(f'execution error: {exception}')
self.send_response(self.iopub_socket, 'stream', {'name': 'stderr', 'text': str(exception)})
@@ -283,6 +306,10 @@ def do_complete(self, code: str, cursor_pos: int):
def send_text_to_stdout(self, text: str):
self.send_response(self.iopub_socket, 'stream', {'name': 'stdout', 'text': text})
+ def __del__(self):
+ shutil.rmtree(self._session_dir, ignore_errors=True)
+ os.chdir(self._boot_directory.as_posix())
+
if __name__ == '__main__':
from ipykernel.kernelapp import IPKernelApp
diff --git a/cwlkernel/CoreExecutor.py b/cwlkernel/CoreExecutor.py
index 7399e35..6213507 100644
--- a/cwlkernel/CoreExecutor.py
+++ b/cwlkernel/CoreExecutor.py
@@ -1,5 +1,8 @@
+import functools
+import logging
import os
import sys
+import tempfile
import traceback
from pathlib import Path
from subprocess import DEVNULL
@@ -8,22 +11,80 @@
List,
Optional,
Tuple,
- NoReturn)
+ NoReturn, cast, IO, MutableMapping, MutableSequence)
from uuid import uuid4, UUID
-from cwltool.context import RuntimeContext
+from cwltool.context import RuntimeContext, LoadingContext
+from cwltool.executors import JobExecutor
from cwltool.factory import Factory
+from cwltool.load_tool import fetch_document
+from cwltool.loghandler import _logger
+from cwltool.main import ProvLogFormatter, prov_deps
+from cwltool.process import add_sizes
+from cwltool.provenance import ResearchObject
+from cwltool.stdfsaccess import StdFsAccess
+from cwltool.utils import CWLObjectType, visit_class
from ruamel import yaml
from .IOManager import IOFileManager
+class JupyterFactory(Factory):
+
+ def __init__(self, root_directory: str,
+ executor: Optional[JobExecutor] = None,
+ loading_context: Optional[LoadingContext] = None,
+ runtime_context: Optional[RuntimeContext] = None, ):
+ super().__init__(
+ executor=executor,
+ loading_context=loading_context,
+ runtime_context=runtime_context,
+ )
+ self.runtime_context.outdir = root_directory
+ self.runtime_context.basedir = root_directory
+ self.runtime_context.default_stdin = DEVNULL
+ self.runtime_context.default_stdout = DEVNULL
+ self.runtime_context.default_stderr = DEVNULL
+
+
+class ProvenanceFactory(JupyterFactory):
+
+ def __init__(self,
+ workflow_uri_path: str,
+ root_directory: str,
+ stove_provenance_directory: str,
+ executor: Optional[JobExecutor] = None,
+ loading_context: Optional[LoadingContext] = None,
+ runtime_context: Optional[RuntimeContext] = None) -> None:
+ """Easy way to load a CWL document for execution."""
+ super().__init__(
+ root_directory=root_directory,
+ executor=executor,
+ loading_context=loading_context,
+ runtime_context=runtime_context
+ )
+ self.store_provenance_directory = stove_provenance_directory
+ self.loading_context, self.workflow_object, self.uri = fetch_document(workflow_uri_path, self.loading_context)
+ make_fs_access = self.runtime_context.make_fs_access \
+ if self.runtime_context.make_fs_access is not None else StdFsAccess
+ ro = ResearchObject(
+ make_fs_access(""),
+ )
+ self.runtime_context.research_obj = ro
+ log_file_io = ro.open_log_file_for_activity(ro.engine_uuid)
+ prov_log_handler = logging.StreamHandler(cast(IO[str], log_file_io))
+ prov_log_handler.setFormatter(ProvLogFormatter())
+ _logger.addHandler(prov_log_handler)
+ _logger.debug("[provenance] Logging to %s", log_file_io)
+
+
class CoreExecutor:
- def __init__(self, file_manager: IOFileManager):
+ def __init__(self, file_manager: IOFileManager, provenance_directory: Optional[Path]):
self.file_manager = file_manager
self._workflow_path = None
self._data_paths = []
+ self.provenance_directory = provenance_directory if provenance_directory is not None else tempfile.mkdtemp()
def set_data(self, data: List[str]) -> List[str]:
self._data_paths = [self.file_manager.write(f'{str(uuid4())}.yml', d.encode()) for d in data]
@@ -37,20 +98,23 @@ def set_workflow_path(self, workflow_str: str) -> str:
self._workflow_path = workflow_str
return self._workflow_path
- def execute(self) -> Tuple[UUID, Dict, Optional[Exception]]:
+ def execute(self, provenance=False) -> Tuple[UUID, Dict, Optional[Exception], Optional[ResearchObject]]:
"""
- :return: Run ID, dict with new files, exception if there is any
+ :param provenance: Execute with provenance enabled/disabled.
+ :return: Run ID, dict with new files, exception if there is any.
"""
run_id = uuid4()
-
- runtime_context = RuntimeContext()
- runtime_context.outdir = self.file_manager.ROOT_DIRECTORY
- runtime_context.basedir = self.file_manager.ROOT_DIRECTORY
- runtime_context.default_stdin = DEVNULL
- runtime_context.default_stdout = DEVNULL
- runtime_context.default_stderr = DEVNULL
+ factory: JupyterFactory
+ if not provenance:
+ factory = JupyterFactory(self.file_manager.ROOT_DIRECTORY)
+ else:
+ provenance_dir = os.path.join(self.provenance_directory.as_posix(), 'provenance')
+ factory = ProvenanceFactory(
+ Path(self._workflow_path).as_uri(),
+ self.file_manager.ROOT_DIRECTORY,
+ provenance_dir
+ )
os.chdir(self.file_manager.ROOT_DIRECTORY)
- factory = Factory(runtime_context=runtime_context)
executable = factory.make(self._workflow_path)
data = {}
for data_file in self._data_paths:
@@ -59,10 +123,53 @@ def execute(self) -> Tuple[UUID, Dict, Optional[Exception]]:
data = {**new_data, **data}
try:
result: Dict = executable(**data)
- return run_id, result, None
+ e = None
except Exception as e:
traceback.print_exc(file=sys.stderr)
- return run_id, {}, e
+ result = {}
+
+ if provenance:
+ self._store_provenance(cast(ProvenanceFactory, factory), result)
+ return run_id, result, e, factory.runtime_context.research_obj
+
+ @classmethod
+ def _store_provenance(cls, factory: ProvenanceFactory, out) -> None:
+ """Proxy method to cwltool's logic"""
+ runtime_context = factory.runtime_context
+ loading_context = factory.loading_context
+ workflow_object = factory.workflow_object
+ uri = factory.uri
+
+ if runtime_context.research_obj is not None:
+ runtime_context.research_obj.create_job(out, True)
+
+ def remove_at_id(doc: CWLObjectType) -> None:
+ for key in list(doc.keys()):
+ if key == "@id":
+ del doc[key]
+ else:
+ value = doc[key]
+ if isinstance(value, MutableMapping):
+ remove_at_id(value)
+ elif isinstance(value, MutableSequence):
+ for entry in value:
+ if isinstance(entry, MutableMapping):
+ remove_at_id(entry)
+
+ remove_at_id(out)
+ visit_class(
+ out,
+ ("File",),
+ functools.partial(add_sizes, runtime_context.make_fs_access("")),
+ )
+
+ research_obj = runtime_context.research_obj
+ if loading_context.loader is not None:
+ research_obj.generate_snapshot(
+ prov_deps(workflow_object, loading_context.loader, uri)
+ )
+
+ research_obj.close(factory.store_provenance_directory)
@classmethod
def validate_input_files(cls, yaml_input: Dict, cwd: Path) -> NoReturn:
diff --git a/cwlkernel/kernel_magics.py b/cwlkernel/kernel_magics.py
index 88eaf40..1bf1592 100644
--- a/cwlkernel/kernel_magics.py
+++ b/cwlkernel/kernel_magics.py
@@ -2,6 +2,7 @@
import json
import os
import random
+import subprocess
from io import StringIO
from pathlib import Path
from typing import List
@@ -101,15 +102,30 @@ def snippet(kernel: CWLKernel, command: str):
class ExecutionMagics:
@staticmethod
- @CWLKernel.register_magic()
- def execute(kernel: CWLKernel, execute_argument_string: str):
+ def _parse_args(execute_argument_string: str):
execute_argument_string = execute_argument_string.splitlines()
cwl_id = execute_argument_string[0].strip()
+ yaml_str_data = '\n'.join(execute_argument_string[1:])
+ return cwl_id, yaml_str_data
+
+ @staticmethod
+ def _execute(kernel: CWLKernel, execute_argument_string: str, provenance: bool = False):
+ cwl_id, yaml_str_data = ExecutionMagics._parse_args(execute_argument_string)
cwl_component_path: Path = kernel.workflow_repository.get_tools_path_by_id(cwl_id)
- kernel._set_data('\n'.join(execute_argument_string[1:]))
- kernel._execute_workflow(cwl_component_path)
+ kernel._set_data(yaml_str_data)
+ kernel._execute_workflow(cwl_component_path, provenance=provenance)
kernel._clear_data()
+ @staticmethod
+ @CWLKernel.register_magic()
+ def execute(kernel: CWLKernel, execute_argument_string: str):
+ ExecutionMagics._execute(kernel, execute_argument_string, provenance=False)
+
+ @staticmethod
+ @CWLKernel.register_magic('executeWithProvenance')
+ def execute_with_provenance(kernel: CWLKernel, execute_argument_string: str):
+ ExecutionMagics._execute(kernel, execute_argument_string, provenance=True)
+
@staticmethod
@CWLKernel.register_magics_suggester('execute')
def suggest_execution_id(query_token: str, *args, **kwargs) -> List[str]:
@@ -372,6 +388,28 @@ def visualize_graph(kernel: CWLKernel, tool_id: str):
)
+@CWLKernel.register_magic()
+def system(kernel: CWLKernel, commands: str):
+ """
+ Execute bash commands in the Runtime Directory of the session.
+
+ @param kernel:
+ @param commands:
+ @return:
+ """
+ result = subprocess.run(
+ commands,
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ shell=True, cwd=kernel.runtime_directory.as_posix()
+ )
+ stdout = result.stdout.decode()
+ stderr = result.stderr.decode()
+ if len(stdout) > 0:
+ kernel.send_text_to_stdout(stdout)
+ if len(stderr) > 0:
+ kernel.send_error_response(stderr)
+
+
# import user's magic commands
if CWLKernel_CONF.CWLKERNEL_MAGIC_COMMANDS_DIRECTORY is not None:
diff --git a/examples/introduction.ipynb b/examples/introduction.ipynb
index 43e1819..ade298f 100644
--- a/examples/introduction.ipynb
+++ b/examples/introduction.ipynb
@@ -89,20 +89,20 @@
"data": {
"application/json": {
"example_output": {
- "basename": "962e7e02e9c8b4ebb94b0fff3f46694014d0cc79",
+ "basename": "dea13d39e960dfdad075c55ea61ed207dcada01d",
"checksum": "sha1$47a013e660d408619d894b20806b1d5086aab03b",
"class": "File",
"http://commonwl.org/cwltool#generation": 0,
"id": "example_output",
- "location": "file:///private/tmp/CWLKERNEL_DATA/6ba2c2e0-1b1d-470a-88a2-a9737aefa60c/runtime_data/962e7e02e9c8b4ebb94b0fff3f46694014d0cc79",
+ "location": "file:///private/tmp/CWLKERNEL_DATA/2bd19feb-6520-446d-be42-54c64fde5cd9/runtime_data/dea13d39e960dfdad075c55ea61ed207dcada01d",
"nameext": "",
- "nameroot": "962e7e02e9c8b4ebb94b0fff3f46694014d0cc79",
+ "nameroot": "dea13d39e960dfdad075c55ea61ed207dcada01d",
"result_counter": 0,
"size": 13
}
},
"text/plain": [
- "{\"example_output\": {\"location\": \"file:///private/tmp/CWLKERNEL_DATA/6ba2c2e0-1b1d-470a-88a2-a9737aefa60c/runtime_data/962e7e02e9c8b4ebb94b0fff3f46694014d0cc79\", \"basename\": \"962e7e02e9c8b4ebb94b0fff3f46694014d0cc79\", \"nameroot\": \"962e7e02e9c8b4ebb94b0fff3f46694014d0cc79\", \"nameext\": \"\", \"class\": \"File\", \"checksum\": \"sha1$47a013e660d408619d894b20806b1d5086aab03b\", \"size\": 13, \"http://commonwl.org/cwltool#generation\": 0, \"id\": \"example_output\", \"result_counter\": 0}}"
+ "{\"example_output\": {\"location\": \"file:///private/tmp/CWLKERNEL_DATA/2bd19feb-6520-446d-be42-54c64fde5cd9/runtime_data/dea13d39e960dfdad075c55ea61ed207dcada01d\", \"basename\": \"dea13d39e960dfdad075c55ea61ed207dcada01d\", \"nameroot\": \"dea13d39e960dfdad075c55ea61ed207dcada01d\", \"nameext\": \"\", \"class\": \"File\", \"checksum\": \"sha1$47a013e660d408619d894b20806b1d5086aab03b\", \"size\": 13, \"http://commonwl.org/cwltool#generation\": 0, \"id\": \"example_output\", \"result_counter\": 0}}"
]
},
"metadata": {
@@ -119,6 +119,289 @@
"message: Hello world!"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Add data in memory"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "example_output": {
+ "basename": "07d090896bde77c6d2de664ba9d9e32dd23c4133",
+ "checksum": "sha1$47a013e660d408619d894b20806b1d5086aab03b",
+ "class": "File",
+ "http://commonwl.org/cwltool#generation": 0,
+ "id": "example_output",
+ "location": "file:///private/tmp/CWLKERNEL_DATA/3991bf77-e2c9-41a9-ad5a-028113e1d2a3/runtime_data/07d090896bde77c6d2de664ba9d9e32dd23c4133",
+ "nameext": "",
+ "nameroot": "07d090896bde77c6d2de664ba9d9e32dd23c4133",
+ "result_counter": 0,
+ "size": 13
+ }
+ },
+ "text/plain": [
+ "{\"example_output\": {\"location\": \"file:///private/tmp/CWLKERNEL_DATA/3991bf77-e2c9-41a9-ad5a-028113e1d2a3/runtime_data/07d090896bde77c6d2de664ba9d9e32dd23c4133\", \"basename\": \"07d090896bde77c6d2de664ba9d9e32dd23c4133\", \"nameroot\": \"07d090896bde77c6d2de664ba9d9e32dd23c4133\", \"nameext\": \"\", \"class\": \"File\", \"checksum\": \"sha1$47a013e660d408619d894b20806b1d5086aab03b\", \"size\": 13, \"http://commonwl.org/cwltool#generation\": 0, \"id\": \"example_output\", \"result_counter\": 0}}"
+ ]
+ },
+ "metadata": {
+ "application/json": {
+ "expanded": false,
+ "root": "root"
+ }
+ },
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Provenance Stores in directory /Users/dks/Workspaces/CWLKernel/examples/provenance\n",
+ ": "
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "provenance/bagit.txt"
+ ],
+ "text/plain": [
+ "full path...."
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "provenance/bag-info.txt"
+ ],
+ "text/plain": [
+ "full path...."
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "provenance/manifest-sha1.txt"
+ ],
+ "text/plain": [
+ "full path...."
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "provenance/tagmanifest-sha256.txt"
+ ],
+ "text/plain": [
+ "full path...."
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "provenance/tagmanifest-sha1.txt"
+ ],
+ "text/plain": [
+ "full path...."
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "provenance/tagmanifest-sha512.txt"
+ ],
+ "text/plain": [
+ "full path...."
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "provenance/snapshot/echo.cwl"
+ ],
+ "text/plain": [
+ "full path...."
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "provenance/workflow/primary-job.json"
+ ],
+ "text/plain": [
+ "full path...."
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "provenance/workflow/primary-output.json"
+ ],
+ "text/plain": [
+ "full path...."
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "provenance/data/d3/d3486ae9136e7856bc42212385ea797094475802"
+ ],
+ "text/plain": [
+ "full path...."
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "provenance/data/47/47a013e660d408619d894b20806b1d5086aab03b"
+ ],
+ "text/plain": [
+ "full path...."
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "provenance/metadata/manifest.json"
+ ],
+ "text/plain": [
+ "full path...."
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "provenance/metadata/logs/engine.6b9569de-e707-489e-afa7-518292f93e87.txt"
+ ],
+ "text/plain": [
+ "full path...."
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "provenance/metadata/provenance/primary.cwlprov.jsonld"
+ ],
+ "text/plain": [
+ "full path...."
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "provenance/metadata/provenance/primary.cwlprov.json"
+ ],
+ "text/plain": [
+ "full path...."
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "provenance/metadata/provenance/primary.cwlprov.xml"
+ ],
+ "text/plain": [
+ "full path...."
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "provenance/metadata/provenance/primary.cwlprov.nt"
+ ],
+ "text/plain": [
+ "full path...."
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "provenance/metadata/provenance/primary.cwlprov.ttl"
+ ],
+ "text/plain": [
+ "full path...."
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "provenance/metadata/provenance/primary.cwlprov.provn"
+ ],
+ "text/plain": [
+ "full path...."
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "% executeWithProvenance echo\n",
+ "message: Hello world!"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 3,
diff --git a/examples/parameter_reference.ipynb b/examples/parameter_reference.ipynb
index 1babaee..9828467 100644
--- a/examples/parameter_reference.ipynb
+++ b/examples/parameter_reference.ipynb
@@ -27,6 +27,26 @@
"cell_type": "code",
"execution_count": 1,
"metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "a goodbye.txt\n"
+ ]
+ }
+ ],
+ "source": [
+ "% system \n",
+ "rm -f hello.tar goodbye.txt\n",
+ "touch goodbye.txt\n",
+ "tar -cvf hello.tar goodbye.txt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -61,22 +81,50 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
- "name": "stderr",
+ "name": "stdout",
"output_type": "stream",
"text": [
- "FileNotFoundError: /private/tmp/CWLKERNEL_DATA/123c0738-dab8-4809-9d86-4267f0080da4/runtime_data/hello.tar"
+ "Add data in memory"
]
+ },
+ {
+ "data": {
+ "application/json": {
+ "extracted_file": {
+ "basename": "goodbye.txt",
+ "checksum": "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709",
+ "class": "File",
+ "http://commonwl.org/cwltool#generation": 0,
+ "id": "extracted_file",
+ "location": "file:///private/tmp/CWLKERNEL_DATA/dc9738cd-5710-4f75-902f-d1fac3bdf9d6/runtime_data/goodbye.txt",
+ "nameext": ".txt",
+ "nameroot": "goodbye",
+ "result_counter": 0,
+ "size": 0
+ }
+ },
+ "text/plain": [
+ "{\"extracted_file\": {\"location\": \"file:///private/tmp/CWLKERNEL_DATA/dc9738cd-5710-4f75-902f-d1fac3bdf9d6/runtime_data/goodbye.txt\", \"basename\": \"goodbye.txt\", \"nameroot\": \"goodbye\", \"nameext\": \".txt\", \"class\": \"File\", \"checksum\": \"sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709\", \"size\": 0, \"http://commonwl.org/cwltool#generation\": 0, \"id\": \"extracted_file\", \"result_counter\": 0}}"
+ ]
+ },
+ "metadata": {
+ "application/json": {
+ "expanded": false,
+ "root": "root"
+ }
+ },
+ "output_type": "display_data"
}
],
"source": [
"% execute tar\n",
"tarfile:\n",
" class: File\n",
- " path: hello.tar\n",
+ " location: hello.tar\n",
"extractfile: goodbye.txt"
]
}
diff --git a/tests/test_CWLKernel.py b/tests/test_CWLKernel.py
index 9af0677..0d0403e 100644
--- a/tests/test_CWLKernel.py
+++ b/tests/test_CWLKernel.py
@@ -1,6 +1,7 @@
import json
import logging
import os
+import shutil
import tarfile
import tempfile
import unittest
@@ -79,6 +80,11 @@ def get_data():
self.assertTrue(full_path.startswith(kernel._results_manager.ROOT_DIRECTORY), 'output is in a wrong directory')
self.assertTrue(basename, 'hello.txt')
+ session_dir = kernel._session_dir
+ self.assertTrue(os.path.isdir(session_dir))
+ kernel.__del__()
+ self.assertFalse(os.path.isdir(session_dir))
+
def test_get_past_results_without_input(self):
from cwlkernel.CWLKernel import CWLKernel
kernel = CWLKernel()
@@ -933,6 +939,67 @@ def test_view_magic_command(self):
'image/svg+xml',
responses[-1][0][2]['data'])
+ def test_system_magic_command(self):
+ kernel = CWLKernel()
+ # cancel send_response
+ responses = []
+ kernel.send_response = lambda *args, **kwargs: responses.append((args, kwargs))
+ self.assertDictEqual(
+ {'status': 'ok', 'execution_count': 0, 'payload': [], 'user_expressions': {}},
+ kernel.do_execute("% system echo 'Hello World'")
+ )
+ self.assertDictEqual(
+ {'name': 'stdout', 'text': 'Hello World\n'},
+ responses[-1][0][2],
+ )
+
+ self.assertDictEqual(
+ {'status': 'ok', 'execution_count': 0, 'payload': [], 'user_expressions': {}},
+ kernel.do_execute('% system ls ERROR')
+ )
+
+ self.assertEqual(
+ 'stderr',
+ responses[-1][0][2]['name'],
+ )
+
+ def test_execute_with_provenance(self):
+ kernel = CWLKernel()
+ # cancel send_response
+ responses = []
+ kernel.send_response = lambda *args, **kwargs: responses.append((args, kwargs))
+
+ yaml = YAML(typ='safe')
+
+ with open(os.sep.join([self.cwl_directory, 'echo.cwl'])) as f:
+ workflow_str = f.read()
+ self.assertDictEqual(
+ {'status': 'ok', 'execution_count': 0, 'payload': [], 'user_expressions': {}},
+ kernel.do_execute(workflow_str, False)
+ )
+ self.assertIsNotNone(kernel._workflow_repository.get_by_id(yaml.load(workflow_str)['id']))
+
+ with open(os.sep.join([self.data_directory, 'echo-job.yml'])) as f:
+ data = '\n'.join(["% executeWithProvenance echo", f.read()])
+
+ self.assertDictEqual(
+ {'status': 'ok', 'execution_count': 0, 'payload': [], 'user_expressions': {}},
+ kernel.do_execute(data)
+ )
+
+ provenance_directory = list(filter(
+ lambda r: 'text' in r[0][2] and "Provenance stored in directory" in r[0][2]['text'],
+ responses
+ ))[0][0][2]['text'].split()[-1]
+ print(provenance_directory)
+ self.assertTrue(os.path.isdir(provenance_directory))
+ self.assertTrue(os.path.isdir(os.path.join(provenance_directory, 'data')))
+ self.assertTrue(os.path.isdir(os.path.join(provenance_directory, 'metadata')))
+ self.assertTrue(os.path.isdir(os.path.join(provenance_directory, 'snapshot')))
+ self.assertTrue(os.path.isfile(os.path.join(provenance_directory, 'snapshot', 'echo.cwl')))
+ self.assertTrue(os.path.isdir(os.path.join(provenance_directory, 'workflow')))
+ shutil.rmtree(provenance_directory)
+
if __name__ == '__main__':
unittest.main()
diff --git a/tests/test_CoreExecutor.py b/tests/test_CoreExecutor.py
index 97ee450..d77c2a5 100644
--- a/tests/test_CoreExecutor.py
+++ b/tests/test_CoreExecutor.py
@@ -22,17 +22,18 @@ def setUpClass(cls) -> None:
def test_executor_execute(self):
# That tests fails only when is called through PyCharm
file_manager = IOFileManager(self.kernel_root_directory)
- executor = CoreExecutor(file_manager)
+ executor = CoreExecutor(file_manager, None)
workflow_path = os.sep.join([self.cwl_directory, 'essential_input.cwl'])
executor.set_workflow_path(workflow_path)
with open(os.sep.join([self.data_directory, 'essential_input_data1.yml'])) as f:
data_str = f.read()
executor.set_data([data_str])
try:
- execution_id, new_files, exception = executor.execute()
+ execution_id, new_files, exception, research_object = executor.execute()
self.assertIsNotNone(execution_id)
self.assertDictEqual(new_files, {})
self.assertIsNone(exception, 'An exception occurred while executing workflow')
+ self.assertIsNone(research_object, 'Research object must be none when provenance is disabled')
except Exception:
self.fail("execution failed")
diff --git a/tests/test_IOManager.py b/tests/test_IOManager.py
index 5d5e9d9..f553e57 100644
--- a/tests/test_IOManager.py
+++ b/tests/test_IOManager.py
@@ -10,9 +10,8 @@
class TestIOManager(unittest.TestCase):
root_directory: str
- @classmethod
- def setUpClass(cls) -> None:
- cls.root_directory = tempfile.mkdtemp()
+ def setUp(self) -> None:
+ self.root_directory = tempfile.mkdtemp()
def test_read(self):
fd, filename = tempfile.mkstemp(prefix='CWLJNKernelUnittests_', dir=self.root_directory)
@@ -47,15 +46,18 @@ def test_append_files(self):
self.assertListEqual(
[os.path.basename(f) for f in new_files],
- [os.path.basename(f) for f in os.listdir(self.root_directory) if os.path.isfile(os.path.join(self.root_directory, f))]
+ [os.path.basename(f) for f in os.listdir(self.root_directory) if
+ os.path.isfile(os.path.join(self.root_directory, f))]
)
with open(new_files[0]) as f:
copy_text = f.read()
self.assertEqual(copy_text, 'tmp text')
- @classmethod
- def tearDownClass(cls) -> None:
- shutil.rmtree(cls.root_directory)
+ def tearDown(self) -> None:
+ try:
+ shutil.rmtree(self.root_directory)
+ except:
+ pass
if __name__ == '__main__':