From fbb3ca2be3b21776420812c812725c1ae2cfce92 Mon Sep 17 00:00:00 2001 From: Giannis Doukas Date: Wed, 1 Jul 2020 21:57:45 +0100 Subject: [PATCH 1/9] fix new lines after the docs in the autogerated scirpt --- ipython2cwl/repo2cwl.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ipython2cwl/repo2cwl.py b/ipython2cwl/repo2cwl.py index 1cd536a..3a9ab37 100644 --- a/ipython2cwl/repo2cwl.py +++ b/ipython2cwl/repo2cwl.py @@ -56,8 +56,7 @@ def _store_jn_as_script(notebook_path: str, git_directory_absolute_path: str, bi 'DO NOT EDIT THIS FILE', 'THIS FILE IS AUTO-GENERATED BY THE ipython2cwl.', 'FOR MORE INFORMATION CHECK https://github.com/giannisdoukas/ipython2cwl', - '\n\n', - '"""', + '"""\n\n', converter._wrap_script_to_method(converter._tree, converter._variables) ]) with open(script_absolute_name, 'w') as fd: From 667f53d9c2e3e19f393161e789769e8ebba1a2bf Mon Sep 17 00:00:00 2001 From: Giannis Doukas Date: Wed, 1 Jul 2020 23:16:28 +0100 Subject: [PATCH 2/9] init dumpables --- ipython2cwl/cwltoolextractor.py | 66 ++++++++++++++++++++------------- ipython2cwl/iotypes.py | 29 ++++++++++++--- tests/test_cwltoolextractor.py | 53 ++++++++++++++++++++++++++ 3 files changed, 117 insertions(+), 31 deletions(-) diff --git a/ipython2cwl/cwltoolextractor.py b/ipython2cwl/cwltoolextractor.py index 861c861..b5992a0 100644 --- a/ipython2cwl/cwltoolextractor.py +++ b/ipython2cwl/cwltoolextractor.py @@ -6,14 +6,15 @@ import tempfile from collections import namedtuple from pathlib import Path -from typing import Dict, Any +from typing import Dict, Any, List import astor import nbconvert import yaml from nbformat.notebooknode import NotebookNode -from .iotypes import CWLFilePathInput, CWLBooleanInput, CWLIntInput, CWLStringInput, CWLFilePathOutput +from .iotypes import CWLFilePathInput, CWLBooleanInput, CWLIntInput, CWLStringInput, CWLFilePathOutput, CWLDumpableFile, \ + CWLDumpableBinaryFile from .requirements_manager import RequirementsManager with 
open(os.sep.join([os.path.abspath(os.path.dirname(__file__)), 'templates', 'template.dockerfile'])) as f: @@ -21,6 +22,11 @@ with open(os.sep.join([os.path.abspath(os.path.dirname(__file__)), 'templates', 'template.setup'])) as f: SETUP_TEMPLATE = f.read() +_VariableNameTypePair = namedtuple( + 'VariableNameTypePair', + ['name', 'cwl_typeof', 'argparse_typeof', 'required', 'is_input', 'is_output', 'value'] +) + # TODO: check if supports recursion if main function exists @@ -55,9 +61,15 @@ class AnnotatedVariablesExtractor(ast.NodeTransformer): CWLFilePathOutput.__name__ } + dumpable_mapper = { + (CWLDumpableFile.__name__,): "with open('{var_name}', 'w') as f:\n\tf.write({var_name})", + (CWLDumpableBinaryFile.__name__,): "with open('{var_name}', 'wb') as f:\n\tf.write({var_name})", + } + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.extracted_nodes = [] + self.extracted_variables: List = [] + self.to_dump: List = [] def __get_annotation__(self, type_annotation): annotation = None @@ -77,15 +89,27 @@ def visit_AnnAssign(self, node): annotation = self.__get_annotation__(node.annotation) if annotation in self.input_type_mapper: mapper = self.input_type_mapper[annotation] - self.extracted_nodes.append( - (node, mapper[0], mapper[1], not mapper[0].endswith('?'), True, False) + self.extracted_variables.append(_VariableNameTypePair( + node.target.id, mapper[0], mapper[1], not mapper[0].endswith('?'), True, False, None) ) return None - + elif annotation in self.dumpable_mapper: + dump_tree = ast.parse(self.dumpable_mapper[annotation].format(var_name=node.target.id)) + self.to_dump.append(dump_tree.body) + self.extracted_variables.append(_VariableNameTypePair( + node.target.id, None, None, None, False, True, node.target.id) + ) + # removing type annotation + return ast.Assign( + col_offset=node.col_offset, + lineno=node.lineno, + targets=[node.target], + value=node.value + ) elif (isinstance(node.annotation, ast.Name) and node.annotation.id in 
self.output_type_mapper) or \ (isinstance(node.annotation, ast.Str) and node.annotation.s in self.output_type_mapper): - self.extracted_nodes.append( - (node, None, None, None, False, True) + self.extracted_variables.append(_VariableNameTypePair( + node.target.id, None, None, None, False, True, node.value.s) ) # removing type annotation return ast.Assign( @@ -123,12 +147,6 @@ class AnnotatedIPython2CWLToolConverter: """ _code: str - - _VariableNameTypePair = namedtuple( - 'VariableNameTypePair', - ['name', 'cwl_typeof', 'argparse_typeof', 'required', 'is_input', 'is_output', 'value'] - ) - """The annotated python code to convert.""" def __init__(self, annotated_ipython_code: str): @@ -137,19 +155,15 @@ def __init__(self, annotated_ipython_code: str): self._code = annotated_ipython_code extractor = AnnotatedVariablesExtractor() - self._tree = ast.fix_missing_locations(extractor.visit(ast.parse(self._code))) + self._tree = extractor.visit(ast.parse(self._code)) + [self._tree.body.extend(d) for d in extractor.to_dump] + self._tree = ast.fix_missing_locations(self._tree) self._variables = [] - for node, cwl_type, click_type, required, is_input, is_output in extractor.extracted_nodes: - if is_input: - self._variables.append( - self._VariableNameTypePair(node.target.id, cwl_type, click_type, required, is_input, is_output, - None) - ) - if is_output: - self._variables.append( - self._VariableNameTypePair(node.target.id, cwl_type, click_type, required, is_input, is_output, - node.value.s) - ) + for variable in extractor.extracted_variables: # type: _VariableNameTypePair + if variable.is_input: + self._variables.append(variable) + if variable.is_output: + self._variables.append(variable) @classmethod def from_jupyter_notebook_node(cls, node: NotebookNode) -> 'AnnotatedIPython2CWLToolConverter': diff --git a/ipython2cwl/iotypes.py b/ipython2cwl/iotypes.py index 8cb6688..3f64d39 100644 --- a/ipython2cwl/iotypes.py +++ b/ipython2cwl/iotypes.py @@ -1,19 +1,38 @@ +class 
_CWLInput: + pass + + +class CWLFilePathInput(_CWLInput): + pass + + +class CWLBooleanInput(_CWLInput): + pass + + +class CWLStringInput(_CWLInput): + pass + + +class CWLIntInput(_CWLInput): + pass + -class CWLFilePathInput: +class _CWLOutput: pass -class CWLBooleanInput: +class CWLFilePathOutput(_CWLOutput): pass -class CWLStringInput: +class _CWLDumpable(_CWLOutput): pass -class CWLIntInput: +class CWLDumpableFile(_CWLDumpable): pass -class CWLFilePathOutput: +class CWLDumpableBinaryFile(_CWLDumpable): pass diff --git a/tests/test_cwltoolextractor.py b/tests/test_cwltoolextractor.py index da573ad..b6332c5 100644 --- a/tests/test_cwltoolextractor.py +++ b/tests/test_cwltoolextractor.py @@ -377,3 +377,56 @@ def test_AnnotatedIPython2CWLToolConverter_optional_array_input(self): self.assertListEqual([], AnnotatedIPython2CWLToolConverter(os.linesep.join([ 'x1: "RANDOM CHARACTERS!!!!!!" = True' ]))._variables) + + def test_AnnotatedIPython2CWLToolConverter_dumpables(self): + script = os.linesep.join([ + 'message: CWLDumpableFile = "this is a text from a dumpable"', + 'message2: "CWLDumpableFile" = "this is a text from a dumpable 2"', + 'binary_message: CWLDumpableBinaryFile = b"this is a text from a binary dumpable"', + 'print("Message:", message)', + 'print(b"Binary Message:" + binary_message)', + ]) + converter = AnnotatedIPython2CWLToolConverter(script) + generated_script = AnnotatedIPython2CWLToolConverter._wrap_script_to_method( + converter._tree, converter._variables + ) + for f in ['message', 'binary_message', 'message2']: + try: + os.remove(f) + except FileNotFoundError: + pass + exec(generated_script) + print(generated_script) + locals()['main']() + with open('message') as f: + self.assertEqual('this is a text from a dumpable', f.read()) + with open('message2') as f: + self.assertEqual('this is a text from a dumpable 2', f.read()) + with open('binary_message', 'rb') as f: + self.assertEqual(b'this is a text from a binary dumpable', f.read()) + + cwl_tool = 
converter.cwl_command_line_tool() + print(cwl_tool) + self.assertDictEqual( + { + 'message': { + 'type': 'File', + 'outputBinding': { + 'glob': 'message' + } + }, + 'message2': { + 'type': 'File', + 'outputBinding': { + 'glob': 'message2' + } + }, + 'binary_message': { + 'type': 'File', + 'outputBinding': { + 'glob': 'binary_message' + } + } + }, + cwl_tool['outputs'] + ) From 12028efb9c312d028aee755a6ac4d256f5dcc206 Mon Sep 17 00:00:00 2001 From: Giannis Doukas Date: Wed, 1 Jul 2020 23:16:57 +0100 Subject: [PATCH 3/9] rm files generated from tests --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 3ee1292..56a1207 100644 --- a/.gitignore +++ b/.gitignore @@ -247,3 +247,6 @@ tmp.py cwlbuild /tests/repo-like/result.yaml /tests/repo-like/messages.txt +/tests/binary_message +/tests/message +/tests/message2 From 6853f88fc397a705ccc4f390c64c2566cd2def14 Mon Sep 17 00:00:00 2001 From: Giannis Doukas Date: Wed, 1 Jul 2020 23:28:07 +0100 Subject: [PATCH 4/9] fix code style --- ipython2cwl/cwltoolextractor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ipython2cwl/cwltoolextractor.py b/ipython2cwl/cwltoolextractor.py index b5992a0..d3344c8 100644 --- a/ipython2cwl/cwltoolextractor.py +++ b/ipython2cwl/cwltoolextractor.py @@ -13,8 +13,8 @@ import yaml from nbformat.notebooknode import NotebookNode -from .iotypes import CWLFilePathInput, CWLBooleanInput, CWLIntInput, CWLStringInput, CWLFilePathOutput, CWLDumpableFile, \ - CWLDumpableBinaryFile +from .iotypes import CWLFilePathInput, CWLBooleanInput, CWLIntInput, CWLStringInput, CWLFilePathOutput, \ + CWLDumpableFile, CWLDumpableBinaryFile from .requirements_manager import RequirementsManager with open(os.sep.join([os.path.abspath(os.path.dirname(__file__)), 'templates', 'template.dockerfile'])) as f: From 398226092125c5c4c9960e87ecceb7f1c19b4e85 Mon Sep 17 00:00:00 2001 From: Giannis Doukas Date: Thu, 2 Jul 2020 16:33:08 +0100 Subject: [PATCH 5/9] add 
custom dumpables functionality --- ipython2cwl/cwltoolextractor.py | 65 ++++++++++++++++++++++++++------- ipython2cwl/iotypes.py | 20 ++++++---- test-requirements.txt | 3 +- tests/test_cwltoolextractor.py | 44 ++++++++++++++++++++++ 4 files changed, 111 insertions(+), 21 deletions(-) diff --git a/ipython2cwl/cwltoolextractor.py b/ipython2cwl/cwltoolextractor.py index d3344c8..65e1d6d 100644 --- a/ipython2cwl/cwltoolextractor.py +++ b/ipython2cwl/cwltoolextractor.py @@ -5,6 +5,7 @@ import tarfile import tempfile from collections import namedtuple +from copy import deepcopy from pathlib import Path from typing import Dict, Any, List @@ -14,7 +15,7 @@ from nbformat.notebooknode import NotebookNode from .iotypes import CWLFilePathInput, CWLBooleanInput, CWLIntInput, CWLStringInput, CWLFilePathOutput, \ - CWLDumpableFile, CWLDumpableBinaryFile + CWLDumpableFile, CWLDumpableBinaryFile, CWLDumpable from .requirements_manager import RequirementsManager with open(os.sep.join([os.path.abspath(os.path.dirname(__file__)), 'templates', 'template.dockerfile'])) as f: @@ -64,6 +65,7 @@ class AnnotatedVariablesExtractor(ast.NodeTransformer): dumpable_mapper = { (CWLDumpableFile.__name__,): "with open('{var_name}', 'w') as f:\n\tf.write({var_name})", (CWLDumpableBinaryFile.__name__,): "with open('{var_name}', 'wb') as f:\n\tf.write({var_name})", + (CWLDumpable.__name__, CWLDumpable.dump.__name__): None, } def __init__(self, *args, **kwargs): @@ -82,6 +84,8 @@ def __get_annotation__(self, type_annotation): annotation = self.__get_annotation__(ann_expr.value) elif isinstance(type_annotation, ast.Subscript): annotation = (type_annotation.value.id, *self.__get_annotation__(type_annotation.slice.value)) + elif isinstance(type_annotation, ast.Call): + annotation = (type_annotation.func.value.id, type_annotation.func.attr) return annotation def visit_AnnAssign(self, node): @@ -94,18 +98,53 @@ def visit_AnnAssign(self, node): ) return None elif annotation in self.dumpable_mapper: - 
dump_tree = ast.parse(self.dumpable_mapper[annotation].format(var_name=node.target.id)) - self.to_dump.append(dump_tree.body) - self.extracted_variables.append(_VariableNameTypePair( - node.target.id, None, None, None, False, True, node.target.id) - ) - # removing type annotation - return ast.Assign( - col_offset=node.col_offset, - lineno=node.lineno, - targets=[node.target], - value=node.value - ) + dumper = self.dumpable_mapper[annotation] + if dumper is not None: + dump_tree = ast.parse(dumper.format(var_name=node.target.id)) + self.to_dump.append(dump_tree.body) + self.extracted_variables.append(_VariableNameTypePair( + node.target.id, None, None, None, False, True, node.target.id) + ) + # removing type annotation + return ast.Assign( + col_offset=node.col_offset, + lineno=node.lineno, + targets=[node.target], + value=node.value + ) + else: + load_ctx = ast.Load() + func_name = deepcopy(node.annotation.args[0].value) + func_name.ctx = load_ctx + ast.fix_missing_locations(func_name) + + new_dump_node = ast.Expr( + col_offset=0, lineno=0, + value=ast.Call( + args=node.annotation.args[1:], + col_offset=0, + func=ast.Attribute( + attr=node.annotation.args[0].attr, + col_offset=0, + ctx=load_ctx, + lineno=0, + value=func_name, + ), + keywords=node.annotation.keywords + ) + ) + ast.fix_missing_locations(new_dump_node) + self.to_dump.append([new_dump_node]) + self.extracted_variables.append(_VariableNameTypePair( + node.target.id, None, None, None, False, True, node.annotation.args[1].s) + ) + # removing type annotation + return ast.Assign( + col_offset=node.col_offset, + lineno=node.lineno, + targets=[node.target], + value=node.value + ) elif (isinstance(node.annotation, ast.Name) and node.annotation.id in self.output_type_mapper) or \ (isinstance(node.annotation, ast.Str) and node.annotation.s in self.output_type_mapper): self.extracted_variables.append(_VariableNameTypePair( diff --git a/ipython2cwl/iotypes.py b/ipython2cwl/iotypes.py index 3f64d39..c0b31e0 100644 
--- a/ipython2cwl/iotypes.py +++ b/ipython2cwl/iotypes.py @@ -1,8 +1,11 @@ +from typing import Callable + + class _CWLInput: pass -class CWLFilePathInput(_CWLInput): +class CWLFilePathInput(str, _CWLInput): pass @@ -10,7 +13,7 @@ class CWLBooleanInput(_CWLInput): pass -class CWLStringInput(_CWLInput): +class CWLStringInput(str, _CWLInput): pass @@ -22,17 +25,20 @@ class _CWLOutput: pass -class CWLFilePathOutput(_CWLOutput): +class CWLFilePathOutput(str, _CWLOutput): pass -class _CWLDumpable(_CWLOutput): - pass +class CWLDumpable(_CWLOutput): + + @classmethod + def dump(cls, dumper: Callable, *args, **kwargs): + return _CWLOutput -class CWLDumpableFile(_CWLDumpable): +class CWLDumpableFile(CWLDumpable): pass -class CWLDumpableBinaryFile(_CWLDumpable): +class CWLDumpableBinaryFile(CWLDumpable): pass diff --git a/test-requirements.txt b/test-requirements.txt index 06fa332..5896fc7 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -4,4 +4,5 @@ coveralls>=2.0.0 virtualenv>=3.1.0 gitpython>=3.1.3 docker>=4.2.1 -git+https://github.com/giannisdoukas/cwltool.git#egg=cwltool \ No newline at end of file +git+https://github.com/giannisdoukas/cwltool.git#egg=cwltool +pandas==1.0.5 diff --git a/tests/test_cwltoolextractor.py b/tests/test_cwltoolextractor.py index b6332c5..3c50fa6 100644 --- a/tests/test_cwltoolextractor.py +++ b/tests/test_cwltoolextractor.py @@ -430,3 +430,47 @@ def test_AnnotatedIPython2CWLToolConverter_dumpables(self): }, cwl_tool['outputs'] ) + + def test_AnnotatedIPython2CWLToolConverter_custom_dumpables(self): + script = os.linesep.join([ + 'import pandas', + 'from ipython2cwl.iotypes import CWLDumpable', + 'd: CWLDumpable.dump(d.to_csv, "dumpable.csv", sep="\\t", index=False) = pandas.DataFrame([[1,2,3], [4,5,6], [7,8,9]])' + ]) + converter = AnnotatedIPython2CWLToolConverter(script) + generated_script = AnnotatedIPython2CWLToolConverter._wrap_script_to_method( + converter._tree, converter._variables + ) + for f in ["dumpable.csv"]: + try: 
+ os.remove(f) + except FileNotFoundError: + pass + exec(generated_script) + print(generated_script) + locals()['main']() + import pandas + data_file = pandas.read_csv('dumpable.csv', sep="\t") + self.assertListEqual( + [[0, 0, 0], [0, 0, 0], [0, 0, 0]], + (data_file.to_numpy() - pandas.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]).to_numpy()).tolist() + ) + + cwl_tool = converter.cwl_command_line_tool() + print(cwl_tool) + self.assertDictEqual( + { + 'd': { + 'type': 'File', + 'outputBinding': { + 'glob': 'dumpable.csv' + } + }, + }, + cwl_tool['outputs'] + ) + for f in ["dumpable.csv"]: + try: + os.remove(f) + except FileNotFoundError: + pass From a39f33632705e4367b22c320da91cc2eb4ca8a12 Mon Sep 17 00:00:00 2001 From: Giannis Doukas Date: Thu, 2 Jul 2020 20:19:02 +0100 Subject: [PATCH 6/9] add documentation --- docs/index.rst | 51 +++++++++++++++--- ipython2cwl/iotypes.py | 118 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 160 insertions(+), 9 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 05b8055..4490e8f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -30,18 +30,53 @@ IPython2CWL is a tool for converting `IPython `_ Jupyter N ------------------------------------------------------------------------------------------ IPython2CWL is based on `repo2docker `_, the same tool -used by `mybinder `_. Now, by writing Jupyter Notebook and publish them, including repo2docker -configuration, the community can not only execute the notebooks remotely but also to use them as steps in scientific +used by `mybinder `_. Now, by writing Jupyter Notebook and publishing them, including repo2docker +configuration, the community can not only execute the notebooks remotely but can also use them as steps in scientific workflows. 
-* Install ipython2cwl: :code:`pip install python2cwl` +* `Install ipython2cwl `_: :code:`pip install ipython2cwl` * Ensure that you have docker running * Create a directory to store the generated cwl files, for example cwlbuild * Execute :code:`jupyter repo2cwl https://github.com/giannisdoukas/cwl-annotated-jupyter-notebook.git -o cwlbuild` -Indices and tables -================== +HOW IT WORKS? +------------------ + +IPython2CWL parses each IPython notebook and finds the variables with the typing annotations. For each input variable, +the assignment of that variable will be generalised as a command line argument. Each output variable will be mapped +in the cwl description as an output file. + +SUPPORTED TYPES +------------------ + +.. automodule:: ipython2cwl.iotypes + :members: + + +THAT'S COOL! WHAT ABOUT LIST & OPTIONAL ARGUMENTS? +""""""""""""""""""""""""""""""""""""""""""""""""""" + +The basic input data types can be combined with the List and Optional annotations. For example, write the following +annotation: + +.. code-block:: python + + file_inputs: List[CWLFilePathInput] = ['data1.txt', 'data2.txt', 'data3.txt'] + example: Optional[CWLStringInput] = None + + +SEEMS INTERESTING! WHAT ABOUT A DEMO? +---------------------------------------- + +If you would like to see a demo before you start annotating your notebooks, check here! +`github.com/giannisdoukas/ipython2cwl-demo `_ + + +WHAT IF I WANT TO VALIDATE THAT THE GENERATED SCRIPTS ARE CORRECT? +------------------------------------------------------------------ + +All the generated scripts are stored in the docker image under the directory :code:`/app/cwl/bin`. You can see the list +of the files by running :code:`docker run [IMAGE_ID] find /app/cwl/bin/ -type f`.
+ + -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/ipython2cwl/iotypes.py b/ipython2cwl/iotypes.py index c0b31e0..6ae99db 100644 --- a/ipython2cwl/iotypes.py +++ b/ipython2cwl/iotypes.py @@ -1,3 +1,37 @@ +""" + +Basic Data Types +^^^^^^^^^^^^^^^^^ + +Each variable can be an input or an output. The basic data types are: + +* Inputs: + + * CWLFilePathInput + + * CWLBooleanInput + + * CWLStringInput + + * CWLIntInput + +* Outputs: + + * CWLFilePathOutput + + * CWLDumpableFile + + * CWLDumpableBinaryFile + + +Complex Dumpables Types +^^^^^^^^^^^^^^^^^^^^^^^^ + +Dumpables are variables which are able to be written to a file, but the jupyter notebook developer +does not want to write it, for example to avoid the IO overhead. To bypass that, you can use +Dumpables annotation. See :func:`~iotypes.CWLDumpable.dump` for more details. + +""" from typing import Callable @@ -6,18 +40,50 @@ class _CWLInput: class CWLFilePathInput(str, _CWLInput): + """Use that hint to annotate that a variable is a string-path input. You can use the typing annotation + as a string by importing it. At the generated script a command line argument with the name of the variable + will be created and the assignment of value will be generalised. + + >>> dataset1: CWLFilePathInput = './data/data.csv' + >>> dataset2: 'CWLFilePathInput' = './data/data.csv' + + """ pass class CWLBooleanInput(_CWLInput): + """Use that hint to annotate that a variable is a boolean input. You can use the typing annotation + as a string by importing it. At the generated script a command line argument with the name of the variable + will be created and the assignment of value will be generalised. + + >>> dataset1: CWLBooleanInput = True + >>> dataset2: 'CWLBooleanInput' = False + + """ pass class CWLStringInput(str, _CWLInput): + """Use that hint to annotate that a variable is a string input. You can use the typing annotation + as a string by importing it. 
At the generated script a command line argument with the name of the variable + will be created and the assignment of value will be generalised. + + >>> dataset1: CWLStringInput = 'this is a message input' + >>> dataset2: 'CWLStringInput' = 'yet another message input' + + """ pass class CWLIntInput(_CWLInput): + """Use that hint to annotate that a variable is an integer input. You can use the typing annotation + as a string by importing it. At the generated script a command line argument with the name of the variable + will be created and the assignment of value will be generalised. + + >>> dataset1: CWLIntInput = 1 + >>> dataset2: 'CWLIntInput' = 2 + + """ pass @@ -26,19 +92,69 @@ class _CWLOutput: class CWLFilePathOutput(str, _CWLOutput): + """Use that hint to annotate that a variable is a string-path to an output file. You can use the typing annotation + as a string by importing it. The generated file will be mapped as a CWL output. + + >>> filename: CWLFilePathOutput = 'data.csv' + + """ pass class CWLDumpable(_CWLOutput): + """Use that class to define custom Dumpables variables.""" @classmethod - def dump(cls, dumper: Callable, *args, **kwargs): + def dump(cls, dumper: Callable, filename, *args, **kwargs): + """ + Set the function to be used to dump the variable to a file. + + >>> import pandas + >>> d: CWLDumpable.dump(d.to_csv, "dumpable.csv", sep="\\t", index=False) = pandas.DataFrame( + ... [[1,2,3], [4,5,6], [7,8,9]] + ... ) + + In that example the converter will add at the end of the script the following line: + >>> d.to_csv("dumpable.csv", sep="\\t", index=False) + + :param dumper: The function that has to be called to write the variable to a file. + :param filename: The name of the generated file. That string must be the first argument + in the dumper function. That file will also be mapped as an output in + the CWL file.
+ :param args: Any positional arguments you want to pass to dumper after the filename + :param kwargs: Any keyword arguments you want to pass to dumper + """ return _CWLOutput class CWLDumpableFile(CWLDumpable): + """Use that annotation to define that a variable should be dumped to a text file. For example for the annotation: + + >>> data: CWLDumpableFile = "this is text data" + + + the converter will append at the end of the script the following lines: + + + >>> with open('data', 'w') as f: + ... f.write(data) + + + and at the CWL, the data, will be mapped as a output. + """ pass class CWLDumpableBinaryFile(CWLDumpable): + """Use that annotation to define that a variable should be dumped to a binary file. For example for the annotation: + + >>> data: CWLDumpableBinaryFile = b"this is text data" + + the converter will append at the end of the script the following lines: + + >>> with open('data', 'wb') as f: + ... f.write(data) + + and at the CWL, the data, will be mapped as a output. + """ pass From 1b09ddac9c19c85af9930dd15f29577f9c286f63 Mon Sep 17 00:00:00 2001 From: Giannis Doukas Date: Thu, 2 Jul 2020 20:32:40 +0100 Subject: [PATCH 7/9] cleanup --- ipython2cwl/cwltoolextractor.py | 131 ++++++++++++++++---------------- 1 file changed, 67 insertions(+), 64 deletions(-) diff --git a/ipython2cwl/cwltoolextractor.py b/ipython2cwl/cwltoolextractor.py index 65e1d6d..61ac743 100644 --- a/ipython2cwl/cwltoolextractor.py +++ b/ipython2cwl/cwltoolextractor.py @@ -29,8 +29,6 @@ ) -# TODO: check if supports recursion if main function exists - class AnnotatedVariablesExtractor(ast.NodeTransformer): input_type_mapper = { (CWLFilePathInput.__name__,): ( @@ -59,7 +57,7 @@ class AnnotatedVariablesExtractor(ast.NodeTransformer): }} output_type_mapper = { - CWLFilePathOutput.__name__ + (CWLFilePathOutput.__name__,) } dumpable_mapper = { @@ -88,75 +86,80 @@ def __get_annotation__(self, type_annotation): annotation = (type_annotation.func.value.id, type_annotation.func.attr) return 
annotation + @classmethod + def conv_AnnAssign_to_Assign(cls, node): + return ast.Assign( + col_offset=node.col_offset, + lineno=node.lineno, + targets=[node.target], + value=node.value + ) + + def _visit_input_ann_assign(self, node, annotation): + mapper = self.input_type_mapper[annotation] + self.extracted_variables.append(_VariableNameTypePair( + node.target.id, mapper[0], mapper[1], not mapper[0].endswith('?'), True, False, None) + ) + return None + + def _visit_default_dumper(self, node, dumper): + dump_tree = ast.parse(dumper.format(var_name=node.target.id)) + self.to_dump.append(dump_tree.body) + self.extracted_variables.append(_VariableNameTypePair( + node.target.id, None, None, None, False, True, node.target.id) + ) + return self.conv_AnnAssign_to_Assign(node) + + def _visit_user_defined_dumper(self, node): + load_ctx = ast.Load() + func_name = deepcopy(node.annotation.args[0].value) + func_name.ctx = load_ctx + ast.fix_missing_locations(func_name) + + new_dump_node = ast.Expr( + col_offset=0, lineno=0, + value=ast.Call( + args=node.annotation.args[1:], keywords=node.annotation.keywords, col_offset=0, + func=ast.Attribute( + attr=node.annotation.args[0].attr, + value=func_name, + col_offset=0, ctx=load_ctx, lineno=0, + ), + ) + ) + ast.fix_missing_locations(new_dump_node) + self.to_dump.append([new_dump_node]) + self.extracted_variables.append(_VariableNameTypePair( + node.target.id, None, None, None, False, True, node.annotation.args[1].s) + ) + # removing type annotation + return self.conv_AnnAssign_to_Assign(node) + + def _visit_output_type(self, node): + self.extracted_variables.append(_VariableNameTypePair( + node.target.id, None, None, None, False, True, node.value.s) + ) + # removing type annotation + return ast.Assign( + col_offset=node.col_offset, + lineno=node.lineno, + targets=[node.target], + value=node.value + ) + def visit_AnnAssign(self, node): try: annotation = self.__get_annotation__(node.annotation) if annotation in 
self.input_type_mapper: - mapper = self.input_type_mapper[annotation] - self.extracted_variables.append(_VariableNameTypePair( - node.target.id, mapper[0], mapper[1], not mapper[0].endswith('?'), True, False, None) - ) - return None + return self._visit_input_ann_assign(node, annotation) elif annotation in self.dumpable_mapper: dumper = self.dumpable_mapper[annotation] if dumper is not None: - dump_tree = ast.parse(dumper.format(var_name=node.target.id)) - self.to_dump.append(dump_tree.body) - self.extracted_variables.append(_VariableNameTypePair( - node.target.id, None, None, None, False, True, node.target.id) - ) - # removing type annotation - return ast.Assign( - col_offset=node.col_offset, - lineno=node.lineno, - targets=[node.target], - value=node.value - ) + return self._visit_default_dumper(node, dumper) else: - load_ctx = ast.Load() - func_name = deepcopy(node.annotation.args[0].value) - func_name.ctx = load_ctx - ast.fix_missing_locations(func_name) - - new_dump_node = ast.Expr( - col_offset=0, lineno=0, - value=ast.Call( - args=node.annotation.args[1:], - col_offset=0, - func=ast.Attribute( - attr=node.annotation.args[0].attr, - col_offset=0, - ctx=load_ctx, - lineno=0, - value=func_name, - ), - keywords=node.annotation.keywords - ) - ) - ast.fix_missing_locations(new_dump_node) - self.to_dump.append([new_dump_node]) - self.extracted_variables.append(_VariableNameTypePair( - node.target.id, None, None, None, False, True, node.annotation.args[1].s) - ) - # removing type annotation - return ast.Assign( - col_offset=node.col_offset, - lineno=node.lineno, - targets=[node.target], - value=node.value - ) - elif (isinstance(node.annotation, ast.Name) and node.annotation.id in self.output_type_mapper) or \ - (isinstance(node.annotation, ast.Str) and node.annotation.s in self.output_type_mapper): - self.extracted_variables.append(_VariableNameTypePair( - node.target.id, None, None, None, False, True, node.value.s) - ) - # removing type annotation - return 
ast.Assign( - col_offset=node.col_offset, - lineno=node.lineno, - targets=[node.target], - value=node.value - ) + return self._visit_user_defined_dumper(node) + elif annotation in self.output_type_mapper: + return self._visit_output_type(node) except Exception: pass return node From 0e3a5079fac202b6e96ebd68f7cf160828ee89a1 Mon Sep 17 00:00:00 2001 From: Giannis Doukas Date: Thu, 2 Jul 2020 20:34:53 +0100 Subject: [PATCH 8/9] change path in example in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4468b11..c0b8c4a 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ pip install ipython2cwl ### Example ``` -jupyter repo2cwl https://github.com/giannisdoukas/cwl-annotated-jupyter-notebook.git -o cwlbuild +jupyter repo2cwl https://github.com/giannisdoukas/cwl-annotated-jupyter-notebook.git -o . ``` ### Docs From fb7bfc46f9320754c4b5a33b84284bace6302742 Mon Sep 17 00:00:00 2001 From: Giannis Doukas Date: Thu, 2 Jul 2020 20:38:49 +0100 Subject: [PATCH 9/9] update version --- README.md | 2 +- ipython2cwl/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c0b8c4a..9f95cea 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ with open(result_file, 'w') as f: ``` IPython2CWL is based on [repo2docker](https://github.com/jupyter/repo2docker), the same tool -used by [mybinder](https://mybinder.org/). Now, by writing Jupyter Notebook and publish them, including repo2docker +used by [mybinder](https://mybinder.org/). Now, by writing Jupyter Notebook and publishing them, including repo2docker configuration, the community can not only execute the notebooks remotely but also to use them as steps in scientific workflows. diff --git a/ipython2cwl/__init__.py b/ipython2cwl/__init__.py index 27fdca4..81f0fde 100644 --- a/ipython2cwl/__init__.py +++ b/ipython2cwl/__init__.py @@ -1 +1 @@ -__version__ = "0.0.3" +__version__ = "0.0.4"