diff --git a/.gitignore b/.gitignore
index 3ee1292..56a1207 100644
--- a/.gitignore
+++ b/.gitignore
@@ -247,3 +247,6 @@ tmp.py
cwlbuild
/tests/repo-like/result.yaml
/tests/repo-like/messages.txt
+/tests/binary_message
+/tests/message
+/tests/message2
diff --git a/README.md b/README.md
index 4468b11..9f95cea 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ with open(result_file, 'w') as f:
```
IPython2CWL is based on [repo2docker](https://github.com/jupyter/repo2docker), the same tool
-used by [mybinder](https://mybinder.org/). Now, by writing Jupyter Notebook and publish them, including repo2docker
+used by [mybinder](https://mybinder.org/). Now, by writing Jupyter Notebooks and publishing them, including repo2docker
configuration, the community can not only execute the notebooks remotely but also to use them as steps in scientific
workflows.
@@ -37,7 +37,7 @@ pip install ipython2cwl
### Example
```
-jupyter repo2cwl https://github.com/giannisdoukas/cwl-annotated-jupyter-notebook.git -o cwlbuild
+jupyter repo2cwl https://github.com/giannisdoukas/cwl-annotated-jupyter-notebook.git -o .
```
### Docs
diff --git a/docs/index.rst b/docs/index.rst
index 05b8055..4490e8f 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -30,18 +30,53 @@ IPython2CWL is a tool for converting `IPython `_ Jupyter N
------------------------------------------------------------------------------------------
IPython2CWL is based on `repo2docker `_, the same tool
-used by `mybinder `_. Now, by writing Jupyter Notebook and publish them, including repo2docker
-configuration, the community can not only execute the notebooks remotely but also to use them as steps in scientific
+used by `mybinder <https://mybinder.org/>`_. Now, by writing Jupyter Notebooks and publishing them, including repo2docker
+configuration, the community can not only execute the notebooks remotely but can also use them as steps in scientific
workflows.
-* Install ipython2cwl: :code:`pip install python2cwl`
+* `Install ipython2cwl `_: :code:`pip install ipython2cwl`
* Ensure that you have docker running
* Create a directory to store the generated cwl files, for example cwlbuild
* Execute :code:`jupyter repo2cwl https://github.com/giannisdoukas/cwl-annotated-jupyter-notebook.git -o cwlbuild`
-Indices and tables
-==================
+HOW IT WORKS?
+------------------
+
+IPython2CWL parses each IPython notebook and finds the variables with the typing annotations. For each input variable,
+the assignment of that variable will be generalised as a command line argument. Each output variable will be mapped
+in the cwl description as an output file.
+
+SUPPORTED TYPES
+------------------
+
+.. automodule:: ipython2cwl.iotypes
+ :members:
+
+
+THAT'S COOL! WHAT ABOUT LIST & OPTIONAL ARGUMENTS?
+"""""""""""""""""""""""""""""""""""""""""""""""""""
+
+The basic input data types can be combined with the List and Optional annotations. For example, write the following
+annotation:
+
+.. code-block:: python
+
+ file_inputs: List[CWLFilePathInput] = ['data1.txt', 'data2.txt', 'data3.txt']
+ example: Optional[CWLStringInput] = None
+
+
+SEEMS INTERESTING! WHAT ABOUT A DEMO?
+----------------------------------------
+
+If you would like to see a demo before you start annotating your notebooks, check here:
+`github.com/giannisdoukas/ipython2cwl-demo <https://github.com/giannisdoukas/ipython2cwl-demo>`_
+
+
+WHAT IF I WANT TO VALIDATE THAT THE GENERATED SCRIPTS ARE CORRECT?
+------------------------------------------------------------------
+
+All the generated scripts are stored in the docker image under the directory :code:`/app/cwl/bin`. You can see the list
+of the files by running :code:`docker run [IMAGE_ID] find /app/cwl/bin/ -type f`.
+
+
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
diff --git a/ipython2cwl/__init__.py b/ipython2cwl/__init__.py
index 27fdca4..81f0fde 100644
--- a/ipython2cwl/__init__.py
+++ b/ipython2cwl/__init__.py
@@ -1 +1 @@
-__version__ = "0.0.3"
+__version__ = "0.0.4"
diff --git a/ipython2cwl/cwltoolextractor.py b/ipython2cwl/cwltoolextractor.py
index 861c861..61ac743 100644
--- a/ipython2cwl/cwltoolextractor.py
+++ b/ipython2cwl/cwltoolextractor.py
@@ -5,15 +5,17 @@
import tarfile
import tempfile
from collections import namedtuple
+from copy import deepcopy
from pathlib import Path
-from typing import Dict, Any
+from typing import Dict, Any, List
import astor
import nbconvert
import yaml
from nbformat.notebooknode import NotebookNode
-from .iotypes import CWLFilePathInput, CWLBooleanInput, CWLIntInput, CWLStringInput, CWLFilePathOutput
+from .iotypes import CWLFilePathInput, CWLBooleanInput, CWLIntInput, CWLStringInput, CWLFilePathOutput, \
+ CWLDumpableFile, CWLDumpableBinaryFile, CWLDumpable
from .requirements_manager import RequirementsManager
with open(os.sep.join([os.path.abspath(os.path.dirname(__file__)), 'templates', 'template.dockerfile'])) as f:
@@ -21,8 +23,11 @@
with open(os.sep.join([os.path.abspath(os.path.dirname(__file__)), 'templates', 'template.setup'])) as f:
SETUP_TEMPLATE = f.read()
+_VariableNameTypePair = namedtuple(
+ 'VariableNameTypePair',
+ ['name', 'cwl_typeof', 'argparse_typeof', 'required', 'is_input', 'is_output', 'value']
+)
-# TODO: check if supports recursion if main function exists
class AnnotatedVariablesExtractor(ast.NodeTransformer):
input_type_mapper = {
@@ -52,12 +57,19 @@ class AnnotatedVariablesExtractor(ast.NodeTransformer):
}}
output_type_mapper = {
- CWLFilePathOutput.__name__
+ (CWLFilePathOutput.__name__,)
+ }
+
+ dumpable_mapper = {
+ (CWLDumpableFile.__name__,): "with open('{var_name}', 'w') as f:\n\tf.write({var_name})",
+ (CWLDumpableBinaryFile.__name__,): "with open('{var_name}', 'wb') as f:\n\tf.write({var_name})",
+ (CWLDumpable.__name__, CWLDumpable.dump.__name__): None,
}
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
- self.extracted_nodes = []
+ self.extracted_variables: List = []
+ self.to_dump: List = []
def __get_annotation__(self, type_annotation):
annotation = None
@@ -70,30 +82,84 @@ def __get_annotation__(self, type_annotation):
annotation = self.__get_annotation__(ann_expr.value)
elif isinstance(type_annotation, ast.Subscript):
annotation = (type_annotation.value.id, *self.__get_annotation__(type_annotation.slice.value))
+ elif isinstance(type_annotation, ast.Call):
+ annotation = (type_annotation.func.value.id, type_annotation.func.attr)
return annotation
+ @classmethod
+ def conv_AnnAssign_to_Assign(cls, node):
+ return ast.Assign(
+ col_offset=node.col_offset,
+ lineno=node.lineno,
+ targets=[node.target],
+ value=node.value
+ )
+
+ def _visit_input_ann_assign(self, node, annotation):
+ mapper = self.input_type_mapper[annotation]
+ self.extracted_variables.append(_VariableNameTypePair(
+ node.target.id, mapper[0], mapper[1], not mapper[0].endswith('?'), True, False, None)
+ )
+ return None
+
+ def _visit_default_dumper(self, node, dumper):
+ dump_tree = ast.parse(dumper.format(var_name=node.target.id))
+ self.to_dump.append(dump_tree.body)
+ self.extracted_variables.append(_VariableNameTypePair(
+ node.target.id, None, None, None, False, True, node.target.id)
+ )
+ return self.conv_AnnAssign_to_Assign(node)
+
+ def _visit_user_defined_dumper(self, node):
+ load_ctx = ast.Load()
+ func_name = deepcopy(node.annotation.args[0].value)
+ func_name.ctx = load_ctx
+ ast.fix_missing_locations(func_name)
+
+ new_dump_node = ast.Expr(
+ col_offset=0, lineno=0,
+ value=ast.Call(
+ args=node.annotation.args[1:], keywords=node.annotation.keywords, col_offset=0,
+ func=ast.Attribute(
+ attr=node.annotation.args[0].attr,
+ value=func_name,
+ col_offset=0, ctx=load_ctx, lineno=0,
+ ),
+ )
+ )
+ ast.fix_missing_locations(new_dump_node)
+ self.to_dump.append([new_dump_node])
+ self.extracted_variables.append(_VariableNameTypePair(
+ node.target.id, None, None, None, False, True, node.annotation.args[1].s)
+ )
+ # removing type annotation
+ return self.conv_AnnAssign_to_Assign(node)
+
+ def _visit_output_type(self, node):
+ self.extracted_variables.append(_VariableNameTypePair(
+ node.target.id, None, None, None, False, True, node.value.s)
+ )
+ # removing type annotation
+ return ast.Assign(
+ col_offset=node.col_offset,
+ lineno=node.lineno,
+ targets=[node.target],
+ value=node.value
+ )
+
def visit_AnnAssign(self, node):
try:
annotation = self.__get_annotation__(node.annotation)
if annotation in self.input_type_mapper:
- mapper = self.input_type_mapper[annotation]
- self.extracted_nodes.append(
- (node, mapper[0], mapper[1], not mapper[0].endswith('?'), True, False)
- )
- return None
-
- elif (isinstance(node.annotation, ast.Name) and node.annotation.id in self.output_type_mapper) or \
- (isinstance(node.annotation, ast.Str) and node.annotation.s in self.output_type_mapper):
- self.extracted_nodes.append(
- (node, None, None, None, False, True)
- )
- # removing type annotation
- return ast.Assign(
- col_offset=node.col_offset,
- lineno=node.lineno,
- targets=[node.target],
- value=node.value
- )
+ return self._visit_input_ann_assign(node, annotation)
+ elif annotation in self.dumpable_mapper:
+ dumper = self.dumpable_mapper[annotation]
+ if dumper is not None:
+ return self._visit_default_dumper(node, dumper)
+ else:
+ return self._visit_user_defined_dumper(node)
+ elif annotation in self.output_type_mapper:
+ return self._visit_output_type(node)
except Exception:
pass
return node
@@ -123,12 +189,6 @@ class AnnotatedIPython2CWLToolConverter:
"""
_code: str
-
- _VariableNameTypePair = namedtuple(
- 'VariableNameTypePair',
- ['name', 'cwl_typeof', 'argparse_typeof', 'required', 'is_input', 'is_output', 'value']
- )
-
"""The annotated python code to convert."""
def __init__(self, annotated_ipython_code: str):
@@ -137,19 +197,15 @@ def __init__(self, annotated_ipython_code: str):
self._code = annotated_ipython_code
extractor = AnnotatedVariablesExtractor()
- self._tree = ast.fix_missing_locations(extractor.visit(ast.parse(self._code)))
+ self._tree = extractor.visit(ast.parse(self._code))
+ [self._tree.body.extend(d) for d in extractor.to_dump]
+ self._tree = ast.fix_missing_locations(self._tree)
self._variables = []
- for node, cwl_type, click_type, required, is_input, is_output in extractor.extracted_nodes:
- if is_input:
- self._variables.append(
- self._VariableNameTypePair(node.target.id, cwl_type, click_type, required, is_input, is_output,
- None)
- )
- if is_output:
- self._variables.append(
- self._VariableNameTypePair(node.target.id, cwl_type, click_type, required, is_input, is_output,
- node.value.s)
- )
+ for variable in extractor.extracted_variables: # type: _VariableNameTypePair
+ if variable.is_input:
+ self._variables.append(variable)
+ if variable.is_output:
+ self._variables.append(variable)
@classmethod
def from_jupyter_notebook_node(cls, node: NotebookNode) -> 'AnnotatedIPython2CWLToolConverter':
diff --git a/ipython2cwl/iotypes.py b/ipython2cwl/iotypes.py
index 8cb6688..6ae99db 100644
--- a/ipython2cwl/iotypes.py
+++ b/ipython2cwl/iotypes.py
@@ -1,19 +1,160 @@
+"""
-class CWLFilePathInput:
+Basic Data Types
+^^^^^^^^^^^^^^^^^
+
+Each variable can be an input or an output. The basic data types are:
+
+* Inputs:
+
+ * CWLFilePathInput
+
+ * CWLBooleanInput
+
+ * CWLStringInput
+
+ * CWLIntInput
+
+* Outputs:
+
+ * CWLFilePathOutput
+
+ * CWLDumpableFile
+
+ * CWLDumpableBinaryFile
+
+
+Complex Dumpables Types
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Dumpables are variables which are able to be written to a file, but the jupyter notebook developer
+does not want to write it, for example to avoid the IO overhead. To bypass that, you can use
+Dumpables annotation. See :func:`~iotypes.CWLDumpable.dump` for more details.
+
+"""
+from typing import Callable
+
+
+class _CWLInput:
+ pass
+
+
+class CWLFilePathInput(str, _CWLInput):
+ """Use that hint to annotate that a variable is a string-path input. You can use the typing annotation
+ as a string by importing it. At the generated script a command line argument with the name of the variable
+ will be created and the assignment of value will be generalised.
+
+ >>> dataset1: CWLFilePathInput = './data/data.csv'
+ >>> dataset2: 'CWLFilePathInput' = './data/data.csv'
+
+ """
pass
-class CWLBooleanInput:
+class CWLBooleanInput(_CWLInput):
+ """Use that hint to annotate that a variable is a boolean input. You can use the typing annotation
+ as a string by importing it. At the generated script a command line argument with the name of the variable
+ will be created and the assignment of value will be generalised.
+
+ >>> dataset1: CWLBooleanInput = True
+ >>> dataset2: 'CWLBooleanInput' = False
+
+ """
pass
-class CWLStringInput:
+class CWLStringInput(str, _CWLInput):
+ """Use that hint to annotate that a variable is a string input. You can use the typing annotation
+ as a string by importing it. At the generated script a command line argument with the name of the variable
+ will be created and the assignment of value will be generalised.
+
+ >>> dataset1: CWLStringInput = 'this is a message input'
+ >>> dataset2: 'CWLStringInput' = 'yet another message input'
+
+ """
pass
-class CWLIntInput:
+class CWLIntInput(_CWLInput):
+ """Use that hint to annotate that a variable is an integer input. You can use the typing annotation
+ as a string by importing it. At the generated script a command line argument with the name of the variable
+ will be created and the assignment of value will be generalised.
+
+ >>> dataset1: CWLIntInput = 1
+ >>> dataset2: 'CWLIntInput' = 2
+
+ """
pass
-class CWLFilePathOutput:
+class _CWLOutput:
+ pass
+
+
+class CWLFilePathOutput(str, _CWLOutput):
+ """Use that hint to annotate that a variable is a string-path to an output file. You can use the typing annotation
+ as a string by importing it. The generated file will be mapped as a CWL output.
+
+ >>> filename: CWLFilePathOutput = 'data.csv'
+
+ """
+ pass
+
+
+class CWLDumpable(_CWLOutput):
+ """Use that class to define custom Dumpables variables."""
+
+ @classmethod
+ def dump(cls, dumper: Callable, filename, *args, **kwargs):
+ """
+ Set the function to be used to dump the variable to a file.
+
+ >>> import pandas
+ >>> d: CWLDumpable.dump(d.to_csv, "dumpable.csv", sep="\\t", index=False) = pandas.DataFrame(
+ ... [[1,2,3], [4,5,6], [7,8,9]]
+ ... )
+
+ In that example the converter will add at the end of the script the following line:
+ >>> d.to_csv("dumpable.csv", sep="\\t", index=False)
+
+ :param dumper: The function that has to be called to write the variable to a file.
+ :param filename: The name of the generated file. That string must be the first argument
+ in the dumper function. That file will also be mapped as an output in
+ the CWL file.
+ :param args: Any positional arguments you want to pass to dumper after the filename
+ :param kwargs: Any keyword arguments you want to pass to dumper
+ """
+ return _CWLOutput
+
+
+class CWLDumpableFile(CWLDumpable):
+ """Use that annotation to define that a variable should be dumped to a text file. For example for the annotation:
+
+ >>> data: CWLDumpableFile = "this is text data"
+
+
+ the converter will append at the end of the script the following lines:
+
+
+ >>> with open('data', 'w') as f:
+ ... f.write(data)
+
+
+ and in the CWL, data will be mapped as an output.
+ """
+ pass
+
+
+class CWLDumpableBinaryFile(CWLDumpable):
+ """Use that annotation to define that a variable should be dumped to a binary file. For example for the annotation:
+
+ >>> data: CWLDumpableBinaryFile = b"this is text data"
+
+ the converter will append at the end of the script the following lines:
+
+ >>> with open('data', 'wb') as f:
+ ... f.write(data)
+
+ and in the CWL, data will be mapped as an output.
+ """
pass
diff --git a/ipython2cwl/repo2cwl.py b/ipython2cwl/repo2cwl.py
index 1cd536a..3a9ab37 100644
--- a/ipython2cwl/repo2cwl.py
+++ b/ipython2cwl/repo2cwl.py
@@ -56,8 +56,7 @@ def _store_jn_as_script(notebook_path: str, git_directory_absolute_path: str, bi
'DO NOT EDIT THIS FILE',
'THIS FILE IS AUTO-GENERATED BY THE ipython2cwl.',
'FOR MORE INFORMATION CHECK https://github.com/giannisdoukas/ipython2cwl',
- '\n\n',
- '"""',
+ '"""\n\n',
converter._wrap_script_to_method(converter._tree, converter._variables)
])
with open(script_absolute_name, 'w') as fd:
diff --git a/test-requirements.txt b/test-requirements.txt
index 06fa332..5896fc7 100644
--- a/test-requirements.txt
+++ b/test-requirements.txt
@@ -4,4 +4,5 @@ coveralls>=2.0.0
virtualenv>=3.1.0
gitpython>=3.1.3
docker>=4.2.1
-git+https://github.com/giannisdoukas/cwltool.git#egg=cwltool
\ No newline at end of file
+git+https://github.com/giannisdoukas/cwltool.git#egg=cwltool
+pandas==1.0.5
diff --git a/tests/test_cwltoolextractor.py b/tests/test_cwltoolextractor.py
index da573ad..3c50fa6 100644
--- a/tests/test_cwltoolextractor.py
+++ b/tests/test_cwltoolextractor.py
@@ -377,3 +377,100 @@ def test_AnnotatedIPython2CWLToolConverter_optional_array_input(self):
self.assertListEqual([], AnnotatedIPython2CWLToolConverter(os.linesep.join([
'x1: "RANDOM CHARACTERS!!!!!!" = True'
]))._variables)
+
+ def test_AnnotatedIPython2CWLToolConverter_dumpables(self):
+ script = os.linesep.join([
+ 'message: CWLDumpableFile = "this is a text from a dumpable"',
+ 'message2: "CWLDumpableFile" = "this is a text from a dumpable 2"',
+ 'binary_message: CWLDumpableBinaryFile = b"this is a text from a binary dumpable"',
+ 'print("Message:", message)',
+ 'print(b"Binary Message:" + binary_message)',
+ ])
+ converter = AnnotatedIPython2CWLToolConverter(script)
+ generated_script = AnnotatedIPython2CWLToolConverter._wrap_script_to_method(
+ converter._tree, converter._variables
+ )
+ for f in ['message', 'binary_message', 'message2']:
+ try:
+ os.remove(f)
+ except FileNotFoundError:
+ pass
+ exec(generated_script)
+ print(generated_script)
+ locals()['main']()
+ with open('message') as f:
+ self.assertEqual('this is a text from a dumpable', f.read())
+ with open('message2') as f:
+ self.assertEqual('this is a text from a dumpable 2', f.read())
+ with open('binary_message', 'rb') as f:
+ self.assertEqual(b'this is a text from a binary dumpable', f.read())
+
+ cwl_tool = converter.cwl_command_line_tool()
+ print(cwl_tool)
+ self.assertDictEqual(
+ {
+ 'message': {
+ 'type': 'File',
+ 'outputBinding': {
+ 'glob': 'message'
+ }
+ },
+ 'message2': {
+ 'type': 'File',
+ 'outputBinding': {
+ 'glob': 'message2'
+ }
+ },
+ 'binary_message': {
+ 'type': 'File',
+ 'outputBinding': {
+ 'glob': 'binary_message'
+ }
+ }
+ },
+ cwl_tool['outputs']
+ )
+
+ def test_AnnotatedIPython2CWLToolConverter_custom_dumpables(self):
+ script = os.linesep.join([
+ 'import pandas',
+ 'from ipython2cwl.iotypes import CWLDumpable',
+ 'd: CWLDumpable.dump(d.to_csv, "dumpable.csv", sep="\\t", index=False) = pandas.DataFrame([[1,2,3], [4,5,6], [7,8,9]])'
+ ])
+ converter = AnnotatedIPython2CWLToolConverter(script)
+ generated_script = AnnotatedIPython2CWLToolConverter._wrap_script_to_method(
+ converter._tree, converter._variables
+ )
+ for f in ["dumpable.csv"]:
+ try:
+ os.remove(f)
+ except FileNotFoundError:
+ pass
+ exec(generated_script)
+ print(generated_script)
+ locals()['main']()
+ import pandas
+ data_file = pandas.read_csv('dumpable.csv', sep="\t")
+ self.assertListEqual(
+ [[0, 0, 0], [0, 0, 0], [0, 0, 0]],
+ (data_file.to_numpy() - pandas.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]).to_numpy()).tolist()
+ )
+
+ cwl_tool = converter.cwl_command_line_tool()
+ print(cwl_tool)
+ self.assertDictEqual(
+ {
+ 'd': {
+ 'type': 'File',
+ 'outputBinding': {
+ 'glob': 'dumpable.csv'
+ }
+ },
+ },
+ cwl_tool['outputs']
+ )
+ for f in ["dumpable.csv"]:
+ try:
+ os.remove(f)
+ except FileNotFoundError:
+ pass