Merge pull request #10 from giannisdoukas/dumpables

add dumpables & documentation
common-workflow-lab · Jul 2, 2020 · af7d2c5 · af7d2c5
2 parents 13edcd5 + fb7bfc4
commit af7d2c5
Show file tree

Hide file tree

Showing 9 changed files with 392 additions and 60 deletions.
diff --git a/.gitignore b/.gitignore
@@ -247,3 +247,6 @@ tmp.py
 cwlbuild
 /tests/repo-like/result.yaml
 /tests/repo-like/messages.txt
+/tests/binary_message
+/tests/message
+/tests/message2
diff --git a/README.md b/README.md
@@ -23,7 +23,7 @@ with open(result_file, 'w') as f:
 ```
 
 IPython2CWL is based on [repo2docker](https://github.com/jupyter/repo2docker), the same tool
-used by [mybinder](https://mybinder.org/). Now, by writing Jupyter Notebook and publish them, including repo2docker
+used by [mybinder](https://mybinder.org/). Now, by writing Jupyter Notebook and publishing them, including repo2docker
 configuration, the community can not only execute the notebooks remotely but also to use them as steps in scientific
 workflows.
 
@@ -37,7 +37,7 @@ pip install ipython2cwl
 ### Example
 
 ```
-jupyter repo2cwl https://github.com/giannisdoukas/cwl-annotated-jupyter-notebook.git -o cwlbuild
+jupyter repo2cwl https://github.com/giannisdoukas/cwl-annotated-jupyter-notebook.git -o .
 ```
 
 ### Docs

diff --git a/docs/index.rst b/docs/index.rst
@@ -30,18 +30,53 @@ IPython2CWL is a tool for converting `IPython <https://ipython.org/>`_ Jupyter N
 ------------------------------------------------------------------------------------------
 
 IPython2CWL is based on `repo2docker <https://github.com/jupyter/repo2docker>`_, the same tool
-used by `mybinder <https://mybinder.org/>`_. Now, by writing Jupyter Notebook and publish them, including repo2docker
-configuration, the community can not only execute the notebooks remotely but also to use them as steps in scientific
+used by `mybinder <https://mybinder.org/>`_. Now, by writing Jupyter Notebook and publishing them, including repo2docker
+configuration, the community can not only execute the notebooks remotely but can also use them as steps in scientific
 workflows.
 
-* Install ipython2cwl: :code:`pip install python2cwl`
+* `Install ipython2cwl <https://pypi.org/project/ipython2cwl/>`_: :code:`pip install ipython2cwl`
 * Ensure that you have docker running
 * Create a directory to store the generated cwl files, for example cwlbuild
 * Execute :code:`jupyter repo2cwl https://github.com/giannisdoukas/cwl-annotated-jupyter-notebook.git -o cwlbuild`
 
-Indices and tables
-==================
+HOW DOES IT WORK?
+------------------
+
+IPython2CWL parses each IPython notebook and finds the variables with the typing annotations. For each input variable,
+the assignment of that variable will be generalised as a command line argument. Each output variable will be mapped
+in the cwl description as an output file.
+
+SUPPORTED TYPES
+------------------
+
+.. automodule:: ipython2cwl.iotypes
+   :members:
+
+
+THAT'S COOL! WHAT ABOUT LIST & OPTIONAL ARGUMENTS?
+"""""""""""""""""""""""""""""""""""""""""""""""""""
+
+The basic input data types can be combined with the List and Optional annotations. For example, write the following
+annotation:
+
+.. code-block:: python
+
+  file_inputs: List[CWLFilePathInput] = ['data1.txt', 'data2.txt', 'data3.txt']
+  example: Optional[CWLStringInput] = None
+
+
+SEEMS INTERESTING! WHAT ABOUT A DEMO?
+----------------------------------------
+
+If you would like to see a demo before you start annotating your notebooks, check here:
+`github.com/giannisdoukas/ipython2cwl-demo <https://github.com/giannisdoukas/ipython2cwl-demo>`_
+
+
+WHAT IF I WANT TO VALIDATE THAT THE GENERATED SCRIPTS ARE CORRECT?
+------------------------------------------------------------------
+
+All the generated scripts are stored in the docker image under the directory :code:`/app/cwl/bin`. You can see the list
+of the files by running :code:`docker run [IMAGE_ID] find /app/cwl/bin/ -type f`.
+
+
 
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
diff --git a/ipython2cwl/__init__.py b/ipython2cwl/__init__.py
@@ -1 +1 @@
-__version__ = "0.0.3"
+__version__ = "0.0.4"
diff --git a/ipython2cwl/cwltoolextractor.py b/ipython2cwl/cwltoolextractor.py
@@ -5,24 +5,29 @@
 import tarfile
 import tempfile
 from collections import namedtuple
+from copy import deepcopy
 from pathlib import Path
-from typing import Dict, Any
+from typing import Dict, Any, List
 
 import astor
 import nbconvert
 import yaml
 from nbformat.notebooknode import NotebookNode
 
-from .iotypes import CWLFilePathInput, CWLBooleanInput, CWLIntInput, CWLStringInput, CWLFilePathOutput
+from .iotypes import CWLFilePathInput, CWLBooleanInput, CWLIntInput, CWLStringInput, CWLFilePathOutput, \
+    CWLDumpableFile, CWLDumpableBinaryFile, CWLDumpable
 from .requirements_manager import RequirementsManager
 
 with open(os.sep.join([os.path.abspath(os.path.dirname(__file__)), 'templates', 'template.dockerfile'])) as f:
     DOCKERFILE_TEMPLATE = f.read()
 with open(os.sep.join([os.path.abspath(os.path.dirname(__file__)), 'templates', 'template.setup'])) as f:
     SETUP_TEMPLATE = f.read()
 
+_VariableNameTypePair = namedtuple(
+    'VariableNameTypePair',
+    ['name', 'cwl_typeof', 'argparse_typeof', 'required', 'is_input', 'is_output', 'value']
+)
 
-# TODO: check if supports recursion if main function exists
 
 class AnnotatedVariablesExtractor(ast.NodeTransformer):
     input_type_mapper = {
@@ -52,12 +57,19 @@ class AnnotatedVariablesExtractor(ast.NodeTransformer):
     }}
 
     output_type_mapper = {
-        CWLFilePathOutput.__name__
+        (CWLFilePathOutput.__name__,)
+    }
+
+    dumpable_mapper = {
+        (CWLDumpableFile.__name__,): "with open('{var_name}', 'w') as f:\n\tf.write({var_name})",
+        (CWLDumpableBinaryFile.__name__,): "with open('{var_name}', 'wb') as f:\n\tf.write({var_name})",
+        (CWLDumpable.__name__, CWLDumpable.dump.__name__): None,
     }
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.extracted_nodes = []
+        self.extracted_variables: List = []
+        self.to_dump: List = []
 
     def __get_annotation__(self, type_annotation):
         annotation = None
@@ -70,30 +82,84 @@ def __get_annotation__(self, type_annotation):
                 annotation = self.__get_annotation__(ann_expr.value)
         elif isinstance(type_annotation, ast.Subscript):
             annotation = (type_annotation.value.id, *self.__get_annotation__(type_annotation.slice.value))
+        elif isinstance(type_annotation, ast.Call):
+            annotation = (type_annotation.func.value.id, type_annotation.func.attr)
         return annotation
 
+    @classmethod
+    def conv_AnnAssign_to_Assign(cls, node):
+        return ast.Assign(
+            col_offset=node.col_offset,
+            lineno=node.lineno,
+            targets=[node.target],
+            value=node.value
+        )
+
+    def _visit_input_ann_assign(self, node, annotation):
+        mapper = self.input_type_mapper[annotation]
+        self.extracted_variables.append(_VariableNameTypePair(
+            node.target.id, mapper[0], mapper[1], not mapper[0].endswith('?'), True, False, None)
+        )
+        return None
+
+    def _visit_default_dumper(self, node, dumper):
+        dump_tree = ast.parse(dumper.format(var_name=node.target.id))
+        self.to_dump.append(dump_tree.body)
+        self.extracted_variables.append(_VariableNameTypePair(
+            node.target.id, None, None, None, False, True, node.target.id)
+        )
+        return self.conv_AnnAssign_to_Assign(node)
+
+    def _visit_user_defined_dumper(self, node):
+        load_ctx = ast.Load()
+        func_name = deepcopy(node.annotation.args[0].value)
+        func_name.ctx = load_ctx
+        ast.fix_missing_locations(func_name)
+
+        new_dump_node = ast.Expr(
+            col_offset=0, lineno=0,
+            value=ast.Call(
+                args=node.annotation.args[1:], keywords=node.annotation.keywords, col_offset=0,
+                func=ast.Attribute(
+                    attr=node.annotation.args[0].attr,
+                    value=func_name,
+                    col_offset=0, ctx=load_ctx, lineno=0,
+                ),
+            )
+        )
+        ast.fix_missing_locations(new_dump_node)
+        self.to_dump.append([new_dump_node])
+        self.extracted_variables.append(_VariableNameTypePair(
+            node.target.id, None, None, None, False, True, node.annotation.args[1].s)
+        )
+        # removing type annotation
+        return self.conv_AnnAssign_to_Assign(node)
+
+    def _visit_output_type(self, node):
+        self.extracted_variables.append(_VariableNameTypePair(
+            node.target.id, None, None, None, False, True, node.value.s)
+        )
+        # removing type annotation
+        return ast.Assign(
+            col_offset=node.col_offset,
+            lineno=node.lineno,
+            targets=[node.target],
+            value=node.value
+        )
+
     def visit_AnnAssign(self, node):
         try:
             annotation = self.__get_annotation__(node.annotation)
             if annotation in self.input_type_mapper:
-                mapper = self.input_type_mapper[annotation]
-                self.extracted_nodes.append(
-                    (node, mapper[0], mapper[1], not mapper[0].endswith('?'), True, False)
-                )
-                return None
-
-            elif (isinstance(node.annotation, ast.Name) and node.annotation.id in self.output_type_mapper) or \
-                    (isinstance(node.annotation, ast.Str) and node.annotation.s in self.output_type_mapper):
-                self.extracted_nodes.append(
-                    (node, None, None, None, False, True)
-                )
-                # removing type annotation
-                return ast.Assign(
-                    col_offset=node.col_offset,
-                    lineno=node.lineno,
-                    targets=[node.target],
-                    value=node.value
-                )
+                return self._visit_input_ann_assign(node, annotation)
+            elif annotation in self.dumpable_mapper:
+                dumper = self.dumpable_mapper[annotation]
+                if dumper is not None:
+                    return self._visit_default_dumper(node, dumper)
+                else:
+                    return self._visit_user_defined_dumper(node)
+            elif annotation in self.output_type_mapper:
+                return self._visit_output_type(node)
         except Exception:
             pass
         return node
@@ -123,12 +189,6 @@ class AnnotatedIPython2CWLToolConverter:
     """
 
     _code: str
-
-    _VariableNameTypePair = namedtuple(
-        'VariableNameTypePair',
-        ['name', 'cwl_typeof', 'argparse_typeof', 'required', 'is_input', 'is_output', 'value']
-    )
-
     """The annotated python code to convert."""
 
     def __init__(self, annotated_ipython_code: str):
@@ -137,19 +197,15 @@ def __init__(self, annotated_ipython_code: str):
 
         self._code = annotated_ipython_code
         extractor = AnnotatedVariablesExtractor()
-        self._tree = ast.fix_missing_locations(extractor.visit(ast.parse(self._code)))
+        self._tree = extractor.visit(ast.parse(self._code))
+        [self._tree.body.extend(d) for d in extractor.to_dump]
+        self._tree = ast.fix_missing_locations(self._tree)
         self._variables = []
-        for node, cwl_type, click_type, required, is_input, is_output in extractor.extracted_nodes:
-            if is_input:
-                self._variables.append(
-                    self._VariableNameTypePair(node.target.id, cwl_type, click_type, required, is_input, is_output,
-                                               None)
-                )
-            if is_output:
-                self._variables.append(
-                    self._VariableNameTypePair(node.target.id, cwl_type, click_type, required, is_input, is_output,
-                                               node.value.s)
-                )
+        for variable in extractor.extracted_variables:  # type: _VariableNameTypePair
+            if variable.is_input:
+                self._variables.append(variable)
+            if variable.is_output:
+                self._variables.append(variable)
 
     @classmethod
     def from_jupyter_notebook_node(cls, node: NotebookNode) -> 'AnnotatedIPython2CWLToolConverter':