Skip to content

Commit

Permalink
Merge pull request #10 from giannisdoukas/dumpables
Browse files Browse the repository at this point in the history
add dumpables & documentation
  • Loading branch information
giannisdoukas committed Jul 2, 2020
2 parents 13edcd5 + fb7bfc4 commit af7d2c5
Show file tree
Hide file tree
Showing 9 changed files with 392 additions and 60 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -247,3 +247,6 @@ tmp.py
cwlbuild
/tests/repo-like/result.yaml
/tests/repo-like/messages.txt
/tests/binary_message
/tests/message
/tests/message2
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ with open(result_file, 'w') as f:
```

IPython2CWL is based on [repo2docker](https://github.com/jupyter/repo2docker), the same tool
used by [mybinder](https://mybinder.org/). Now, by writing Jupyter Notebook and publish them, including repo2docker
used by [mybinder](https://mybinder.org/). Now, by writing Jupyter Notebooks and publishing them, including repo2docker
configuration, the community can not only execute the notebooks remotely but can also use them as steps in scientific
workflows.

Expand All @@ -37,7 +37,7 @@ pip install ipython2cwl
### Example

```
jupyter repo2cwl https://github.com/giannisdoukas/cwl-annotated-jupyter-notebook.git -o cwlbuild
jupyter repo2cwl https://github.com/giannisdoukas/cwl-annotated-jupyter-notebook.git -o .
```

### Docs
Expand Down
51 changes: 43 additions & 8 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,53 @@ IPython2CWL is a tool for converting `IPython <https://ipython.org/>`_ Jupyter N
------------------------------------------------------------------------------------------

IPython2CWL is based on `repo2docker <https://github.com/jupyter/repo2docker>`_, the same tool
used by `mybinder <https://mybinder.org/>`_. Now, by writing Jupyter Notebook and publish them, including repo2docker
configuration, the community can not only execute the notebooks remotely but also to use them as steps in scientific
used by `mybinder <https://mybinder.org/>`_. Now, by writing Jupyter Notebooks and publishing them, including repo2docker
configuration, the community can not only execute the notebooks remotely but can also use them as steps in scientific
workflows.

* Install ipython2cwl: :code:`pip install python2cwl`
* `Install ipython2cwl <https://pypi.org/project/ipython2cwl/>`_: :code:`pip install ipython2cwl`
* Ensure that you have docker running
* Create a directory to store the generated cwl files, for example cwlbuild
* Execute :code:`jupyter repo2cwl https://github.com/giannisdoukas/cwl-annotated-jupyter-notebook.git -o cwlbuild`

Indices and tables
==================
HOW DOES IT WORK?
------------------

IPython2CWL parses each IPython notebook and finds the variables with the typing annotations. For each input variable,
the assignment of that variable will be generalised as a command line argument. Each output variable will be mapped
in the cwl description as an output file.

SUPPORTED TYPES
------------------

.. automodule:: ipython2cwl.iotypes
    :members:


THAT'S COOL! WHAT ABOUT LIST & OPTIONAL ARGUMENTS?
"""""""""""""""""""""""""""""""""""""""""""""""""""

The basic input data types can be combined with the List and Optional annotations. For example, write the following
annotation:

.. code-block:: python

    file_inputs: List[CWLFilePathInput] = ['data1.txt', 'data2.txt', 'data3.txt']
    example: Optional[CWLStringInput] = None

SEEMS INTERESTING! WHAT ABOUT A DEMO?
----------------------------------------

If you would like to see a demo before you start annotating your notebooks, check here:
`github.com/giannisdoukas/ipython2cwl-demo <https://github.com/giannisdoukas/ipython2cwl-demo>`_


WHAT IF I WANT TO VALIDATE THAT THE GENERATED SCRIPTS ARE CORRECT?
------------------------------------------------------------------

All the generated scripts are stored in the docker image under the directory :code:`/app/cwl/bin`. You can see the list
of the files by running :code:`docker run [IMAGE_ID] find /app/cwl/bin/ -type f`.



* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
2 changes: 1 addition & 1 deletion ipython2cwl/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.0.3"
__version__ = "0.0.4"
138 changes: 97 additions & 41 deletions ipython2cwl/cwltoolextractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,29 @@
import tarfile
import tempfile
from collections import namedtuple
from copy import deepcopy
from pathlib import Path
from typing import Dict, Any
from typing import Dict, Any, List

import astor
import nbconvert
import yaml
from nbformat.notebooknode import NotebookNode

from .iotypes import CWLFilePathInput, CWLBooleanInput, CWLIntInput, CWLStringInput, CWLFilePathOutput
from .iotypes import CWLFilePathInput, CWLBooleanInput, CWLIntInput, CWLStringInput, CWLFilePathOutput, \
CWLDumpableFile, CWLDumpableBinaryFile, CWLDumpable
from .requirements_manager import RequirementsManager

# Read the 'template.dockerfile' and 'template.setup' files from the
# 'templates' directory located beside this module, once, at import time.
_TEMPLATES_DIR = os.sep.join([os.path.abspath(os.path.dirname(__file__)), 'templates'])

with open(os.sep.join([_TEMPLATES_DIR, 'template.dockerfile'])) as f:
    DOCKERFILE_TEMPLATE = f.read()

with open(os.sep.join([_TEMPLATES_DIR, 'template.setup'])) as f:
    SETUP_TEMPLATE = f.read()

# Record describing one annotated notebook variable found by the extractor:
#   name            - identifier of the assigned variable
#   cwl_typeof      - CWL type string (None for outputs)
#   argparse_typeof - type used for the command line argument (None for outputs)
#   required        - whether the input is mandatory (None for outputs)
#   is_input        - True when the variable is a tool input
#   is_output       - True when the variable is a tool output
#   value           - output file name/path for outputs, None for inputs
_VariableNameTypePair = namedtuple(
    typename='VariableNameTypePair',
    field_names=(
        'name',
        'cwl_typeof',
        'argparse_typeof',
        'required',
        'is_input',
        'is_output',
        'value',
    ),
)

# TODO: check if supports recursion if main function exists

class AnnotatedVariablesExtractor(ast.NodeTransformer):
input_type_mapper = {
Expand Down Expand Up @@ -52,12 +57,19 @@ class AnnotatedVariablesExtractor(ast.NodeTransformer):
}}

output_type_mapper = {
CWLFilePathOutput.__name__
(CWLFilePathOutput.__name__,)
}

dumpable_mapper = {
(CWLDumpableFile.__name__,): "with open('{var_name}', 'w') as f:\n\tf.write({var_name})",
(CWLDumpableBinaryFile.__name__,): "with open('{var_name}', 'wb') as f:\n\tf.write({var_name})",
(CWLDumpable.__name__, CWLDumpable.dump.__name__): None,
}

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.extracted_nodes = []
self.extracted_variables: List = []
self.to_dump: List = []

def __get_annotation__(self, type_annotation):
annotation = None
Expand All @@ -70,30 +82,84 @@ def __get_annotation__(self, type_annotation):
annotation = self.__get_annotation__(ann_expr.value)
elif isinstance(type_annotation, ast.Subscript):
annotation = (type_annotation.value.id, *self.__get_annotation__(type_annotation.slice.value))
elif isinstance(type_annotation, ast.Call):
annotation = (type_annotation.func.value.id, type_annotation.func.attr)
return annotation

@classmethod
def conv_AnnAssign_to_Assign(cls, node):
    """Build a plain assignment equivalent to an annotated assignment.

    The target and value nodes of ``node`` are reused (not copied) and the
    new node is placed at the same line and column as ``node``; the type
    annotation is simply left out.

    :param node: the ``ast.AnnAssign`` node to convert.
    :return: an ``ast.Assign`` node with ``node.target`` as its only target.
    """
    plain_assign = ast.Assign(targets=[node.target], value=node.value)
    plain_assign.lineno = node.lineno
    plain_assign.col_offset = node.col_offset
    return plain_assign

def _visit_input_ann_assign(self, node, annotation):
    """Register an input-annotated variable and drop its assignment.

    The CWL type and the argparse type are looked up in
    ``input_type_mapper``; the input counts as required unless its CWL type
    ends with ``'?'``.

    :param node: the ``ast.AnnAssign`` node being visited.
    :param annotation: the key identifying the annotation in ``input_type_mapper``.
    :return: ``None``, which makes ``ast.NodeTransformer`` remove the node.
    """
    variable_name = node.target.id
    type_spec = self.input_type_mapper[annotation]
    cwl_type = type_spec[0]
    argparse_type = type_spec[1]
    is_required = not cwl_type.endswith('?')

    input_variable = _VariableNameTypePair(
        name=variable_name,
        cwl_typeof=cwl_type,
        argparse_typeof=argparse_type,
        required=is_required,
        is_input=True,
        is_output=False,
        value=None,
    )
    self.extracted_variables.append(input_variable)
    return None

def _visit_default_dumper(self, node, dumper):
    """Register a variable that is written to a file by a built-in dumper.

    ``dumper`` is a source template with a ``{var_name}`` placeholder. It is
    rendered with the variable's name, parsed, and the resulting statements
    are queued in ``self.to_dump``. The variable is recorded as an output
    whose value is the variable's own name.

    :param node: the ``ast.AnnAssign`` node being visited.
    :param dumper: source-code template of the statements that dump the variable.
    :return: the assignment with its type annotation removed.
    """
    variable_name = node.target.id

    dump_source = dumper.format(var_name=variable_name)
    dump_statements = ast.parse(dump_source).body
    self.to_dump.append(dump_statements)

    output_variable = _VariableNameTypePair(
        name=variable_name,
        cwl_typeof=None,
        argparse_typeof=None,
        required=None,
        is_input=False,
        is_output=True,
        value=variable_name,
    )
    self.extracted_variables.append(output_variable)
    return self.conv_AnnAssign_to_Assign(node)

def _visit_user_defined_dumper(self, node):
    """Register a variable that is written to a file by a user-chosen method.

    The annotation is a call whose first argument is an attribute expression
    (``<object>.<method>``) and whose second argument is a literal with the
    output file name. A new statement ``<object>.<method>(<remaining args>)``
    is built from the annotation's remaining positional and keyword
    arguments and queued in ``self.to_dump``; the variable is recorded as an
    output whose value is the file-name literal.

    :param node: the ``ast.AnnAssign`` node being visited.
    :return: the assignment with its type annotation removed.
    """
    dump_call = node.annotation
    method_ref = dump_call.args[0]
    filename_node = dump_call.args[1]

    load_ctx = ast.Load()
    # Copy the object expression so the node inside the annotation is not
    # shared with the newly generated call.
    func_name = deepcopy(method_ref.value)
    func_name.ctx = load_ctx
    ast.fix_missing_locations(func_name)

    new_dump_node = ast.Expr(
        col_offset=0, lineno=0,
        value=ast.Call(
            args=dump_call.args[1:], keywords=dump_call.keywords, col_offset=0,
            func=ast.Attribute(
                attr=method_ref.attr,
                value=func_name,
                col_offset=0, ctx=load_ctx, lineno=0,
            ),
        )
    )
    ast.fix_missing_locations(new_dump_node)
    self.to_dump.append([new_dump_node])

    # ``.s`` is a deprecated alias that no longer exists on ast.Constant in
    # Python 3.14; older parsers (< 3.8) emit ast.Str nodes that only
    # provide ``.s``, so both shapes are supported.
    if isinstance(filename_node, ast.Constant):
        output_file = filename_node.value
    else:
        output_file = filename_node.s

    self.extracted_variables.append(_VariableNameTypePair(
        node.target.id, None, None, None, False, True, output_file)
    )
    # removing type annotation
    return self.conv_AnnAssign_to_Assign(node)

def _visit_output_type(self, node):
    """Register an output-annotated variable and strip its annotation.

    The assigned value must be a literal; it is stored as the ``value`` of
    the recorded output variable.

    :param node: the ``ast.AnnAssign`` node being visited.
    :return: the assignment with its type annotation removed.
    """
    literal = node.value
    # ``.s`` is a deprecated alias that no longer exists on ast.Constant in
    # Python 3.14; older parsers (< 3.8) emit ast.Str nodes that only
    # provide ``.s``, so both shapes are supported.
    if isinstance(literal, ast.Constant):
        output_path = literal.value
    else:
        output_path = literal.s

    self.extracted_variables.append(_VariableNameTypePair(
        node.target.id, None, None, None, False, True, output_path)
    )
    # removing type annotation (shared helper keeps this consistent with
    # the dumpable handlers)
    return self.conv_AnnAssign_to_Assign(node)

def visit_AnnAssign(self, node):
try:
annotation = self.__get_annotation__(node.annotation)
if annotation in self.input_type_mapper:
mapper = self.input_type_mapper[annotation]
self.extracted_nodes.append(
(node, mapper[0], mapper[1], not mapper[0].endswith('?'), True, False)
)
return None

elif (isinstance(node.annotation, ast.Name) and node.annotation.id in self.output_type_mapper) or \
(isinstance(node.annotation, ast.Str) and node.annotation.s in self.output_type_mapper):
self.extracted_nodes.append(
(node, None, None, None, False, True)
)
# removing type annotation
return ast.Assign(
col_offset=node.col_offset,
lineno=node.lineno,
targets=[node.target],
value=node.value
)
return self._visit_input_ann_assign(node, annotation)
elif annotation in self.dumpable_mapper:
dumper = self.dumpable_mapper[annotation]
if dumper is not None:
return self._visit_default_dumper(node, dumper)
else:
return self._visit_user_defined_dumper(node)
elif annotation in self.output_type_mapper:
return self._visit_output_type(node)
except Exception:
pass
return node
Expand Down Expand Up @@ -123,12 +189,6 @@ class AnnotatedIPython2CWLToolConverter:
"""

_code: str

_VariableNameTypePair = namedtuple(
'VariableNameTypePair',
['name', 'cwl_typeof', 'argparse_typeof', 'required', 'is_input', 'is_output', 'value']
)

"""The annotated python code to convert."""

def __init__(self, annotated_ipython_code: str):
Expand All @@ -137,19 +197,15 @@ def __init__(self, annotated_ipython_code: str):

self._code = annotated_ipython_code
extractor = AnnotatedVariablesExtractor()
self._tree = ast.fix_missing_locations(extractor.visit(ast.parse(self._code)))
self._tree = extractor.visit(ast.parse(self._code))
[self._tree.body.extend(d) for d in extractor.to_dump]
self._tree = ast.fix_missing_locations(self._tree)
self._variables = []
for node, cwl_type, click_type, required, is_input, is_output in extractor.extracted_nodes:
if is_input:
self._variables.append(
self._VariableNameTypePair(node.target.id, cwl_type, click_type, required, is_input, is_output,
None)
)
if is_output:
self._variables.append(
self._VariableNameTypePair(node.target.id, cwl_type, click_type, required, is_input, is_output,
node.value.s)
)
for variable in extractor.extracted_variables: # type: _VariableNameTypePair
if variable.is_input:
self._variables.append(variable)
if variable.is_output:
self._variables.append(variable)

@classmethod
def from_jupyter_notebook_node(cls, node: NotebookNode) -> 'AnnotatedIPython2CWLToolConverter':
Expand Down

0 comments on commit af7d2c5

Please sign in to comment.