Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
42f5470
init readthedocs
giannisdoukas Jun 23, 2020
c3bc592
add readthedocs badge
giannisdoukas Jun 23, 2020
b364ad0
init repo2cwl
giannisdoukas Jun 26, 2020
c8f338b
add requirement
giannisdoukas Jun 26, 2020
1605973
rm test from that branch
giannisdoukas Jun 26, 2020
6ca19b4
fix permissions issues
giannisdoukas Jun 27, 2020
59c876a
add docker service in travis
giannisdoukas Jun 27, 2020
c0cf051
add sudo in travis
giannisdoukas Jun 27, 2020
267b4d9
fix issue with entrypoint for travis
giannisdoukas Jun 27, 2020
1c04268
rm sudo from travis
giannisdoukas Jun 27, 2020
a93ea54
add docker for osx matrix
giannisdoukas Jun 27, 2020
b1fa5c8
skip tests with docker in travis-osx
giannisdoukas Jun 27, 2020
0bb0426
support non annotated jupyter notebook & notebooks with the same name…
giannisdoukas Jun 27, 2020
c141c7f
add main function for repo2cwl
giannisdoukas Jun 27, 2020
2b87df7
change imports
giannisdoukas Jun 27, 2020
b79de81
support typing hints as strings
giannisdoukas Jun 27, 2020
8c81ffa
add console script
giannisdoukas Jun 27, 2020
47289cb
update gitignore
giannisdoukas Jun 27, 2020
c1b45f8
rm forgotten print
giannisdoukas Jun 27, 2020
52c44fc
support magic and system commands
giannisdoukas Jun 27, 2020
a571fe0
fix issue with nbconvert
giannisdoukas Jun 27, 2020
1a10131
rm TODO
giannisdoukas Jun 27, 2020
86ae407
add ipython as requirements
giannisdoukas Jun 27, 2020
c5cf43a
update doc
giannisdoukas Jun 27, 2020
8a1611b
fix micro issues on cloning from remote git
giannisdoukas Jun 27, 2020
716cc4d
update the docs
giannisdoukas Jun 27, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -243,3 +243,5 @@ cython_debug/
/external_examples/
/tests/jn/output/
tmp.py
/html/
cwlbuild
2 changes: 2 additions & 0 deletions .readthedocs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
python:
setup_py_install: true
6 changes: 5 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
services:
- docker
language: python
python:
- "3.6"
Expand All @@ -10,6 +12,7 @@ before_install:
install:
- pip install -r test-requirements.txt
- python setup.py install
- pip freeze
script:
- pycodestyle --max-line-length=119 $(find ipython2cwl -name '*.py')
- coverage run --source ipython2cwl -m unittest discover tests
Expand All @@ -27,4 +30,5 @@ matrix:
- virtualenv -p python3 venv
- source venv/bin/activate
- pip3 install -U -r test-requirements.txt
script: coverage run --source ipython2cwl -m unittest discover tests
script: coverage run --source ipython2cwl -m unittest discover tests
env: TRAVIS_IGNORE_DOCKER=true
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@

[![Build Status](https://travis-ci.com/giannisdoukas/ipython2cwl.svg)](https://travis-ci.com/giannisdoukas/ipython2cwl)
[![Coverage Status](https://coveralls.io/repos/github/giannisdoukas/ipython2cwl/badge.svg?branch=dev)](https://coveralls.io/github/giannisdoukas/ipython2cwl?branch=dev)
[![Documentation Status](https://readthedocs.org/projects/ipython2cwl/badge/?version=latest)](https://ipython2cwl.readthedocs.io/en/latest/?badge=latest)

53 changes: 53 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))

# -- Project information -----------------------------------------------------

project = 'ipython2cwl'
copyright = '2020, Yannis Doukas'
author = 'Yannis Doukas'

# The full version, including alpha/beta/rc tags
release = "0.1"

# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ['sphinx.ext.autodoc']

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Document that serves as the root of the documentation tree (docs/index.rst).
master_doc = 'index'
46 changes: 46 additions & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
IPython2CWL: Convert Jupyter Notebook to CWL
================================================================================

.. image:: https://travis-ci.com/giannisdoukas/ipython2cwl.svg?branch=master
:target: https://travis-ci.com/giannisdoukas/ipython2cwl
.. image:: https://coveralls.io/repos/github/giannisdoukas/ipython2cwl/badge.svg?branch=master
:target: https://coveralls.io/github/giannisdoukas/ipython2cwl?branch=master


------------------------------------------------------------------------------------------

IPython2CWL is a tool for converting `IPython <https://ipython.org/>`_ Jupyter Notebooks to
`CWL <https://www.commonwl.org/>`_ Command Line Tools by simply providing typing annotations.

.. code-block:: python

from ipython2cwl.iotypes import CWLFilePathInput, CWLFilePathOutput
import csv
input_filename: 'CWLFilePathInput' = 'data.csv'
with open(input_filename) as f:
csv_reader = csv.reader(f)
data = [line for line in csv_reader]
number_of_lines = len(data)
result_file: 'CWLFilePathOutput' = 'number_of_lines.txt'
with open(result_file, 'w') as f:
f.write(str(number_of_lines))


------------------------------------------------------------------------------------------

IPython2CWL is based on `repo2docker <https://github.com/jupyter/repo2docker>`_, the same tool
used by `mybinder <https://mybinder.org/>`_. Now, by writing Jupyter Notebooks and publishing them together with their
repo2docker configuration, the community can not only execute the notebooks remotely but also use them as steps in
scientific workflows.

* Install ipython2cwl
* Ensure that you have docker running
* Create a directory to store the generated cwl files, for example cwlbuild
* Execute :code:`jupyter repo2cwl https://github.com/giannisdoukas/cwl-annotated-jupyter-notebook.git -o cwlbuild`

Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
17 changes: 15 additions & 2 deletions ipython2cwl/cwltool.py → ipython2cwl/cwltoolextractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
from typing import Dict, Any

import astor
import nbconvert
import yaml
from nbformat.notebooknode import NotebookNode

from .iotypes import CWLFilePathInput, CWLBooleanInput, CWLIntInput, CWLStringInput, CWLFilePathOutput
from .requirements_manager import RequirementsManager
Expand Down Expand Up @@ -51,7 +53,8 @@ def __init__(self, *args, **kwargs):

def visit_AnnAssign(self, node):
try:
if isinstance(node.annotation, ast.Name) and node.annotation.id in self.input_type_mapper:
if (isinstance(node.annotation, ast.Name) and node.annotation.id in self.input_type_mapper) or \
(isinstance(node.annotation, ast.Str) and node.annotation.s in self.input_type_mapper):
mapper = self.input_type_mapper[node.annotation.id]
self.extracted_nodes.append(
(node, mapper[0], mapper[1], True, True, False)
Expand All @@ -72,7 +75,8 @@ def visit_AnnAssign(self, node):
(node, mapper[0] + '[]', mapper[1], True, True, False)
)
return None
elif isinstance(node.annotation, ast.Name) and node.annotation.id in self.output_type_mapper:
elif (isinstance(node.annotation, ast.Name) and node.annotation.id in self.output_type_mapper) or \
(isinstance(node.annotation, ast.Str) and node.annotation.s in self.output_type_mapper):
self.extracted_nodes.append(
(node, None, None, None, False, True)
)
Expand Down Expand Up @@ -121,6 +125,9 @@ class AnnotatedIPython2CWLToolConverter:
"""The annotated python code to convert."""

def __init__(self, annotated_ipython_code: str):
"""Creates an AnnotatedIPython2CWLToolConverter. If the annotated_ipython_code contains magic commands use the
from_jupyter_notebook_node method"""

self._code = annotated_ipython_code
extractor = AnnotatedVariablesExtractor()
self._tree = ast.fix_missing_locations(extractor.visit(ast.parse(self._code)))
Expand All @@ -137,6 +144,12 @@ def __init__(self, annotated_ipython_code: str):
node.value.s)
)

@classmethod
def from_jupyter_notebook_node(cls, node: NotebookNode) -> 'AnnotatedIPython2CWLToolConverter':
    """Alternate constructor that builds the converter from a notebook node.

    The node is exported to Python source with nbconvert's ``PythonExporter``
    and the resulting script is passed to the regular constructor.

    :param node: the notebook to convert.
    :return: a converter initialised with the exported script.
    """
    # The exporter yields (script, resources); only the script is needed.
    exported_script, _resources = nbconvert.PythonExporter().from_notebook_node(node)
    return cls(exported_script)

@classmethod
def _wrap_script_to_method(cls, tree, variables) -> str:
main_template_code = os.linesep.join([
Expand Down
17 changes: 12 additions & 5 deletions ipython2cwl/ipython2cwl.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
import argparse
import json
from io import StringIO
from pathlib import Path
from typing import List, Optional

import nbconvert
import nbformat
from .cwltool import AnnotatedIPython2CWLToolConverter

from .cwltoolextractor import AnnotatedIPython2CWLToolConverter


def jn2code(notebook):
    """Convert a notebook into Python source code.

    The notebook is serialised to JSON and handed to nbconvert's
    ``PythonExporter`` through an in-memory text stream.

    :param notebook: JSON-serialisable notebook object, e.g. the value
        returned by ``nbformat.read``.
    :return: the exported Python script as a string.
    """
    exporter = nbconvert.PythonExporter()
    # from_file returns an (output, resources) tuple. Only the output is
    # source code, and the converter that receives this value parses it with
    # ast.parse, which requires a string rather than the whole tuple.
    script, _resources = exporter.from_file(StringIO(json.dumps(notebook)))
    return script


def main(argv: Optional[List[str]] = None):
Expand All @@ -17,10 +27,7 @@ def main(argv: Optional[List[str]] = None):
notebook = nbformat.read(args.jn[0], as_version=4)
output: Path = args.output
args.jn[0].close()
script_code = '\n'.join(
[f"\n\n# --------- cell - {i} ---------\n\n{cell.source}" for i, cell in
enumerate(filter(lambda c: c.cell_type == 'code', notebook.cells), start=1)]
)
script_code = jn2code(notebook)

converter = AnnotatedIPython2CWLToolConverter(script_code)
converter.compile(output)
Expand Down
163 changes: 163 additions & 0 deletions ipython2cwl/repo2cwl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
import argparse
import logging
import os
import shutil
import stat
import sys
import tempfile
from pathlib import Path
from typing import List, Optional, Tuple, Dict
from urllib.parse import urlparse, ParseResult

import git
import nbformat
import yaml
from git import Repo
from repo2docker import Repo2Docker

from .cwltoolextractor import AnnotatedIPython2CWLToolConverter

logger = logging.getLogger('repo2cwl')


def _get_notebook_paths_from_dir(dir_path: str):
    """Recursively collect the paths of every ``.ipynb`` file below *dir_path*.

    :param dir_path: directory to walk.
    :return: list of paths, each one the walked directory joined with the
        file name, in the order ``os.walk`` yields them.
    """
    return [
        os.path.join(current_dir, filename)
        for current_dir, _subdirs, filenames in os.walk(dir_path)
        for filename in filenames
        if filename.endswith('.ipynb')
    ]


def _store_jn_as_script(notebook_path: str, git_directory_absolute_path: str, bin_absolute_path: str, image_id: str) \
        -> Tuple[Optional[Dict], Optional[str]]:
    """Write a notebook as an executable script and describe it as a CWL tool.

    The notebook is read, converted with ``AnnotatedIPython2CWLToolConverter``
    and, when the converter found annotated variables, stored under
    *bin_absolute_path* at the same relative location the notebook has inside
    the repository. The written file is then marked as executable.

    :param notebook_path: path of the notebook to convert.
    :param git_directory_absolute_path: repository root used to compute the
        notebook's relative path.
    :param bin_absolute_path: directory that receives the generated script.
    :param image_id: value forwarded to ``converter.cwl_command_line_tool``.
    :return: ``(tool, script_relative_path)``, or ``(None, None)`` when the
        converter reports no variables for the notebook.
    """
    with open(notebook_path) as notebook_file:
        notebook = nbformat.read(notebook_file, as_version=4)

    converter = AnnotatedIPython2CWLToolConverter.from_jupyter_notebook_node(notebook)

    if len(converter._variables) == 0:
        logger.info(f"Notebook {notebook_path} does not contains typing annotations. skipping...")
        return None, None

    # Trim the trailing six characters; for the paths produced by
    # _get_notebook_paths_from_dir that is the '.ipynb' suffix.
    script_relative_path = os.path.relpath(notebook_path, git_directory_absolute_path)[:-len('.ipynb')]
    parent_parts = script_relative_path.split(os.sep)[:-1]
    if parent_parts:
        # Mirror the notebook's sub-directories below the bin directory.
        target_directory = os.path.join(bin_absolute_path, os.sep.join(parent_parts))
        os.makedirs(target_directory, exist_ok=True)
        script_absolute_name = os.path.join(target_directory, os.path.basename(script_relative_path))
    else:
        script_absolute_name = os.path.join(bin_absolute_path, script_relative_path)

    header_lines = [
        '#!/usr/bin/env ipython',
        '"""',
        'DO NOT EDIT THIS FILE',
        'THIS FILE IS AUTO-GENERATED BY THE ipython2cwl.',
        'FOR MORE INFORMATION CHECK https://github.com/giannisdoukas/ipython2cwl',
        '"""',
    ]
    wrapped_code = converter._wrap_script_to_method(converter._tree, converter._variables)
    script = os.linesep.join(header_lines + [wrapped_code])
    with open(script_absolute_name, 'w') as script_file:
        script_file.write(script)

    tool = converter.cwl_command_line_tool(image_id)

    # Add the owner-execute bit on top of the file's current permissions.
    in_git_dir_script_file = os.path.join(bin_absolute_path, script_relative_path)
    current_mode = os.stat(in_git_dir_script_file).st_mode
    os.chmod(in_git_dir_script_file, current_mode | stat.S_IEXEC)
    return tool, script_relative_path


def existing_path(path: str):
    """Return *path* as a ``Path`` after checking that it is a directory.

    Used as an ``argparse`` ``type`` callable for the output option.

    :param path: directory path given on the command line.
    :return: the path wrapped in ``pathlib.Path``.
    :raises ValueError: if the path is not an existing directory.
    """
    directory = Path(path)
    if directory.is_dir():
        return directory
    raise ValueError('Directory does not exists')


def parser_arguments(argv: List[str]):
    """Parse the repo2cwl command line.

    :param argv: full argument vector; the first element (the program name)
        is skipped.
    :return: the ``argparse`` namespace with ``repo`` (a one-element list
        holding the parsed URI, scheme defaulting to ``file``) and ``output``
        (an existing directory).
    """
    cli_arguments = argv[1:]
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'repo',
        nargs=1,
        type=lambda uri: urlparse(uri, scheme='file'),
    )
    parser.add_argument(
        '-o', '--output',
        required=True,
        type=existing_path,
        help='Output directory to store the generated cwl files',
    )
    return parser.parse_args(cli_arguments)


def setup_logger():
    """Configure the module logger to print INFO-and-above records to stdout.

    Calling the function more than once does not stack additional stdout
    handlers, so messages are not duplicated when ``repo2cwl`` runs several
    times in the same process.
    """
    # The handler level alone is not enough: a logger without an explicit
    # level inherits the root logger's WARNING threshold, so info() records
    # would be discarded before they ever reach the handler.
    logger.setLevel(logging.INFO)

    stdout_already_attached = any(
        isinstance(existing, logging.StreamHandler) and getattr(existing, 'stream', None) is sys.stdout
        for existing in logger.handlers
    )
    if stdout_already_attached:
        return

    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)


def repo2cwl(argv: Optional[List[str]] = None):
    """Command line entry point: turn the notebooks of a repository into CWL tools.

    The repository is copied (``file`` scheme) or cloned (``http``, ``https``,
    ``ssh``) into a temporary directory, processed by ``_repo2cwl`` and one
    ``.cwl`` file per generated tool is written to the output directory. The
    file name is the tool's ``baseCommand`` without the ``/app/cwl/bin/``
    prefix, with ``/`` replaced by ``_``.

    :param argv: argument vector including the program name; defaults to
        ``sys.argv``.
    :raises ValueError: if the URI scheme is not supported or a local
        repository path is not a directory.
    """
    setup_logger()
    argv = sys.argv if argv is None else argv
    args = parser_arguments(argv)
    uri: ParseResult = args.repo[0]
    output_directory: Path = args.output
    supported_schemes = {'file', 'http', 'https', 'ssh'}
    if uri.scheme not in supported_schemes:
        raise ValueError(f'Supported schema uris: {supported_schemes}')
    # Validate the local path before any temporary directory is created.
    if uri.scheme == 'file' and not os.path.isdir(uri.path):
        raise ValueError('Directory does not exists')

    temporary_root = tempfile.mkdtemp(prefix='repo2cwl_')
    local_git_directory = os.path.join(temporary_root, 'repo')
    try:
        if uri.scheme == 'file':
            logger.info(f'copy repo to temp directory: {local_git_directory}')
            shutil.copytree(uri.path, local_git_directory)
            local_git = git.Repo(local_git_directory)
        else:
            logger.info(f'cloning repo to temp directory: {local_git_directory}')
            local_git = git.Repo.clone_from(uri.geturl(), local_git_directory)

        image_id, cwl_tools = _repo2cwl(local_git)
        logger.info(f'Generated image id: {image_id}')
        for tool in cwl_tools:
            base_command_script_name = f'{tool["baseCommand"][len("/app/cwl/bin/"):].replace("/", "_")}.cwl'
            tool_filename = str(output_directory.joinpath(base_command_script_name))
            with open(tool_filename, 'w') as f:
                logger.info(f'Creating CWL command line tool: {tool_filename}')
                yaml.safe_dump(tool, f)
    finally:
        # Remove the directory created by mkdtemp itself, not only its 'repo'
        # child, and do so even when the conversion fails part-way through.
        logger.info(f'Cleaning local temporary directory {local_git_directory}...')
        shutil.rmtree(temporary_root)


def _repo2cwl(git_directory_path: Repo) -> Tuple[str, List[Dict]]:
    """
    Takes a Repo mounted on a local directory, generates a script for each annotated notebook and builds the
    docker image with repo2docker. The function creates new files inside the repository and commits the changes,
    so do not use it on repositories you do not want to be modified.
    :param git_directory_path: the Repo object (not a path string) of the local working copy
    :return: The generated build image id & the cwl description
    """
    r2d = Repo2Docker()
    # The repository content is placed under /app inside the image.
    r2d.target_repo_dir = os.path.join(os.path.sep, 'app')
    r2d.repo = git_directory_path.tree().abspath
    # Generated scripts are stored in <repo>/cwl/bin.
    bin_path = os.path.join(r2d.repo, 'cwl', 'bin')
    os.makedirs(bin_path, exist_ok=True)
    notebooks_paths = _get_notebook_paths_from_dir(r2d.repo)

    tools = []
    for notebook in notebooks_paths:
        cwl_command_line_tool, script_name = _store_jn_as_script(
            notebook,
            git_directory_path.tree().abspath,
            bin_path,
            r2d.output_image_spec
        )
        # _store_jn_as_script returns (None, None) for notebooks it skips.
        if cwl_command_line_tool is None:
            continue
        # Path of the generated script as seen from inside the image.
        cwl_command_line_tool['baseCommand'] = os.path.join('/app', 'cwl', 'bin', script_name)
        tools.append(cwl_command_line_tool)
    git_directory_path.index.commit("auto-commit")

    r2d.build()
    # fix dockerImageId
    # NOTE(review): presumably output_image_spec is only final once build() has run, which is why the value
    # handed to _store_jn_as_script above is overwritten here -- confirm against repo2docker.
    for cwl_command_line_tool in tools:
        cwl_command_line_tool['hints']['DockerRequirement']['dockerImageId'] = r2d.output_image_spec
    return r2d.output_image_spec, tools


# Run the command line entry point when the module is executed as a script.
if __name__ == '__main__':
    repo2cwl()
Loading