diff --git a/examples/intro.ipynb b/examples/intro.ipynb index 6934942..c7cdf8c 100644 --- a/examples/intro.ipynb +++ b/examples/intro.ipynb @@ -17,6 +17,7 @@ "metadata": {}, "outputs": [], "source": [ + "%matplotlib inline\n", "import pandas as pd\n", "import matplotlib\n", "from ipython2cwl.iotypes import CWLFilePathInput, CWLFilePathOutput" @@ -31,6 +32,15 @@ "dataset: CWLFilePathInput = 'example.csv'" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To expose a variable as a CWL output we can use the basic output data types or the dumpables. \n", + "\n", + "Suppose that the Jupyter Notebook user wants to save the image to a file; in that case we can use the CWLFilePathOutput annotation. " + ] + }, { "cell_type": "code", "execution_count": 3, @@ -51,13 +61,22 @@ ], "source": [ "data = pd.read_csv(dataset)\n", - "# original data\n", "fig = data.plot()\n", "\n", + "# original data\n", "original_image: CWLFilePathOutput = 'original_data.png'\n", "fig.figure.savefig(original_image)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's say that the Jupyter Notebook user does not want to store the image explicitly, but we still want it as an output file in the CWL. We can use the CWLPNGPlot annotation. ipython2cwl will then save that image for you as a PNG file named `new_data.png`. \n", + "\n", + "> For more complicated use cases check CWLDumpable in the [docs](https://ipython2cwl.readthedocs.io/)" + ] + }, { "cell_type": "code", "execution_count": 4, @@ -65,7 +84,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -78,37 +97,20 @@ ], "source": [ "# transform data\n", + "import matplotlib.pyplot as plt\n", "data.sort_values(by='Random B', ascending=False, inplace=True, ignore_index=True)\n", - "fig = data.plot()\n", - "\n", - "after_transform_data: CWLFilePathOutput = 'new_data.png'\n", - "fig.figure.savefig(after_transform_data)" + "plt.figure()\n", + "new_data: 'CWLPNGPlot' = plt.plot(data)" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Traceback (most recent call last):\r\n", - " File \"/Users/dks/.pyenv/versions/3.6.10/bin/jupyter-jn2cwl\", line 33, in \r\n", - " sys.exit(load_entry_point('ipython2cwl', 'console_scripts', 'jupyter-jn2cwl')())\r\n", - " File \"/Users/dks/Workspaces/IPython2CWL/ipython2cwl/ipython2cwl.py\", line 32, in main\r\n", - " converter = AnnotatedIPython2CWLToolConverter(script_code)\r\n", - " File \"/Users/dks/Workspaces/IPython2CWL/ipython2cwl/cwltoolextractor.py\", line 133, in __init__\r\n", - " self._tree = ast.fix_missing_locations(extractor.visit(ast.parse(self._code)))\r\n", - " File \"/Users/dks/.pyenv/versions/3.6.10/lib/python3.6/ast.py\", line 35, in parse\r\n", - " return compile(source, filename, mode, PyCF_ONLY_AST)\r\n", - "TypeError: compile() arg 1 must be a string, bytes or AST object\r\n" - ] - } - ], + "outputs": [], "source": [ - "# !jupyter-jn2cwl -o compiled_tool intro.ipynb" + "#! jupyter-repo2cwl . -o .\n", + "#!open new_data.png" ] }, { @@ -117,67 +119,58 @@ "source": [ "To compile the presented jupyter notebook to a CWL CommandLineTool run the following commands:\n", "```sh\n", - "mkdir tool\n", - "jupyter-jn2cwl -o tool/tool.tar intro.ipynb\n", - "```\n", - "The tar file contains all the required files. Now we can extract them and build the docker image. \n", - "\n", - "```sh\n", - "cd tool\n", - "tar -xvf tool.tar\n", - "docker build . -t jn2cwl:latest .\n", + "jupyter-repo2cwl . 
-o .\n", "```\n", "\n", "To test the tool as a cwl we can execute the following command:\n", "```sh\n", - "cwltool tool.cwl --dataset ../example.csv\n", + "cwltool intro.cwl --dataset example.csv\n", "```\n", "\n", "`\n", - "INFO /Users/dks/.pyenv/versions/3.6.10/bin/cwltool 3.0.20200530110633\n", - "INFO Resolved 'tool.cwl' to 'file:///Users/dks/Workspaces/IPython2CWL/examples/compiled_tool/tool.cwl'\n", - "INFO [job tool.cwl] /private/tmp/docker_tmpq5oemdog$ docker \\\n", + "INFO /Users/dks/.pyenv/versions/3.6.10/bin/cwltool 3.0.20200706173533\n", + "INFO Resolved 'intro.cwl' to 'file:///Users/dks/Workspaces/IPython2CWL/examples/intro.cwl'\n", + "INFO [job intro.cwl] /private/tmp/docker_tmp7wzg7cbi$ docker \\\n", " run \\\n", " -i \\\n", - " --mount=type=bind,source=/private/tmp/docker_tmpq5oemdog,target=/mWoQja \\\n", - " --mount=type=bind,source=/private/tmp/docker_tmpombd7mgl,target=/tmp \\\n", - " --mount=type=bind,source=/Users/dks/Workspaces/IPython2CWL/examples/example.csv,target=/var/lib/cwl/stgf1649a28-7fa0-4a19-9b59-54d4839f363e/example.csv,readonly \\\n", - " --workdir=/mWoQja \\\n", + " --mount=type=bind,source=/private/tmp/docker_tmp7wzg7cbi,target=/Oxibvb \\\n", + " --mount=type=bind,source=/private/tmp/docker_tmpje9_oz4b,target=/tmp \\\n", + " --mount=type=bind,source=/Users/dks/Workspaces/IPython2CWL/examples/example.csv,target=/var/lib/cwl/stg5a294c1c-b254-4c5b-b925-ccabe08460ca/example.csv,readonly \\\n", + " --workdir=/Oxibvb \\\n", " --read-only=true \\\n", " --net=none \\\n", " --user=501:20 \\\n", " --rm \\\n", " --env=TMPDIR=/tmp \\\n", - " --env=HOME=/mWoQja \\\n", - " --cidfile=/private/tmp/docker_tmprs7uv65u/20200622183924-428346.cid \\\n", - " jn2cwl:latest \\\n", - " notebookTool \\\n", + " --env=HOME=/Oxibvb \\\n", + " --cidfile=/private/tmp/docker_tmp2wk9z9z0/20200709182229-968352.cid \\\n", + " r2d-2fvar-2ffolders-2fk8-2f800hfw-5fn2md-5f2zb44lhhtqqr0000gn-2ft-2frepo2cwl-5f3n29rdzx-2frepo1594315330 \\\n", + " /app/cwl/bin/intro 
\\\n", + " -- \\\n", " --dataset \\\n", - " /var/lib/cwl/stgf1649a28-7fa0-4a19-9b59-54d4839f363e/example.csv\n", - "INFO [job tool.cwl] Max memory used: 198MiB\n", - "INFO [job tool.cwl] completed success\n", + " /var/lib/cwl/stg5a294c1c-b254-4c5b-b925-ccabe08460ca/example.csv\n", + "INFO [job intro.cwl] Max memory used: 227MiB\n", + "INFO [job intro.cwl] completed success\n", "{\n", - " \"after_transform_data\": {\n", - " \"location\": \"file:///Users/dks/Workspaces/IPython2CWL/examples/compiled_tool/new_data.png\",\n", + " \"new_data\": {\n", + " \"location\": \"file:///Users/dks/Workspaces/IPython2CWL/examples/new_data.png\",\n", " \"basename\": \"new_data.png\",\n", " \"class\": \"File\",\n", - " \"checksum\": \"sha1$d4d3a83c00d744931753c9aa93981d4a599ed391\",\n", - " \"size\": 40115,\n", - " \"path\": \"/Users/dks/Workspaces/IPython2CWL/examples/compiled_tool/new_data.png\"\n", + " \"checksum\": \"sha1$5d1154b55c741efc5adcd5e200abf626345dda3c\",\n", + " \"size\": 22656,\n", + " \"path\": \"/Users/dks/Workspaces/IPython2CWL/examples/new_data.png\"\n", " },\n", " \"original_image\": {\n", - " \"location\": \"file:///Users/dks/Workspaces/IPython2CWL/examples/compiled_tool/original_data.png\",\n", + " \"location\": \"file:///Users/dks/Workspaces/IPython2CWL/examples/original_data.png\",\n", " \"basename\": \"original_data.png\",\n", " \"class\": \"File\",\n", - " \"checksum\": \"sha1$48966757640d677f3065b4e79ece68e5d4b324dd\",\n", - " \"size\": 52590,\n", - " \"path\": \"/Users/dks/Workspaces/IPython2CWL/examples/compiled_tool/original_data.png\"\n", + " \"checksum\": \"sha1$f5dd2d7ce249b247b48bc31018aa793a342dd120\",\n", + " \"size\": 31326,\n", + " \"path\": \"/Users/dks/Workspaces/IPython2CWL/examples/original_data.png\"\n", " }\n", "}\n", "INFO Final process status is success\n", - "`\n", - "\n", - "Currently, in the presented version of the ipython2cwl the tool does not support magic commands but that feature will be added in soon!! 
for the reason if we write commands in the format \"!ipython2cwl intro.ipynb\" it will not work!" + "`" ] } ], diff --git a/examples/new_data.png b/examples/new_data.png index bb756b7..a97e7d3 100644 Binary files a/examples/new_data.png and b/examples/new_data.png differ diff --git a/examples/requirements.txt b/examples/requirements.txt new file mode 100644 index 0000000..5d56fdd --- /dev/null +++ b/examples/requirements.txt @@ -0,0 +1,2 @@ +pandas +matplotlib diff --git a/ipython2cwl/cwltoolextractor.py b/ipython2cwl/cwltoolextractor.py index dae0c1a..abe9962 100644 --- a/ipython2cwl/cwltoolextractor.py +++ b/ipython2cwl/cwltoolextractor.py @@ -15,7 +15,7 @@ from nbformat.notebooknode import NotebookNode # type: ignore from .iotypes import CWLFilePathInput, CWLBooleanInput, CWLIntInput, CWLStringInput, CWLFilePathOutput, \ - CWLDumpableFile, CWLDumpableBinaryFile, CWLDumpable + CWLDumpableFile, CWLDumpableBinaryFile, CWLDumpable, CWLPNGPlot, CWLPNGFigure from .requirements_manager import RequirementsManager with open(os.sep.join([os.path.abspath(os.path.dirname(__file__)), 'templates', 'template.dockerfile'])) as f: @@ -64,9 +64,21 @@ class AnnotatedVariablesExtractor(ast.NodeTransformer): } dumpable_mapper = { - (CWLDumpableFile.__name__,): "with open('{var_name}', 'w') as f:\n\tf.write({var_name})", - (CWLDumpableBinaryFile.__name__,): "with open('{var_name}', 'wb') as f:\n\tf.write({var_name})", + (CWLDumpableFile.__name__,): ( + (None, "with open('{var_name}', 'w') as f:\n\tf.write({var_name})",), + lambda node: node.target.id + ), + (CWLDumpableBinaryFile.__name__,): ( + (None, "with open('{var_name}', 'wb') as f:\n\tf.write({var_name})"), + lambda node: node.target.id + ), (CWLDumpable.__name__, CWLDumpable.dump.__name__): None, + (CWLPNGPlot.__name__,): ( + (None, '{var_name}[-1].figure.savefig("{var_name}.png")'), + lambda node: str(node.target.id) + '.png'), + (CWLPNGFigure.__name__,): ( + ('import matplotlib.pyplot as plt\nplt.figure()', 
'{var_name}[-1].figure.savefig("{var_name}.png")'), + lambda node: str(node.target.id) + '.png'), } def __init__(self, *args, **kwargs): @@ -110,12 +122,18 @@ def _visit_input_ann_assign(self, node, annotation): return None def _visit_default_dumper(self, node, dumper): - dump_tree = ast.parse(dumper.format(var_name=node.target.id)) - self.to_dump.append(dump_tree.body) + if dumper[0][0] is None: + pre_code_body = [] + else: + pre_code_body = ast.parse(dumper[0][0].format(var_name=node.target.id)).body + if dumper[0][1] is None: + post_code_body = [] + else: + post_code_body = ast.parse(dumper[0][1].format(var_name=node.target.id)).body self.extracted_variables.append(_VariableNameTypePair( - node.target.id, None, None, None, False, True, node.target.id) + node.target.id, None, None, None, False, True, dumper[1](node)) ) - return self.conv_AnnAssign_to_Assign(node) + return [*pre_code_body, self.conv_AnnAssign_to_Assign(node), *post_code_body] def _visit_user_defined_dumper(self, node): load_ctx = ast.Load() diff --git a/ipython2cwl/iotypes.py b/ipython2cwl/iotypes.py index 6ae99db..0adf544 100644 --- a/ipython2cwl/iotypes.py +++ b/ipython2cwl/iotypes.py @@ -158,3 +158,50 @@ class CWLDumpableBinaryFile(CWLDumpable): and at the CWL, the data, will be mapped as a output. """ pass + + +class CWLPNGPlot(CWLDumpable): + """Use that annotation to define that after the assignment of that variable the plt.savefig() should + be called. + + >>> import matplotlib.pyplot as plt + >>> data = [1,2,3] + >>> new_data: 'CWLPNGPlot' = plt.plot(data) + + the converter will transform these lines to + + >>> import matplotlib.pyplot as plt + >>> data = [1,2,3] + >>> new_data: 'CWLPNGPlot' = plt.plot(data) + >>> plt.savefig('new_data.png') + + + Note that by default if you have multiple plot statements in the same notebook they will be written + to the same file. If you want to write them to separate files you have to plot them in separate figures. 
+ To do that in your notebook you have to create a new figure before the plot command or use the CWLPNGFigure. + + >>> import matplotlib.pyplot as plt + >>> data = [1,2,3] + >>> plt.figure() + >>> new_data: 'CWLPNGPlot' = plt.plot(data) + """ + pass + + +class CWLPNGFigure(CWLDumpable): + """The same as :class:`~ipython2cwl.iotypes.CWLPNGPlot` but creates new figures before plotting. Use that + annotation if you don't want to write multiple graphs in the same image + + >>> import matplotlib.pyplot as plt + >>> data = [1,2,3] + >>> new_data: 'CWLPNGFigure' = plt.plot(data) + + the converter will transform these lines to + + >>> import matplotlib.pyplot as plt + >>> data = [1,2,3] + >>> plt.figure() + >>> new_data: 'CWLPNGFigure' = plt.plot(data) + >>> plt.savefig('new_data.png') + + """ diff --git a/test-requirements.txt b/test-requirements.txt index 34e0730..8afa0bd 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -4,6 +4,7 @@ coveralls>=2.0.0 virtualenv>=3.1.0 gitpython>=3.1.3 docker>=4.2.1 -git+https://github.com/giannisdoukas/cwltool.git#egg=cwltool +cwltool==3.0.20200706173533 pandas==1.0.5 mypy +matplotlib \ No newline at end of file diff --git a/tests/test_cwltoolextractor.py b/tests/test_cwltoolextractor.py index 3c50fa6..724a030 100644 --- a/tests/test_cwltoolextractor.py +++ b/tests/test_cwltoolextractor.py @@ -474,3 +474,87 @@ def test_AnnotatedIPython2CWLToolConverter_custom_dumpables(self): os.remove(f) except FileNotFoundError: pass + + def test_AnnotatedIPython2CWLToolConverter_CWLPNGPlot(self): + code = os.linesep.join([ + "import matplotlib.pyplot as plt", + "new_data: 'CWLPNGPlot' = plt.plot([1,2,3,4])", + ]) + converter = AnnotatedIPython2CWLToolConverter(code) + new_script = converter._wrap_script_to_method( + converter._tree, + converter._variables + ) + try: + os.remove('new_data.png') + except FileNotFoundError: + pass + exec(new_script) + locals()['main']() + self.assertTrue(os.path.isfile('new_data.png')) + 
os.remove('new_data.png') + + tool = converter.cwl_command_line_tool() + self.assertDictEqual( + { + 'cwlVersion': "v1.1", + 'class': 'CommandLineTool', + 'baseCommand': 'notebookTool', + 'hints': { + 'DockerRequirement': {'dockerImageId': 'jn2cwl:latest'} + }, + 'arguments': ['--'], + 'inputs': {}, + 'outputs': { + 'new_data': { + 'type': 'File', + 'outputBinding': { + 'glob': 'new_data.png' + } + } + }, + }, + tool + ) + + def test_AnnotatedIPython2CWLToolConverter_CWLPNGFigure(self): + code = os.linesep.join([ + "import matplotlib.pyplot as plt", + "new_data: 'CWLPNGFigure' = plt.plot([1,2,3,4])", + ]) + converter = AnnotatedIPython2CWLToolConverter(code) + new_script = converter._wrap_script_to_method( + converter._tree, + converter._variables + ) + try: + os.remove('new_data.png') + except FileNotFoundError: + pass + exec(new_script) + locals()['main']() + self.assertTrue(os.path.isfile('new_data.png')) + os.remove('new_data.png') + + tool = converter.cwl_command_line_tool() + self.assertDictEqual( + { + 'cwlVersion': "v1.1", + 'class': 'CommandLineTool', + 'baseCommand': 'notebookTool', + 'hints': { + 'DockerRequirement': {'dockerImageId': 'jn2cwl:latest'} + }, + 'arguments': ['--'], + 'inputs': {}, + 'outputs': { + 'new_data': { + 'type': 'File', + 'outputBinding': { + 'glob': 'new_data.png' + } + } + }, + }, + tool + ) \ No newline at end of file diff --git a/tests/test_system_tests.py b/tests/test_system_tests.py index d48b4fd..2ca482b 100644 --- a/tests/test_system_tests.py +++ b/tests/test_system_tests.py @@ -29,8 +29,8 @@ def test_repo2cwl(self): self.assertListEqual(['example1.cwl'], [f for f in os.listdir(output_dir) if not f.startswith('.')]) with open(os.path.join(output_dir, 'example1.cwl')) as f: - print(20 * '=') print('workflow file') + print(20 * '=') print(f.read()) print(20 * '=')