From fbb3ca2be3b21776420812c812725c1ae2cfce92 Mon Sep 17 00:00:00 2001
From: Giannis Doukas <giannisdoukas2311@gmail.com>
Date: Wed, 1 Jul 2020 21:57:45 +0100
Subject: [PATCH 1/9] fix new lines after the docs in the autogenerated script

---
 ipython2cwl/repo2cwl.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/ipython2cwl/repo2cwl.py b/ipython2cwl/repo2cwl.py
index 1cd536a..3a9ab37 100644
--- a/ipython2cwl/repo2cwl.py
+++ b/ipython2cwl/repo2cwl.py
@@ -56,8 +56,7 @@ def _store_jn_as_script(notebook_path: str, git_directory_absolute_path: str, bi
         'DO NOT EDIT THIS FILE',
         'THIS FILE IS AUTO-GENERATED BY THE ipython2cwl.',
         'FOR MORE INFORMATION CHECK https://github.com/giannisdoukas/ipython2cwl',
-        '\n\n',
-        '"""',
+        '"""\n\n',
         converter._wrap_script_to_method(converter._tree, converter._variables)
     ])
     with open(script_absolute_name, 'w') as fd:

From 667f53d9c2e3e19f393161e789769e8ebba1a2bf Mon Sep 17 00:00:00 2001
From: Giannis Doukas <giannisdoukas2311@gmail.com>
Date: Wed, 1 Jul 2020 23:16:28 +0100
Subject: [PATCH 2/9] init dumpables

---
 ipython2cwl/cwltoolextractor.py | 66 ++++++++++++++++++++-------------
 ipython2cwl/iotypes.py          | 29 ++++++++++++---
 tests/test_cwltoolextractor.py  | 53 ++++++++++++++++++++++++++
 3 files changed, 117 insertions(+), 31 deletions(-)

diff --git a/ipython2cwl/cwltoolextractor.py b/ipython2cwl/cwltoolextractor.py
index 861c861..b5992a0 100644
--- a/ipython2cwl/cwltoolextractor.py
+++ b/ipython2cwl/cwltoolextractor.py
@@ -6,14 +6,15 @@
 import tempfile
 from collections import namedtuple
 from pathlib import Path
-from typing import Dict, Any
+from typing import Dict, Any, List
 
 import astor
 import nbconvert
 import yaml
 from nbformat.notebooknode import NotebookNode
 
-from .iotypes import CWLFilePathInput, CWLBooleanInput, CWLIntInput, CWLStringInput, CWLFilePathOutput
+from .iotypes import CWLFilePathInput, CWLBooleanInput, CWLIntInput, CWLStringInput, CWLFilePathOutput, CWLDumpableFile, \
+    CWLDumpableBinaryFile
 from .requirements_manager import RequirementsManager
 
 with open(os.sep.join([os.path.abspath(os.path.dirname(__file__)), 'templates', 'template.dockerfile'])) as f:
@@ -21,6 +22,11 @@
 with open(os.sep.join([os.path.abspath(os.path.dirname(__file__)), 'templates', 'template.setup'])) as f:
     SETUP_TEMPLATE = f.read()
 
+_VariableNameTypePair = namedtuple(
+    'VariableNameTypePair',
+    ['name', 'cwl_typeof', 'argparse_typeof', 'required', 'is_input', 'is_output', 'value']
+)
+
 
 # TODO: check if supports recursion if main function exists
 
@@ -55,9 +61,15 @@ class AnnotatedVariablesExtractor(ast.NodeTransformer):
         CWLFilePathOutput.__name__
     }
 
+    dumpable_mapper = {
+        (CWLDumpableFile.__name__,): "with open('{var_name}', 'w') as f:\n\tf.write({var_name})",
+        (CWLDumpableBinaryFile.__name__,): "with open('{var_name}', 'wb') as f:\n\tf.write({var_name})",
+    }
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.extracted_nodes = []
+        self.extracted_variables: List = []
+        self.to_dump: List = []
 
     def __get_annotation__(self, type_annotation):
         annotation = None
@@ -77,15 +89,27 @@ def visit_AnnAssign(self, node):
             annotation = self.__get_annotation__(node.annotation)
             if annotation in self.input_type_mapper:
                 mapper = self.input_type_mapper[annotation]
-                self.extracted_nodes.append(
-                    (node, mapper[0], mapper[1], not mapper[0].endswith('?'), True, False)
+                self.extracted_variables.append(_VariableNameTypePair(
+                    node.target.id, mapper[0], mapper[1], not mapper[0].endswith('?'), True, False, None)
                 )
                 return None
-
+            elif annotation in self.dumpable_mapper:
+                dump_tree = ast.parse(self.dumpable_mapper[annotation].format(var_name=node.target.id))
+                self.to_dump.append(dump_tree.body)
+                self.extracted_variables.append(_VariableNameTypePair(
+                    node.target.id, None, None, None, False, True, node.target.id)
+                )
+                # removing type annotation
+                return ast.Assign(
+                    col_offset=node.col_offset,
+                    lineno=node.lineno,
+                    targets=[node.target],
+                    value=node.value
+                )
             elif (isinstance(node.annotation, ast.Name) and node.annotation.id in self.output_type_mapper) or \
                     (isinstance(node.annotation, ast.Str) and node.annotation.s in self.output_type_mapper):
-                self.extracted_nodes.append(
-                    (node, None, None, None, False, True)
+                self.extracted_variables.append(_VariableNameTypePair(
+                    node.target.id, None, None, None, False, True, node.value.s)
                 )
                 # removing type annotation
                 return ast.Assign(
@@ -123,12 +147,6 @@ class AnnotatedIPython2CWLToolConverter:
     """
 
     _code: str
-
-    _VariableNameTypePair = namedtuple(
-        'VariableNameTypePair',
-        ['name', 'cwl_typeof', 'argparse_typeof', 'required', 'is_input', 'is_output', 'value']
-    )
-
     """The annotated python code to convert."""
 
     def __init__(self, annotated_ipython_code: str):
@@ -137,19 +155,15 @@ def __init__(self, annotated_ipython_code: str):
 
         self._code = annotated_ipython_code
         extractor = AnnotatedVariablesExtractor()
-        self._tree = ast.fix_missing_locations(extractor.visit(ast.parse(self._code)))
+        self._tree = extractor.visit(ast.parse(self._code))
+        [self._tree.body.extend(d) for d in extractor.to_dump]
+        self._tree = ast.fix_missing_locations(self._tree)
         self._variables = []
-        for node, cwl_type, click_type, required, is_input, is_output in extractor.extracted_nodes:
-            if is_input:
-                self._variables.append(
-                    self._VariableNameTypePair(node.target.id, cwl_type, click_type, required, is_input, is_output,
-                                               None)
-                )
-            if is_output:
-                self._variables.append(
-                    self._VariableNameTypePair(node.target.id, cwl_type, click_type, required, is_input, is_output,
-                                               node.value.s)
-                )
+        for variable in extractor.extracted_variables:  # type: _VariableNameTypePair
+            if variable.is_input:
+                self._variables.append(variable)
+            if variable.is_output:
+                self._variables.append(variable)
 
     @classmethod
     def from_jupyter_notebook_node(cls, node: NotebookNode) -> 'AnnotatedIPython2CWLToolConverter':
diff --git a/ipython2cwl/iotypes.py b/ipython2cwl/iotypes.py
index 8cb6688..3f64d39 100644
--- a/ipython2cwl/iotypes.py
+++ b/ipython2cwl/iotypes.py
@@ -1,19 +1,38 @@
+class _CWLInput:
+    pass
+
+
+class CWLFilePathInput(_CWLInput):
+    pass
+
+
+class CWLBooleanInput(_CWLInput):
+    pass
+
+
+class CWLStringInput(_CWLInput):
+    pass
+
+
+class CWLIntInput(_CWLInput):
+    pass
+
 
-class CWLFilePathInput:
+class _CWLOutput:
     pass
 
 
-class CWLBooleanInput:
+class CWLFilePathOutput(_CWLOutput):
     pass
 
 
-class CWLStringInput:
+class _CWLDumpable(_CWLOutput):
     pass
 
 
-class CWLIntInput:
+class CWLDumpableFile(_CWLDumpable):
     pass
 
 
-class CWLFilePathOutput:
+class CWLDumpableBinaryFile(_CWLDumpable):
     pass
diff --git a/tests/test_cwltoolextractor.py b/tests/test_cwltoolextractor.py
index da573ad..b6332c5 100644
--- a/tests/test_cwltoolextractor.py
+++ b/tests/test_cwltoolextractor.py
@@ -377,3 +377,56 @@ def test_AnnotatedIPython2CWLToolConverter_optional_array_input(self):
         self.assertListEqual([], AnnotatedIPython2CWLToolConverter(os.linesep.join([
             'x1: "RANDOM CHARACTERS!!!!!!" = True'
         ]))._variables)
+
+    def test_AnnotatedIPython2CWLToolConverter_dumpables(self):
+        script = os.linesep.join([
+            'message: CWLDumpableFile = "this is a text from a dumpable"',
+            'message2: "CWLDumpableFile" = "this is a text from a dumpable 2"',
+            'binary_message: CWLDumpableBinaryFile = b"this is a text from a binary dumpable"',
+            'print("Message:", message)',
+            'print(b"Binary Message:" + binary_message)',
+        ])
+        converter = AnnotatedIPython2CWLToolConverter(script)
+        generated_script = AnnotatedIPython2CWLToolConverter._wrap_script_to_method(
+            converter._tree, converter._variables
+        )
+        for f in ['message', 'binary_message', 'message2']:
+            try:
+                os.remove(f)
+            except FileNotFoundError:
+                pass
+        exec(generated_script)
+        print(generated_script)
+        locals()['main']()
+        with open('message') as f:
+            self.assertEqual('this is a text from a dumpable', f.read())
+        with open('message2') as f:
+            self.assertEqual('this is a text from a dumpable 2', f.read())
+        with open('binary_message', 'rb') as f:
+            self.assertEqual(b'this is a text from a binary dumpable', f.read())
+
+        cwl_tool = converter.cwl_command_line_tool()
+        print(cwl_tool)
+        self.assertDictEqual(
+            {
+                'message': {
+                    'type': 'File',
+                    'outputBinding': {
+                        'glob': 'message'
+                    }
+                },
+                'message2': {
+                    'type': 'File',
+                    'outputBinding': {
+                        'glob': 'message2'
+                    }
+                },
+                'binary_message': {
+                    'type': 'File',
+                    'outputBinding': {
+                        'glob': 'binary_message'
+                    }
+                }
+            },
+            cwl_tool['outputs']
+        )

From 12028efb9c312d028aee755a6ac4d256f5dcc206 Mon Sep 17 00:00:00 2001
From: Giannis Doukas <giannisdoukas2311@gmail.com>
Date: Wed, 1 Jul 2020 23:16:57 +0100
Subject: [PATCH 3/9] rm files generated from tests

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index 3ee1292..56a1207 100644
--- a/.gitignore
+++ b/.gitignore
@@ -247,3 +247,6 @@ tmp.py
 cwlbuild
 /tests/repo-like/result.yaml
 /tests/repo-like/messages.txt
+/tests/binary_message
+/tests/message
+/tests/message2

From 6853f88fc397a705ccc4f390c64c2566cd2def14 Mon Sep 17 00:00:00 2001
From: Giannis Doukas <giannisdoukas2311@gmail.com>
Date: Wed, 1 Jul 2020 23:28:07 +0100
Subject: [PATCH 4/9] fix code style

---
 ipython2cwl/cwltoolextractor.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ipython2cwl/cwltoolextractor.py b/ipython2cwl/cwltoolextractor.py
index b5992a0..d3344c8 100644
--- a/ipython2cwl/cwltoolextractor.py
+++ b/ipython2cwl/cwltoolextractor.py
@@ -13,8 +13,8 @@
 import yaml
 from nbformat.notebooknode import NotebookNode
 
-from .iotypes import CWLFilePathInput, CWLBooleanInput, CWLIntInput, CWLStringInput, CWLFilePathOutput, CWLDumpableFile, \
-    CWLDumpableBinaryFile
+from .iotypes import CWLFilePathInput, CWLBooleanInput, CWLIntInput, CWLStringInput, CWLFilePathOutput, \
+    CWLDumpableFile, CWLDumpableBinaryFile
 from .requirements_manager import RequirementsManager
 
 with open(os.sep.join([os.path.abspath(os.path.dirname(__file__)), 'templates', 'template.dockerfile'])) as f:

From 398226092125c5c4c9960e87ecceb7f1c19b4e85 Mon Sep 17 00:00:00 2001
From: Giannis Doukas <giannisdoukas2311@gmail.com>
Date: Thu, 2 Jul 2020 16:33:08 +0100
Subject: [PATCH 5/9] add custom dumpables functionality

---
 ipython2cwl/cwltoolextractor.py | 65 ++++++++++++++++++++++++++-------
 ipython2cwl/iotypes.py          | 20 ++++++----
 test-requirements.txt           |  3 +-
 tests/test_cwltoolextractor.py  | 44 ++++++++++++++++++++++
 4 files changed, 111 insertions(+), 21 deletions(-)

diff --git a/ipython2cwl/cwltoolextractor.py b/ipython2cwl/cwltoolextractor.py
index d3344c8..65e1d6d 100644
--- a/ipython2cwl/cwltoolextractor.py
+++ b/ipython2cwl/cwltoolextractor.py
@@ -5,6 +5,7 @@
 import tarfile
 import tempfile
 from collections import namedtuple
+from copy import deepcopy
 from pathlib import Path
 from typing import Dict, Any, List
 
@@ -14,7 +15,7 @@
 from nbformat.notebooknode import NotebookNode
 
 from .iotypes import CWLFilePathInput, CWLBooleanInput, CWLIntInput, CWLStringInput, CWLFilePathOutput, \
-    CWLDumpableFile, CWLDumpableBinaryFile
+    CWLDumpableFile, CWLDumpableBinaryFile, CWLDumpable
 from .requirements_manager import RequirementsManager
 
 with open(os.sep.join([os.path.abspath(os.path.dirname(__file__)), 'templates', 'template.dockerfile'])) as f:
@@ -64,6 +65,7 @@ class AnnotatedVariablesExtractor(ast.NodeTransformer):
     dumpable_mapper = {
         (CWLDumpableFile.__name__,): "with open('{var_name}', 'w') as f:\n\tf.write({var_name})",
         (CWLDumpableBinaryFile.__name__,): "with open('{var_name}', 'wb') as f:\n\tf.write({var_name})",
+        (CWLDumpable.__name__, CWLDumpable.dump.__name__): None,
     }
 
     def __init__(self, *args, **kwargs):
@@ -82,6 +84,8 @@ def __get_annotation__(self, type_annotation):
                 annotation = self.__get_annotation__(ann_expr.value)
         elif isinstance(type_annotation, ast.Subscript):
             annotation = (type_annotation.value.id, *self.__get_annotation__(type_annotation.slice.value))
+        elif isinstance(type_annotation, ast.Call):
+            annotation = (type_annotation.func.value.id, type_annotation.func.attr)
         return annotation
 
     def visit_AnnAssign(self, node):
@@ -94,18 +98,53 @@ def visit_AnnAssign(self, node):
                 )
                 return None
             elif annotation in self.dumpable_mapper:
-                dump_tree = ast.parse(self.dumpable_mapper[annotation].format(var_name=node.target.id))
-                self.to_dump.append(dump_tree.body)
-                self.extracted_variables.append(_VariableNameTypePair(
-                    node.target.id, None, None, None, False, True, node.target.id)
-                )
-                # removing type annotation
-                return ast.Assign(
-                    col_offset=node.col_offset,
-                    lineno=node.lineno,
-                    targets=[node.target],
-                    value=node.value
-                )
+                dumper = self.dumpable_mapper[annotation]
+                if dumper is not None:
+                    dump_tree = ast.parse(dumper.format(var_name=node.target.id))
+                    self.to_dump.append(dump_tree.body)
+                    self.extracted_variables.append(_VariableNameTypePair(
+                        node.target.id, None, None, None, False, True, node.target.id)
+                    )
+                    # removing type annotation
+                    return ast.Assign(
+                        col_offset=node.col_offset,
+                        lineno=node.lineno,
+                        targets=[node.target],
+                        value=node.value
+                    )
+                else:
+                    load_ctx = ast.Load()
+                    func_name = deepcopy(node.annotation.args[0].value)
+                    func_name.ctx = load_ctx
+                    ast.fix_missing_locations(func_name)
+
+                    new_dump_node = ast.Expr(
+                        col_offset=0, lineno=0,
+                        value=ast.Call(
+                            args=node.annotation.args[1:],
+                            col_offset=0,
+                            func=ast.Attribute(
+                                attr=node.annotation.args[0].attr,
+                                col_offset=0,
+                                ctx=load_ctx,
+                                lineno=0,
+                                value=func_name,
+                            ),
+                            keywords=node.annotation.keywords
+                        )
+                    )
+                    ast.fix_missing_locations(new_dump_node)
+                    self.to_dump.append([new_dump_node])
+                    self.extracted_variables.append(_VariableNameTypePair(
+                        node.target.id, None, None, None, False, True, node.annotation.args[1].s)
+                    )
+                    # removing type annotation
+                    return ast.Assign(
+                        col_offset=node.col_offset,
+                        lineno=node.lineno,
+                        targets=[node.target],
+                        value=node.value
+                    )
             elif (isinstance(node.annotation, ast.Name) and node.annotation.id in self.output_type_mapper) or \
                     (isinstance(node.annotation, ast.Str) and node.annotation.s in self.output_type_mapper):
                 self.extracted_variables.append(_VariableNameTypePair(
diff --git a/ipython2cwl/iotypes.py b/ipython2cwl/iotypes.py
index 3f64d39..c0b31e0 100644
--- a/ipython2cwl/iotypes.py
+++ b/ipython2cwl/iotypes.py
@@ -1,8 +1,11 @@
+from typing import Callable
+
+
 class _CWLInput:
     pass
 
 
-class CWLFilePathInput(_CWLInput):
+class CWLFilePathInput(str, _CWLInput):
     pass
 
 
@@ -10,7 +13,7 @@ class CWLBooleanInput(_CWLInput):
     pass
 
 
-class CWLStringInput(_CWLInput):
+class CWLStringInput(str, _CWLInput):
     pass
 
 
@@ -22,17 +25,20 @@ class _CWLOutput:
     pass
 
 
-class CWLFilePathOutput(_CWLOutput):
+class CWLFilePathOutput(str, _CWLOutput):
     pass
 
 
-class _CWLDumpable(_CWLOutput):
-    pass
+class CWLDumpable(_CWLOutput):
+
+    @classmethod
+    def dump(cls, dumper: Callable, *args, **kwargs):
+        return _CWLOutput
 
 
-class CWLDumpableFile(_CWLDumpable):
+class CWLDumpableFile(CWLDumpable):
     pass
 
 
-class CWLDumpableBinaryFile(_CWLDumpable):
+class CWLDumpableBinaryFile(CWLDumpable):
     pass
diff --git a/test-requirements.txt b/test-requirements.txt
index 06fa332..5896fc7 100644
--- a/test-requirements.txt
+++ b/test-requirements.txt
@@ -4,4 +4,5 @@ coveralls>=2.0.0
 virtualenv>=3.1.0
 gitpython>=3.1.3
 docker>=4.2.1
-git+https://github.com/giannisdoukas/cwltool.git#egg=cwltool
\ No newline at end of file
+git+https://github.com/giannisdoukas/cwltool.git#egg=cwltool
+pandas==1.0.5
diff --git a/tests/test_cwltoolextractor.py b/tests/test_cwltoolextractor.py
index b6332c5..3c50fa6 100644
--- a/tests/test_cwltoolextractor.py
+++ b/tests/test_cwltoolextractor.py
@@ -430,3 +430,47 @@ def test_AnnotatedIPython2CWLToolConverter_dumpables(self):
             },
             cwl_tool['outputs']
         )
+
+    def test_AnnotatedIPython2CWLToolConverter_custom_dumpables(self):
+        script = os.linesep.join([
+            'import pandas',
+            'from ipython2cwl.iotypes import CWLDumpable',
+            'd: CWLDumpable.dump(d.to_csv, "dumpable.csv", sep="\\t", index=False) = pandas.DataFrame([[1,2,3], [4,5,6], [7,8,9]])'
+        ])
+        converter = AnnotatedIPython2CWLToolConverter(script)
+        generated_script = AnnotatedIPython2CWLToolConverter._wrap_script_to_method(
+            converter._tree, converter._variables
+        )
+        for f in ["dumpable.csv"]:
+            try:
+                os.remove(f)
+            except FileNotFoundError:
+                pass
+        exec(generated_script)
+        print(generated_script)
+        locals()['main']()
+        import pandas
+        data_file = pandas.read_csv('dumpable.csv', sep="\t")
+        self.assertListEqual(
+            [[0, 0, 0], [0, 0, 0], [0, 0, 0]],
+            (data_file.to_numpy() - pandas.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]).to_numpy()).tolist()
+        )
+
+        cwl_tool = converter.cwl_command_line_tool()
+        print(cwl_tool)
+        self.assertDictEqual(
+            {
+                'd': {
+                    'type': 'File',
+                    'outputBinding': {
+                        'glob': 'dumpable.csv'
+                    }
+                },
+            },
+            cwl_tool['outputs']
+        )
+        for f in ["dumpable.csv"]:
+            try:
+                os.remove(f)
+            except FileNotFoundError:
+                pass

From a39f33632705e4367b22c320da91cc2eb4ca8a12 Mon Sep 17 00:00:00 2001
From: Giannis Doukas <giannisdoukas2311@gmail.com>
Date: Thu, 2 Jul 2020 20:19:02 +0100
Subject: [PATCH 6/9] add documentation

---
 docs/index.rst         |  51 +++++++++++++++---
 ipython2cwl/iotypes.py | 118 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 160 insertions(+), 9 deletions(-)

diff --git a/docs/index.rst b/docs/index.rst
index 05b8055..4490e8f 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -30,18 +30,53 @@ IPython2CWL is a tool for converting `IPython <https://ipython.org/>`_ Jupyter N
 ------------------------------------------------------------------------------------------
 
 IPython2CWL is based on `repo2docker <https://github.com/jupyter/repo2docker>`_, the same tool
-used by `mybinder <https://mybinder.org/>`_. Now, by writing Jupyter Notebook and publish them, including repo2docker
-configuration, the community can not only execute the notebooks remotely but also to use them as steps in scientific
+used by `mybinder <https://mybinder.org/>`_. Now, by writing Jupyter Notebook and publishing them, including repo2docker
+configuration, the community can not only execute the notebooks remotely but can also use them as steps in scientific
 workflows.
 
-* Install ipython2cwl: :code:`pip install python2cwl`
+* `Install ipython2cwl <https://pypi.org/project/ipython2cwl/>`_: :code:`pip install ipython2cwl`
 * Ensure that you have docker running
 * Create a directory to store the generated cwl files, for example cwlbuild
 * Execute :code:`jupyter repo2cwl https://github.com/giannisdoukas/cwl-annotated-jupyter-notebook.git -o cwlbuild`
 
-Indices and tables
-==================
+HOW DOES IT WORK?
+------------------
+
+IPython2CWL parses each IPython notebook and finds the variables with the typing annotations. For each input variable,
+the assignment of that variable will be generalised as a command line argument. Each output variable will be mapped
+in the cwl description as an output file.
+
+SUPPORTED TYPES
+------------------
+
+.. automodule:: ipython2cwl.iotypes
+   :members:
+
+
+THAT'S COOL! WHAT ABOUT LIST & OPTIONAL ARGUMENTS?
+"""""""""""""""""""""""""""""""""""""""""""""""""""
+
+The basic input data types can be combined with the List and Optional annotations. For example, write the following
+annotation:
+
+.. code-block:: python
+
+  file_inputs: List[CWLFilePathInput] = ['data1.txt', 'data2.txt', 'data3.txt']
+  example: Optional[CWLStringInput] = None
+
+
+SEEMS INTERESTING! WHAT ABOUT A DEMO?
+----------------------------------------
+
+If you would like to see a demo before you start annotating your notebooks, check here:
+`github.com/giannisdoukas/ipython2cwl-demo <https://github.com/giannisdoukas/ipython2cwl-demo>`_
+
+
+WHAT IF I WANT TO VALIDATE THAT THE GENERATED SCRIPTS ARE CORRECT?
+------------------------------------------------------------------
+
+All the generated scripts are stored in the docker image under the directory :code:`/app/cwl/bin`. You can see the list
+of the files by running :code:`docker run [IMAGE_ID] find /app/cwl/bin/ -type f`.
+
+
 
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
diff --git a/ipython2cwl/iotypes.py b/ipython2cwl/iotypes.py
index c0b31e0..6ae99db 100644
--- a/ipython2cwl/iotypes.py
+++ b/ipython2cwl/iotypes.py
@@ -1,3 +1,37 @@
+"""
+
+Basic Data Types
+^^^^^^^^^^^^^^^^^
+
+Each variable can be an input or an output. The basic data types are:
+
+* Inputs:
+
+  * CWLFilePathInput
+
+  * CWLBooleanInput
+
+  * CWLStringInput
+
+  * CWLIntInput
+
+* Outputs:
+
+  * CWLFilePathOutput
+
+  * CWLDumpableFile
+
+  * CWLDumpableBinaryFile
+
+
+Complex Dumpables Types
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Dumpables are variables which can be written to a file, but which the jupyter notebook developer
+does not want to write explicitly, for example to avoid the IO overhead. To bypass that, you can use
+the Dumpables annotation. See :func:`~ipython2cwl.iotypes.CWLDumpable.dump` for more details.
+
+"""
 from typing import Callable
 
 
@@ -6,18 +40,50 @@ class _CWLInput:
 
 
 class CWLFilePathInput(str, _CWLInput):
+    """Use that hint to annotate that a variable is a string-path input. You can use the typing annotation
+    as a string by importing it. At the generated script a command line argument with the name of the variable
+    will be created and the assignment of value will be generalised.
+
+    >>> dataset1: CWLFilePathInput = './data/data.csv'
+    >>> dataset2: 'CWLFilePathInput' = './data/data.csv'
+
+    """
     pass
 
 
 class CWLBooleanInput(_CWLInput):
+    """Use that hint to annotate that a variable is a boolean input. You can use the typing annotation
+    as a string by importing it. At the generated script a command line argument with the name of the variable
+    will be created and the assignment of value will be generalised.
+
+    >>> dataset1: CWLBooleanInput = True
+    >>> dataset2: 'CWLBooleanInput' = False
+
+    """
     pass
 
 
 class CWLStringInput(str, _CWLInput):
+    """Use that hint to annotate that a variable is a string input. You can use the typing annotation
+        as a string by importing it. At the generated script a command line argument with the name of the variable
+        will be created and the assignment of value will be generalised.
+
+        >>> dataset1: CWLStringInput = 'this is a message input'
+        >>> dataset2: 'CWLStringInput' = 'yet another message input'
+
+        """
     pass
 
 
 class CWLIntInput(_CWLInput):
+    """Use that hint to annotate that a variable is an integer input. You can use the typing annotation
+    as a string by importing it. At the generated script a command line argument with the name of the variable
+    will be created and the assignment of value will be generalised.
+
+    >>> dataset1: CWLIntInput = 1
+    >>> dataset2: 'CWLIntInput' = 2
+
+    """
     pass
 
 
@@ -26,19 +92,69 @@ class _CWLOutput:
 
 
 class CWLFilePathOutput(str, _CWLOutput):
+    """Use that hint to annotate that a variable is a string-path to an output file. You can use the typing annotation
+    as a string by importing it. The generated file will be mapped as a CWL output.
+
+    >>> filename: CWLFilePathOutput = 'data.csv'
+
+    """
     pass
 
 
 class CWLDumpable(_CWLOutput):
+    """Use that class to define custom Dumpables variables."""
 
     @classmethod
-    def dump(cls, dumper: Callable, *args, **kwargs):
+    def dump(cls, dumper: Callable, filename, *args, **kwargs):
+        """
+        Set the function to be used to dump the variable to a file.
+
+        >>> import pandas
+        >>> d: CWLDumpable.dump(d.to_csv, "dumpable.csv", sep="\\t", index=False) = pandas.DataFrame(
+        ...     [[1,2,3], [4,5,6], [7,8,9]]
+        ... )
+
+        In that example the converter will add at the end of the script the following line:
+        >>> d.to_csv("dumpable.csv", sep="\\t", index=False)
+
+        :param dumper: The function that has to be called to write the variable to a file.
+        :param filename: The name of the generated file. That string must be the first argument
+                        in the dumper function. That file will also be mapped as an output in
+                        the CWL file.
+        :param args: Any positional arguments you want to pass to dumper after the filename
+        :param kwargs: Any keyword arguments you want to pass to dumper
+        """
         return _CWLOutput
 
 
 class CWLDumpableFile(CWLDumpable):
+    """Use that annotation to define that a variable should be dumped to a text file. For example for the annotation:
+
+    >>> data: CWLDumpableFile = "this is text data"
+
+
+    the converter will append at the end of the script the following lines:
+
+
+    >>> with open('data', 'w') as f:
+    ...     f.write(data)
+
+
+    and in the CWL, the data will be mapped as an output.
+    """
     pass
 
 
 class CWLDumpableBinaryFile(CWLDumpable):
+    """Use that annotation to define that a variable should be dumped to a binary file. For example for the annotation:
+
+    >>> data: CWLDumpableBinaryFile = b"this is text data"
+
+    the converter will append at the end of the script the following lines:
+
+    >>> with open('data', 'wb') as f:
+    ...     f.write(data)
+
+    and in the CWL, the data will be mapped as an output.
+    """
     pass

From 1b09ddac9c19c85af9930dd15f29577f9c286f63 Mon Sep 17 00:00:00 2001
From: Giannis Doukas <giannisdoukas2311@gmail.com>
Date: Thu, 2 Jul 2020 20:32:40 +0100
Subject: [PATCH 7/9] cleanup

---
 ipython2cwl/cwltoolextractor.py | 131 ++++++++++++++++----------------
 1 file changed, 67 insertions(+), 64 deletions(-)

diff --git a/ipython2cwl/cwltoolextractor.py b/ipython2cwl/cwltoolextractor.py
index 65e1d6d..61ac743 100644
--- a/ipython2cwl/cwltoolextractor.py
+++ b/ipython2cwl/cwltoolextractor.py
@@ -29,8 +29,6 @@
 )
 
 
-# TODO: check if supports recursion if main function exists
-
 class AnnotatedVariablesExtractor(ast.NodeTransformer):
     input_type_mapper = {
         (CWLFilePathInput.__name__,): (
@@ -59,7 +57,7 @@ class AnnotatedVariablesExtractor(ast.NodeTransformer):
     }}
 
     output_type_mapper = {
-        CWLFilePathOutput.__name__
+        (CWLFilePathOutput.__name__,)
     }
 
     dumpable_mapper = {
@@ -88,75 +86,80 @@ def __get_annotation__(self, type_annotation):
             annotation = (type_annotation.func.value.id, type_annotation.func.attr)
         return annotation
 
+    @classmethod
+    def conv_AnnAssign_to_Assign(cls, node):
+        return ast.Assign(
+            col_offset=node.col_offset,
+            lineno=node.lineno,
+            targets=[node.target],
+            value=node.value
+        )
+
+    def _visit_input_ann_assign(self, node, annotation):
+        mapper = self.input_type_mapper[annotation]
+        self.extracted_variables.append(_VariableNameTypePair(
+            node.target.id, mapper[0], mapper[1], not mapper[0].endswith('?'), True, False, None)
+        )
+        return None
+
+    def _visit_default_dumper(self, node, dumper):
+        dump_tree = ast.parse(dumper.format(var_name=node.target.id))
+        self.to_dump.append(dump_tree.body)
+        self.extracted_variables.append(_VariableNameTypePair(
+            node.target.id, None, None, None, False, True, node.target.id)
+        )
+        return self.conv_AnnAssign_to_Assign(node)
+
+    def _visit_user_defined_dumper(self, node):
+        load_ctx = ast.Load()
+        func_name = deepcopy(node.annotation.args[0].value)
+        func_name.ctx = load_ctx
+        ast.fix_missing_locations(func_name)
+
+        new_dump_node = ast.Expr(
+            col_offset=0, lineno=0,
+            value=ast.Call(
+                args=node.annotation.args[1:], keywords=node.annotation.keywords, col_offset=0,
+                func=ast.Attribute(
+                    attr=node.annotation.args[0].attr,
+                    value=func_name,
+                    col_offset=0, ctx=load_ctx, lineno=0,
+                ),
+            )
+        )
+        ast.fix_missing_locations(new_dump_node)
+        self.to_dump.append([new_dump_node])
+        self.extracted_variables.append(_VariableNameTypePair(
+            node.target.id, None, None, None, False, True, node.annotation.args[1].s)
+        )
+        # removing type annotation
+        return self.conv_AnnAssign_to_Assign(node)
+
+    def _visit_output_type(self, node):
+        self.extracted_variables.append(_VariableNameTypePair(
+            node.target.id, None, None, None, False, True, node.value.s)
+        )
+        # removing type annotation
+        return ast.Assign(
+            col_offset=node.col_offset,
+            lineno=node.lineno,
+            targets=[node.target],
+            value=node.value
+        )
+
     def visit_AnnAssign(self, node):
         try:
             annotation = self.__get_annotation__(node.annotation)
             if annotation in self.input_type_mapper:
-                mapper = self.input_type_mapper[annotation]
-                self.extracted_variables.append(_VariableNameTypePair(
-                    node.target.id, mapper[0], mapper[1], not mapper[0].endswith('?'), True, False, None)
-                )
-                return None
+                return self._visit_input_ann_assign(node, annotation)
             elif annotation in self.dumpable_mapper:
                 dumper = self.dumpable_mapper[annotation]
                 if dumper is not None:
-                    dump_tree = ast.parse(dumper.format(var_name=node.target.id))
-                    self.to_dump.append(dump_tree.body)
-                    self.extracted_variables.append(_VariableNameTypePair(
-                        node.target.id, None, None, None, False, True, node.target.id)
-                    )
-                    # removing type annotation
-                    return ast.Assign(
-                        col_offset=node.col_offset,
-                        lineno=node.lineno,
-                        targets=[node.target],
-                        value=node.value
-                    )
+                    return self._visit_default_dumper(node, dumper)
                 else:
-                    load_ctx = ast.Load()
-                    func_name = deepcopy(node.annotation.args[0].value)
-                    func_name.ctx = load_ctx
-                    ast.fix_missing_locations(func_name)
-
-                    new_dump_node = ast.Expr(
-                        col_offset=0, lineno=0,
-                        value=ast.Call(
-                            args=node.annotation.args[1:],
-                            col_offset=0,
-                            func=ast.Attribute(
-                                attr=node.annotation.args[0].attr,
-                                col_offset=0,
-                                ctx=load_ctx,
-                                lineno=0,
-                                value=func_name,
-                            ),
-                            keywords=node.annotation.keywords
-                        )
-                    )
-                    ast.fix_missing_locations(new_dump_node)
-                    self.to_dump.append([new_dump_node])
-                    self.extracted_variables.append(_VariableNameTypePair(
-                        node.target.id, None, None, None, False, True, node.annotation.args[1].s)
-                    )
-                    # removing type annotation
-                    return ast.Assign(
-                        col_offset=node.col_offset,
-                        lineno=node.lineno,
-                        targets=[node.target],
-                        value=node.value
-                    )
-            elif (isinstance(node.annotation, ast.Name) and node.annotation.id in self.output_type_mapper) or \
-                    (isinstance(node.annotation, ast.Str) and node.annotation.s in self.output_type_mapper):
-                self.extracted_variables.append(_VariableNameTypePair(
-                    node.target.id, None, None, None, False, True, node.value.s)
-                )
-                # removing type annotation
-                return ast.Assign(
-                    col_offset=node.col_offset,
-                    lineno=node.lineno,
-                    targets=[node.target],
-                    value=node.value
-                )
+                    return self._visit_user_defined_dumper(node)
+            elif annotation in self.output_type_mapper:
+                return self._visit_output_type(node)
         except Exception:
             pass
         return node

From 0e3a5079fac202b6e96ebd68f7cf160828ee89a1 Mon Sep 17 00:00:00 2001
From: Giannis Doukas <giannisdoukas2311@gmail.com>
Date: Thu, 2 Jul 2020 20:34:53 +0100
Subject: [PATCH 8/9] change path in example in readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 4468b11..c0b8c4a 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@ pip install ipython2cwl
 ### Example
  
 ```
-jupyter repo2cwl https://github.com/giannisdoukas/cwl-annotated-jupyter-notebook.git -o cwlbuild
+jupyter repo2cwl https://github.com/giannisdoukas/cwl-annotated-jupyter-notebook.git -o .
 ```
 
 ### Docs

From fb7bfc46f9320754c4b5a33b84284bace6302742 Mon Sep 17 00:00:00 2001
From: Giannis Doukas <giannisdoukas2311@gmail.com>
Date: Thu, 2 Jul 2020 20:38:49 +0100
Subject: [PATCH 9/9] update version

---
 README.md               | 2 +-
 ipython2cwl/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index c0b8c4a..9f95cea 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ with open(result_file, 'w') as f:
 ```
 
 IPython2CWL is based on [repo2docker](https://github.com/jupyter/repo2docker), the same tool
-used by [mybinder](https://mybinder.org/). Now, by writing Jupyter Notebook and publish them, including repo2docker
+used by [mybinder](https://mybinder.org/). Now, by writing Jupyter Notebooks and publishing them, including repo2docker
 configuration, the community can not only execute the notebooks remotely but also to use them as steps in scientific
 workflows.
 
diff --git a/ipython2cwl/__init__.py b/ipython2cwl/__init__.py
index 27fdca4..81f0fde 100644
--- a/ipython2cwl/__init__.py
+++ b/ipython2cwl/__init__.py
@@ -1 +1 @@
-__version__ = "0.0.3"
+__version__ = "0.0.4"