In [2]:
import ast, mistletoe, textwrap

In [3]:
def quote(str, punc=''):
    str, leading_ws = ''.join(str), []
    lines = str.splitlines(True)
    _ = '"""'
    if _ in str: _ = "'''"
    if not str.strip(): _ = punc = ''
    while lines and not lines[0]: leading_ws.append(lines.pop())
    str = ''.join(lines)
    end = len(str.rstrip())
    str, ending_ws = str[:end], str[end:]
    if str and str.endswith(_[0]): str += ' '                    
    return F"{''.join(leading_ws)}{_}{str}{_}{punc}{ending_ws}"

In [4]:
def get_first_line(lines, line=''):
    for line in lines or ['']: 
        if line.strip(): break
    return line

def get_line_indent(line):  return len(line) - len(line.lstrip())

In [5]:
EmptyCodeBlock = mistletoe.block_token.BlockCode('')
EmptyCodeBlock.children = [mistletoe.span_token.RawText('')]

In [6]:
import functools

In [7]:
def render_string_as_document(callable):
    @functools.wraps(callable)
    def caller(self, token):
        if isinstance(token, str):
            self.original = list(map(str.rstrip, ''.join(token).splitlines()))
            self.final, self.buffer, self.min_indent = [], mistletoe.span_token.RawText(''), 0
            token = mistletoe.Document(token)
            isinstance(token.children[-1], mistletoe.block_token.BlockCode) or token.children.append(EmptyCodeBlock)        
        return callable(self, token)
    return caller

In [8]:
class MarkdownPythonRendererContext(mistletoe.base_renderer.BaseRenderer):
    _original_block_tokens = None
    _original_span_tokens = None
    _active_block_tokens = None
    _active_span_tokens = None
    def __enter__(self):
        self._original_block_tokens = mistletoe.block_token._token_types
        self._original_span_tokens = mistletoe.span_token._token_types
        mistletoe.block_token._token_types = self._active_block_tokens or self._original_block_tokens
        mistletoe.span_token._token_types = self._active_span_tokens or self._original_span_tokens
        return super().__enter__()
    
    def __exit__(self, *exc):
        mistletoe.block_token._token_types = self._original_block_tokens
        mistletoe.span_token._token_types = self._original_span_tokens
        return super().__exit__(*exc)
    
    render = render_string_as_document(mistletoe.base_renderer.BaseRenderer.render)

In [77]:
class MarkdownPython(MarkdownPythonRendererContext):
    _active_block_tokens = [mistletoe.block_token.List, mistletoe.block_token.BlockCode, mistletoe.block_token.Paragraph]
    def render_document(self, token, punctation = ''): 
        [self.render(child) for child in token.children if hasattr(child, 'children')]
        source = '\n'.join(self.final)
        if token.children[-1] == EmptyCodeBlock:
            # IPython use the magic function if the string is not stripped
            source = source.rstrip() +';'
        return source
    def render_paragraph(self, token): 
        """Paragraphs are staged in the buffer so they can be indented."""
        for child in token.children:  
            self.buffer.content += child.content + '\n'
        return ''
        
    def render_block_code(self, token):
        
        buffer = self.buffer.content.splitlines()
        while buffer and not buffer[0]:   
            self.final.append(buffer.pop(0))
        body = '\n'.join(buffer)

        block = token.children[0].content.splitlines()
        last_line = get_first_line(reversed(self.final))
        prior_indent = get_line_indent(last_line)
        
        definition, returns = last_line.rstrip().endswith(':'), last_line.lstrip().startswith('return')

        this_indent = get_line_indent(get_first_line(block))
            
        if body.strip() and not self.min_indent: self.min_indent = this_indent

        indent = max(self.min_indent, (returns and min or max)(prior_indent, this_indent))
        indent += 4 * (definition and prior_indent == indent)
            
        self.final.extend(textwrap.indent(quote(body), ' '*indent).splitlines() + block) 

        self.buffer.content = ''
        return ''
        
    
    def render(self, token: [str, 'Token']):
        if isinstance(token, mistletoe.block_token.Document): ...
        elif isinstance(token, (mistletoe.block_token.Paragraph, mistletoe.block_token.BlockCode)):    
            lines = []
            for child in token.children:
                for line in map(str.strip, getattr(child, 'content', '').splitlines()):
                    if line:
                        while self.original and (not lines or (line not in lines[-1])):  
                            lines.append(self.original.pop(0).rstrip())
            token = type(token)([])
            token.children = mistletoe.span_token.RawText('\n'.join(lines)),

        return super().render(token)
            


In [78]:
def markdown_to_python(str): 
    with MarkdownPython() as object: return object.render(str)

print(markdown_to_python("""asdfadf
        
        PRINT
        range


"""))

In [79]:
MarkdownPythonRendererContext._original_span_tokens

In [108]:
class MarkdownUserExpressions(MarkdownPythonRendererContext):
    _active_block_tokens = [mistletoe.block_token.List, mistletoe.block_token.Paragraph]
    def render_inline_code(self, token):
        return self.expressions.append(token.children[0].content) or ''
    def render_line_break(self, token):  return  ''
    
    def render_document(self, token):
        self.expressions = []
        if hasattr(token, 'children'): super().render_inner(token)
        return self.expressions
        

In [109]:
class MarkdownPythonUserExpressions(MarkdownPython):
    user_expressions = None
    _active_span_tokens = [
        mistletoe.span_token.InlineCode, mistletoe.span_token.RawText
    ]
    def render_paragraph(self, token): 
        """Paragraphs are staged in the buffer so they can be indented."""
        buffer = ''
        for child in token.children:  
            buffer += child.content + '\n'
        self.buffer.content += buffer
        with MarkdownUserExpressions() as user_expressions:
            self.user_expressions.extend(user_expressions.render(mistletoe.Document(buffer)))
        return super().render_paragraph(token)

    def render_document(self, token):
        return super().render_document(token), self.user_expressions
    def render_inner(self, token):
        return super().render_inner(token) if hasattr(token, 'children') else ''
    
    def render(self, token):
        self.user_expressions = self.user_expressions or []
        return super().render(token)

In [110]:
def markdown_to_python_user_expressions(str): 
    with MarkdownPythonUserExpressions() as object: 
        return object.render(str)

In [111]:
markdown_to_python_user_expressions("""Notes from meeting with Siu

Previous experience compiling UDFs with numba:
Try to create special target for Numba to emit code for cloudera database Impala
https://github.com/cloudera/impyla/tree/udf
Numba generate LLVM ir which cloudera directly consumes
Inline this directly in database
Gets tricky as LLVM versions change
More than four years ago
Approach involves too much internals
Lesson learned: try not to go to deep into internals of Numba
Main goal to be execute 
Numba never considers cross compiling
We could get numba to cross compile, but this is not supported yet
Two options: (both would work according to Siu)
Embedding
It doesn’t ever free the function that is loaded
Because numba types are immortal
Because each type is unique


Emit static shared library
How could we load and unload this?
It is possible to generate regular shared libraries from Numba, that are not python specific
See note below regarding system linker requirement
GPU backend
Functions can be unloaded
Cannot generate shared library for GPU backend
Recommended: shared library approach:
AOT documentation: http://numba.pydata.org/numba-doc/latest/user/pycc.html?highlight=aot
This might do too much, focuses on user interface
We could also manually generate wrappers like we do for numba-xnd
https://github.com/Quansight/numba-xnd/blob/9a914ea76442385ada1940bde963a14faad9dee3/numba_xnd/gumath.py#L158-L199
NOTE: llvm doesn’t have linker. All it can do is generate `.o` files. We have to use system linker to generate actual shared library
IPC approach
Send data from mapd to python over IPC and call it

Follow up  notes regarding IPC approach:
Provides a quick prototype as communication would be in between two Python processes: one embedded in mapd and other in a thread (Ah I didn't realize we would also embed Python in MapD. Do we need Python in MapD now that we can use IPC to move the data to the python process running Numba?
Python in MapD process is not necessary provided that one can use some C++ implementation of IPC. I was just thinking that it would be easier (more flexible) to develop in Python. Getting the UDF sources from client, firing up a Python process, sending the source there, providing the mapd data via IPC (results would be stored in-place to IPC wrapped memory), etc are the tasks for MapD process.
)
When the running Python process dies, the mapd process will not be affected
We could use arrow IPC for data sharing. Might work also when using CUDA.
Should we have one Python-numba process for different UDFs or would there be a separate numba process for each UDF?

*    Isolated and unloadable execution engine of jit code:
    https://github.com/numba/numba/issues/3464
    This feature request assumes numba in mapd process.


> Internal MVP: allow users to do what they can with extension functions at to be loaded at runtime.

With quansight: 


TODO for next two weeks: create diagram

They don’t like IPC process as much
Don’t worry too much about unloading first
We want to be able to run them in parallel on multiple threads
""")

('"""Notes from meeting with Siu\nNotes from meeting with Siu\n\nPrevious experience compiling UDFs with numba:\nTry to create special target for Numba to emit code for cloudera database Impala\nhttps://github.com/cloudera/impyla/tree/udf\nNumba generate LLVM ir which cloudera directly consumes\nInline this directly in database\nGets tricky as LLVM versions change\nMore than four years ago\nApproach involves too much internals\nLesson learned: try not to go to deep into internals of Numba\nMain goal to be execute\nNumba never considers cross compiling\nWe could get numba to cross compile, but this is not supported yet\nTwo options: (both would work according to Siu)\nEmbedding\nIt doesn’t ever free the function that is loaded\nBecause numba types are immortal\nBecause each type is unique\n\nPrevious experience compiling UDFs with numba:\nTry to create special target for Numba to emit code for cloudera database Impala\nhttps://github.com/cloudera/impyla/tree/udf\nNumba generate LLVM ir 

In [112]:
    source, user_expressions = markdown_to_python_user_expressions('testiong `range` asdfkjadf asdfkjhaskdjfh `print`')

In [68]:
    markdown_to_python_user_expressions("""This is all working `print(a)`

        a = 10

    ---""")

[<class 'mistletoe.span_token.EscapeSequence'>, <class 'mistletoe.span_token.Strikethrough'>, <class 'mistletoe.span_token.AutoLink'>, <class 'mistletoe.span_token.CoreTokens'>, <class 'mistletoe.span_token.InlineCode'>, <class 'mistletoe.span_token.RawText'>]


('    """This is all working `print(a)`\n    This is all working `print(a)`"""\n\n    a = 10\n\n    """---\n\n    ---""";',
 ['print(a)'])

In [39]:
    markdown_to_python_user_expressions("""# Hmm what the fuck

    * <https://docs.google.com/document/d/1cejQ1SGD4-dNOzsueUq5Fx_V4gKjEgNTDxr6tC__PVw/edit?usp=sharing>
    * <https://docs.google.com/document/d/1cejQ1SGD4-dNOzsueUq5Fx_V4gKjEgNTDxr6tC__PVw/edit?usp=sharing>""")

('"""# Hmm what the fuck\n# Hmm what the fuck""";', [])