`tangle` provides functions for converting markdown to executable source in a semi-lossy manner.

`quote` wraps non-code objects in triple quotes.

In [1]:
import ast, mistletoe, textwrap, functools, itertools

In [2]:
def quote(str, punc=''):
    """Wrap non-code text in a triple-quoted Python string literal.

    Parameters:
        str: a string, or an iterable of strings that is joined first.
        punc: text placed directly after the closing quotes.

    Returns:
        The quoted text.  Leading blank lines and trailing whitespace are kept
        outside the quotes, and input that is entirely whitespace is returned
        unchanged (without quotes or `punc`).
    """
    text = ''.join(str)
    if not text.strip():
        # Nothing to quote: blank input passes through untouched.
        return text

    # Fall back to single quotes when the text already holds triple double quotes.
    delimiter = "'''" if '"""' in text else '"""'

    # Peel blank lines off the front.  `splitlines(True)` keeps line endings,
    # so a blank line must be detected with `strip()` rather than truthiness.
    lines = text.splitlines(True)
    leading_ws = []
    while lines and not lines[0].strip():
        leading_ws.append(lines.pop(0))

    body = ''.join(lines)
    end = len(body.rstrip())
    body, ending_ws = body[:end], body[end:]
    # A body ending in the quote character would run into the closing delimiter.
    if body.endswith(delimiter[0]):
        body += ' '
    return f"{''.join(leading_ws)}{delimiter}{body}{delimiter}{punc}{ending_ws}"

`get_first_line` gets the first string in `lines` that is not blank, falling back to the last string seen when every line is blank.

In [3]:
def get_first_line(lines, line=''):
    """Return the first entry of `lines` that contains non-whitespace text.

    When no entry qualifies, the last entry visited is returned.  A falsy
    `lines` (such as an empty list) yields `''`, while an exhausted iterator
    yields the `line` default.
    """
    candidates = lines if lines else ['']
    for line in candidates:
        if line.strip():
            return line
    return line

`get_line_indent` computes the indent of a string.

In [4]:
def get_line_indent(line):
    """Return how many leading whitespace characters `line` starts with."""
    without_indent = line.lstrip()
    return len(line) - len(without_indent)

In [69]:
def tokenize(str)->mistletoe.Document:
    """Parse `str` into a `mistletoe.Document` that always ends in a block code.

    Only block code, `List` and `Paragraph` tokens are recognised.  When the
    last child is not a block code (or there are no children at all), the
    shared `EmptyBlockCode` token is appended.
    """
    document = mistletoe.Document('')
    document.children = mistletoe.block_tokenizer.tokenize(
        str.splitlines(),
        [mistletoe.block_token.BlockCode, List, Paragraph],
    )
    ends_with_code = document.children and isinstance(
        document.children[-1], mistletoe.block_token.BlockCode
    )
    if not ends_with_code:
        document.children.append(EmptyBlockCode)
    return document

In [71]:
class List(mistletoe.block_token.List):
    """List token whose nested paragraphs and lists use this module's token classes."""

    def __init__(self, matches):
        # Stock mistletoe token classes and the local classes that replace them.
        substitutes = {
            mistletoe.block_token.Paragraph: Paragraph,
            mistletoe.block_token.List: List,
        }
        self.children = []
        for match, indent, list_token in matches:
            for token, args in match:
                token_type = substitutes.get(token, token)
                self.children.append(token_type(args))

`EmptyBlockCode` is an empty `mistletoe.block_token.BlockCode` object that is used to act as an identifier during Markdown parsing.

In [72]:
# Shared sentinel token: a block code built from an empty string.  `tokenize`
# appends it to documents that do not end in code, and `render_document`
# compares the last child against it.
EmptyBlockCode = mistletoe.block_token.BlockCode('')
# Give the sentinel a single empty raw-text child.
EmptyBlockCode.children = [mistletoe.span_token.RawText('')]

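`render_string_as_document` decorates a `mistletoe.base_renderer` method so that it accepts plain strings as input.  A string is tokenized first, and the tokenizer appends an `EmptyBlockCode` if the document does not already end in a block code (for example, when the last cell is a `Paragraph`).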

Our markdown renderer appends the final source on block codes.  Ending in a block code ensures that trailing paragraphs are captured.

In [73]:
def render_string_as_document(callable):
    """Decorate a renderer method so that it also accepts a plain string.

    Given a string, the wrapper resets the renderer state (`original`,
    `final`, `buffer`, `min_indent`, `expressions`) and passes the tokenized
    document to the wrapped method.  Any other token is forwarded unchanged.
    """
    @functools.wraps(callable)
    def caller(self, token):
        if not isinstance(token, str):
            return callable(self, token)
        # Right-stripped source lines, consumed later while rendering.
        self.original = [line.rstrip() for line in ''.join(token).splitlines()]
        empty_buffer = mistletoe.span_token.RawText('')
        self.final = []
        self.buffer = empty_buffer
        self.min_indent = 0
        self.expressions = []
        return callable(self, tokenize(token))
    return caller

In [74]:
class Paragraph(mistletoe.block_token.Paragraph):
    """Paragraph token that keeps its text as one raw, unparsed span."""

    def __init__(self, lines):
        # Drop each line's indentation, join the lines, then trim the result.
        dedented = (line.lstrip() for line in lines)
        text = ''.join(dedented).strip()
        self.children = [mistletoe.span_token.RawText(text)]


In [75]:
class InlineUserExpressions(mistletoe.base_renderer.BaseRenderer):
    """Renderer that collects the contents of inline code spans."""

    def render_inline_code(self, token):
        """Record the span's text and contribute nothing to the output."""
        expression = token.children[0].content
        self.expressions.append(expression)
        return ''

    def render_line_break(self, token):
        """Line breaks produce no output."""
        return ''

    # Thematic breaks are handled exactly like line breaks.
    render_thematic_break = render_line_break

    def render_document(self, token):
        """Reset the collection, walk the document, and return what was found."""
        self.expressions = []
        if hasattr(token, 'children'):
            super().render_inner(token)
        return self.expressions

In [76]:
def inline_user_expressions(str):
    """Return what `InlineUserExpressions` collects when rendering `str`."""
    document = mistletoe.Document(str.splitlines())
    with InlineUserExpressions() as renderer:
        return renderer.render(document)

In [77]:
class MarkdownPython(mistletoe.base_renderer.BaseRenderer):
    """Renderer that turns Markdown into Python source.

    Block code is emitted as written.  Paragraph text is staged in a buffer
    and emitted as a quoted string, indented relative to the surrounding code,
    when the next block code is rendered.
    """

    def render_document(self, token, punctation=''):
        """Render each child that has children and join the accumulated lines."""
        for child in token.children:
            if hasattr(child, 'children'):
                self.render(child)
        source = '\n'.join(self.final)
        if token.children[-1] == EmptyBlockCode:
            # IPython use the magic function if the string is not stripped
            return source.rstrip() + ';'
        return source

    def render_paragraph(self, token):
        """Paragraphs are staged in the buffer so they can be indented."""
        for child in token.children:
            staged = child.content + '\n'
            self.buffer.content += staged
        return ''

    def render_block_code(self, token):
        """Emit the staged paragraph as a quoted string, followed by the code."""
        pending = self.buffer.content.splitlines()
        # Leading empty lines of the staged text go straight to the output.
        while pending and not pending[0]:
            self.final.append(pending.pop(0))
        body = '\n'.join(pending)

        block = token.children[0].content.splitlines()
        # Most recent emitted line with content (searching backwards).
        last_line = get_first_line(reversed(self.final))
        prior_indent = get_line_indent(last_line)

        opens_block = last_line.rstrip().endswith(':')
        follows_return = last_line.lstrip().startswith('return')

        this_indent = get_line_indent(get_first_line(block))

        # The first non-blank staged text sets the minimum indent, unless one is set.
        if body.strip() and not self.min_indent:
            self.min_indent = this_indent

        # After a `return` line take the smaller indent, otherwise the larger.
        pick = min if follows_return else max
        indent = max(self.min_indent, pick(prior_indent, this_indent))
        # Step one level deeper when the previous line ends with a colon.
        if opens_block and prior_indent == indent:
            indent += 4
        quoted = textwrap.indent(quote(body), ' ' * indent).splitlines()
        self.final.extend(quoted + block)

        self.buffer.content = ''
        return ''

    @render_string_as_document
    def render(self, token: [str, 'Token']):
        """Rebuild paragraph and block-code text from the original lines, then render."""
        is_document = isinstance(token, mistletoe.block_token.Document)
        if not is_document and isinstance(
            token, (mistletoe.block_token.Paragraph, mistletoe.block_token.BlockCode)
        ):
            lines = []
            for child in token.children:
                content = getattr(child, 'content', '')
                for fragment in content.splitlines():
                    fragment = fragment.strip()
                    if not fragment:
                        continue
                    # Consume original lines up to the one holding this fragment.
                    while self.original:
                        consumed = self.original.pop(0).rstrip()
                        lines.append(consumed)
                        if fragment in consumed:
                            break
            token = type(token)([])
            token.children = (mistletoe.span_token.RawText('\n'.join(lines)),)
        return super().render(token)
            


In [78]:
def markdown_to_python(str):
    """Render the Markdown string `str` with a `MarkdownPython` renderer."""
    with MarkdownPython() as renderer:
        return renderer.render(str)

In [79]:
class MarkdownPythonExpressions(MarkdownPython):
    """`MarkdownPython` variant that also collects inline code from staged text."""

    def render_block_code(self, token):
        """Gather inline expressions from the buffer before it is emitted."""
        staged = self.buffer.content
        if staged.strip():
            self.expressions.extend(inline_user_expressions(staged))
        return super().render_block_code(token)

In [80]:
def markdown_to_python_user_expressions(str):
    """Return `(source, expressions)` from rendering `str` with `MarkdownPythonExpressions`."""
    with MarkdownPythonExpressions() as renderer:
        source = renderer.render(str)
        return source, renderer.expressions

In [81]:
    def test_edge_case():
        """Smoke test: an indented code line followed by a more deeply indented line."""
        source = """    vega_transforms = "https://vega.github.io/vega-lite/docs/transform.html"
            vega_transforms"""
        markdown_to_python(source)

In [82]:
    def test_exp():
        """Inline code spans in prose must come back as user expressions."""
        markdown = 'testiong `range` asdfkjadf asdfkjhaskdjfh `print`'
        rendered = markdown_to_python_user_expressions(markdown)
        source, user_expressions = rendered
        assert user_expressions

In [85]:
    # Define the property-based test only when pytest has already been imported,
    # so hypothesis is not imported otherwise.
    if 'pytest' in __import__('sys').modules:
        import hypothesis.strategies
        # Smoke test: rendering text generated by hypothesis must not raise.
        @hypothesis.given(hypothesis.strategies.text())
        def test_random_strings(str): 
            markdown_to_python_user_expressions(str)    