<a href="https://colab.research.google.com/github/engbJapan/Programming/blob/main/Python/Issue/reconstruct_python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
%matplotlib inline


# Reconstruct Python

Demonstrates how Lark's experimental text-reconstruction feature can recreate
functional Python code from its parse-tree, using just the correct grammar and
a small formatter.


In [12]:
%pip install lark -qq
from lark import Token, Lark
from lark.reconstruct import Reconstructor
from lark.indenter import PythonIndenter

# Official Python grammar by Lark
python_parser3 = Lark.open_from_package('lark', 'python.lark', ['grammars'],
                                        parser='lalr', postlex=PythonIndenter(), start='file_input',
                                        maybe_placeholders=False    # Necessary for reconstructor
                                        )

SPACE_AFTER = set(',+-*/~@<>="|:')
SPACE_BEFORE = (SPACE_AFTER - set(',:')) | set('\'')


def special(sym):
    return Token('SPECIAL', sym.name)

def postproc(items):
    stack = ['\n']
    actions = []
    last_was_whitespace = True
    for item in items:
        if isinstance(item, Token) and item.type == 'SPECIAL':
            actions.append(item.value)
        else:
            if actions:
                assert actions[0] == '_NEWLINE' and '_NEWLINE' not in actions[1:], actions

                for a in actions[1:]:
                    if a == '_INDENT':
                        stack.append(stack[-1] + ' ' * 4)
                    else:
                        assert a == '_DEDENT'
                        stack.pop()
                actions.clear()
                yield stack[-1]
                last_was_whitespace = True
            if not last_was_whitespace:
                if item[0] in SPACE_BEFORE:
                    yield ' '
            yield item
            last_was_whitespace = item[-1].isspace()
            if not last_was_whitespace:
                if item[-1] in SPACE_AFTER:
                    yield ' '
                    last_was_whitespace = True
    yield "\n"


class PythonReconstructor:
    def __init__(self, parser):
        self._recons = Reconstructor(parser, {'_NEWLINE': special, '_DEDENT': special, '_INDENT': special})

    def reconstruct(self, tree):
        return self._recons.reconstruct(tree, postproc)


def test():
    python_reconstructor = PythonReconstructor(python_parser3)

#    self_contents = open(__file__).read()

    self_contents = open(
        f"os.getcwd()/gram"
        ).read()
    tree = python_parser3.parse(self_contents+'\n')
    output = python_reconstructor.reconstruct(tree)

    tree_new = python_parser3.parse(output)
    print(tree.pretty())
    print(tree_new.pretty())
    # assert tree.pretty() == tree_new.pretty()
    assert tree == tree_new

    print(output)


if __name__ == '__main__':
    test()

FileNotFoundError: ignored

In [8]:
import os

print('getcwd:      ', os.getcwd())
#print('__file__:    ', __file__)

getcwd:       /content


In [18]:
from lark import Lark

grammar = r"""
start: list | dict

list: "[" _seperated{atom, ","} "]"
dict: "{" _seperated{key_value, ","} "}"
key_value: atom ":" atom

_seperated{x, sep}: x (sep x)*  // Define a sequence of 'x sep x sep x ...'

atom: NUMBER | ESCAPED_STRING

%import common (NUMBER, ESCAPED_STRING, WS)
%ignore WS
"""


parser = Lark(grammar)

print(parser.parse('[1, "a", 2]'))
print(parser.parse('{"a": 2, "b": 6}'))

Tree(Token('RULE', 'start'), [Tree(Token('RULE', 'list'), [Tree(Token('RULE', 'atom'), [Token('NUMBER', '1')]), Tree(Token('RULE', 'atom'), [Token('ESCAPED_STRING', '"a"')]), Tree(Token('RULE', 'atom'), [Token('NUMBER', '2')])])])
Tree(Token('RULE', 'start'), [Tree(Token('RULE', 'dict'), [Tree(Token('RULE', 'key_value'), [Tree(Token('RULE', 'atom'), [Token('ESCAPED_STRING', '"a"')]), Tree(Token('RULE', 'atom'), [Token('NUMBER', '2')])]), Tree(Token('RULE', 'key_value'), [Tree(Token('RULE', 'atom'), [Token('ESCAPED_STRING', '"b"')]), Tree(Token('RULE', 'atom'), [Token('NUMBER', '6')])])])])


In [15]:
%%bash
cat <<EOFFF > ./grammar.lark
?start: statement+
 
?statement: function
    | instruction
    | function_call
 
code_block: "{" statement+ "}"
function: "関数" new_symbol "(" ")" code_block
function_call: symbol "(" ")"
instruction: "出力" "(" string ")" -> out
 
string    : ESCAPED_STRING
symbol    : WORD
new_symbol: WORD
 
%import common.ESCAPED_STRING
 
%import common.WORD
 
%import common.WS
 
%ignore WS
EOFFF
cat ./grammar.lark

?start: statement+
 
?statement: function
    | instruction
    | function_call
 
code_block: "{" statement+ "}"
function: "関数" new_symbol "(" ")" code_block
function_call: symbol "(" ")"
instruction: "出力" "(" string ")" -> out
 
string    : ESCAPED_STRING
symbol    : WORD
new_symbol: WORD
 
%import common.ESCAPED_STRING
 
%import common.WORD
 
%import common.WS
 
%ignore WS


In [17]:
# -*- coding: utf-8 -*-
from lark import Lark, Transformer
 
def out(string):
    print(string[0])
 
class Main(Transformer):
    def __init__(self):
        self._functions = {}
 
    def function(self, token):
        self._functions[token[0]] = token[1:]
    
    def function_call(self, token):
        function = self._functions[token[0]]
        for state in function:
            eval(state.data)(state.children)
 
    def code_block(self, tree):
        return tree[0]
 
    def new_symbol(self, token):
        return token[0].value
    
    def symbol(self, token):
        return token[0].value
 
    def string(self, token):
        return token[0][1:-1]
 
text = '''
    関数 main(){
        出力("Hello world")
    }
    main()
    print("dosita?")
    '''
 
# ファイル分けしたので、中身全部読んで変数に入れる
grammar = ""
with open('./grammar.lark', 'r', encoding='utf-8') as a_file:
    grammar = ''.join([line for line in a_file])
 
parser = Lark(grammar, parser='lalr', transformer=Main())
parser.parse(text)

Hello world


UnexpectedToken: ignored