In [1]:
using Pipe
using Match

# Parse

Language syntax rules:
* each syntax rule translates to a typed block, i.e. a block with its type at the beginning:
    - types: 
        - __nl__: new line
        - __dot__: dot syntax
        - __call__: call syntax 
        - __index__: get index syntax
        - __parentheses__: parentheses 
        - __square_brackets__: square brackets
        - __curly_brackets__: curly brackets
* separated symbols:
    - example: 
        ```julia
        a+b -> a + b 
        a: 1 -> a : 1
        ```
* indents
    - example: 
        ```julia
        let fib(n)
            a: 1
            b: 0
            for _ in 0..n
                a, b: b, a + b
            return b
        
        (__nl__ let fib(n)
            (__nl__ a : 1)
            (__nl__ b : 0)
            (__nl__ for _ in 0..n
                (__nl__ a, b: b, a + b))
            (__nl__ return b))
        ```
    - special rule: 
        ```julia
        if pred
              this
           pred2
              that
           else
              and this
        
        # `this` is indented deeper than its siblings, so a `__begin__` line is inserted at the siblings' indent to head it
        
        if pred
          __begin__
            this
          pred2
            that
          else
            and this
        ```
* brackets
    - example: 
        ```julia
        ((1,2),(1,2)) -> ((1 2) (1 2))
        ```
* call:
    - example:
        ```julia
        asdf(...) -> (__call__ asdf ...)
        asdf[...] -> (__index__ asdf ...)
        ```
* dot syntax
    - example:
        ```julia
        foo.bar -> (__dot__ foo bar)
        foo.bar.baz -> (__dot__ (__dot__ foo bar) baz)
        ```
    - example with call:
        ```julia
        foo.bar(...) -1-> foo.(__call__ bar ...) -2-> (__dot__ foo (__call__ bar ...))
        ```

Special symbols:

In [49]:
# Sample programs written in the surface syntax described above.
# `TESTS[4]` is passed to `parser` and `TESTS[3]` is printed in later cells.
TESTS = [
    # 1: operators written without surrounding spaces, and a line that starts with an operator
    """
    a:c+d
    :a c+d
    """,
    # 2: nested indentation blocks (function body containing a loop body)
    """
    let fib(n)
        a: 1
        b: 0
        for _ in 0..n
            a, b: b, a+b
        return b
    """,
    # 3: first child indented deeper than its later siblings (the `__begin__` special rule)
    """
    if a = 1
         println 3
       a = 2
         println(2)
       else
         println 1
    """,
    # 4: combinations of call `()`, index `[]`, dot access, and bare tuple/list brackets
    """
    __begin__
        a()
        a(b)
        a(b())
        a(b)(c)
        a(b(c))
        a(b(c),d) 
        a.b
        a.b()
        a.b(c)
        a.b.c
        a.b().c
        a.b(c).d
        a[]
        a[b]
        a[b[]]
        a[b[c]]
        a[b[c],d] 
        a.b
        a.b[]
        a.b[c]
        a.b.c
        a.b[].c
        a.b[c].d
        a(b[c])
        (a)(b)
        [a](b)
        (a)[b]
        [a][b]
        (x) 
        (lambda (x) 3)
        a[b](c)
        [[a]]
    """,
]

4-element Vector{String}:
 "a:c+d\n:a c+d\n"
 "let fib(n)\n    a: 1\n    b: 0\n  " ⋯ 19 bytes ⋯ "     a, b: b, a+b\n    return b\n"
 "if a = 1\n     println 3\n   a = 2\n     println(2)\n   else\n     println 1\n"
 "__begin__\n    a()\n    a(b)\n    " ⋯ 305 bytes ⋯ " (x) 3)\n    a[b](c)\n    [[a]]\n"

In [50]:
# Operator spelling => symbolic name, as a vector of pairs.
# ".." is listed before "." so the two-character form comes first in the table.
# NOTE(review): nothing visible in this notebook reads this table; `parser` hard-codes its
# own operator character class in its regexes (which also contains `@`) — confirm whether
# this table is still needed or should drive those regexes.
SEPARATED_SYMBOLS = [
    "+" => "__plus__",
    "-" => "__minus__",
    "*" => "__asterisk__",
    "/" => "__slash__",
    "^" => "__circumflex_accent__", 
    "=" => "__equal_sign__",
    ">" => "__greater_than_sign__",
    "<" => "__less_than_sign__",
    ":" => "__colon__", 
    ";" => "__semicolon__",
   ".." => "__double_dot__",
    "." => "__dot__", 
    "," => "__comma__", 
]

# (opening bracket, closing bracket, block type) triples for the postfix bracket forms:
# `f(...)` is tagged `__call__` and `a[...]` is tagged `__index__`.
# NOTE(review): not referenced by any code visible in this notebook — confirm it is still used.
CALLS = [
    ("(", ")", "__call__"),
    ("[", "]", "__index__"),
]

2-element Vector{Tuple{String, String, String}}:
 ("(", ")", "__call__")
 ("[", "]", "__index__")

In [60]:
"""
    to_line_tokens(str)

Split `str` into lines and return one `(indent, tokens)` tuple per line.

`indent` is the 1-based index of the first character that is not a space (`1` when the
line is empty or contains only spaces); `tokens` is the line split on whitespace.
Blank lines are kept and yield an empty token list.
"""
function to_line_tokens(str)
    return [
        (something(findfirst(!=(' '), ln), 1), split(ln))
        for ln in split(str, '\n')
    ]
end

"""
    get_brackets!(line)

Consume tokens from the front of `line` and return them as a tuple, turning every
`"("` ... `")"` group into a nested tuple.

`line` is mutated: tokens are removed as they are read. Reading stops when `line` is
empty or right after a `")"` token has been consumed, so an unmatched `")"` at the top
level ends the scan and leaves the remaining tokens in `line`.
"""
function get_brackets!(line)
    items = []
    while !isempty(line)
        tok = popfirst!(line)
        # A closing bracket finishes the group currently being collected.
        tok == ")" && break
        # An opening bracket starts a nested group that is collected recursively.
        push!(items, tok == "(" ? get_brackets!(line) : tok)
    end
    return (items...,)
end

"""
    get_block!(lines)

Pop the first `(indent, tokens)` entry from `lines` and return it as an `"__nl__"` block:
a tuple `("__nl__", tokens..., children...)`.

The entry's tokens are grouped with `get_brackets!`. Every following entry whose indent
is greater than this entry's indent is consumed recursively as a child block. If the
first child is indented deeper than some later entry in the same run of deeper lines,
a synthetic `"__begin__"` entry at the smallest indent found is inserted in front of
it, so the deeper lines become children of `"__begin__"`.

`lines` is mutated; entries that do not belong to this block are left in place.
"""
function get_block!(lines)
    indent, raw_tokens = popfirst!(lines)
    tokens = get_brackets!(raw_tokens)

    is_child(entry) = entry[1] > indent

    # Leaf line: nothing follows, or the next line is not indented deeper.
    if isempty(lines) || !is_child(lines[1])
        return ("__nl__", tokens...)
    end

    # Smallest indent among the contiguous run of lines deeper than this one.
    first_child_indent = lines[1][1]
    shallowest = first_child_indent
    for entry in lines
        is_child(entry) || break
        shallowest = min(shallowest, entry[1])
    end

    # The first child sits deeper than a later sibling: give it a `__begin__` head.
    if shallowest < first_child_indent
        pushfirst!(lines, (shallowest, [SubString("__begin__")]))
    end

    children = Any[]
    while !isempty(lines) && is_child(lines[1])
        push!(children, get_block!(lines))
    end
    return ("__nl__", tokens..., children...)
end

# function parser(str)
#     converge(f, xₙ) = (xₙ₊₁ = f(xₙ); xₙ₊₁ == xₙ ? xₙ₊₁ : converge(f, xₙ₊₁))
#     rs__sepsym__ = r"(\S)([\+\-\*\/\^\@\=\>\<\:\;\,]|(\.\.))" => s"\1 \2 "
#     rs__sepsymbegin__ = r"(^|\s)([\+\-\*\/\^\@\=\>\<\:\;\,])" => s"\1\2 "
#     rs__tuple__  = r"(^|[\(\[\s\.])(?<rec>\((?<n>(?>[^\(\)]+|\g<rec>?)+)\))" => s"\1( __tuple__ \g<n> )"
#     rs__list__   = r"(^|[\(\[\s\.])(?<rec>\[(?<n>(?>[^\[\]]+|\g<rec>?)+)\])" => s"\1( __list__ \g<n> )"
#     rs__call__   = r"(?<l>(?<rec0>\((?>[^\(\)\[\]]+|\g<rec0>|\g<rec1>?)+\))|(?<rec1>\[(?>[^\(\)\[\]]+|\g<rec0>|\g<rec1>?)+\])|[^\(\)\[\]\s\.]+)(?<rec2>\((?<j>(?>[^\(\)]+|\g<rec2>?)+)\))" => s"( __call__ \g<l> \g<j> )"
#     rs__index__  = r"(?<l>(?<rec0>\((?>[^\(\)\[\]]+|\g<rec0>|\g<rec1>?)+\))|(?<rec1>\[(?>[^\(\)\[\]]+|\g<rec0>|\g<rec1>?)+\])|[^\(\)\[\]\s\.]+)(?<rec2>\[(?<j>(?>[^\[\]]+|\g<rec2>?)+)\])" => s"( __index__ \g<l> \g<j> )"
#     rs__dot__ = r"(?<l>(?<rec>\((?>[^\(\)]+|\g<rec>?)+\))|[^\(\)\s\.]+)\.(?<r>\g<rec>|[^\(\)\s\.]+)" => s"( __dot__ \g<l> \g<r> )"
#     return @pipe str |> 
#         replace(_, rs__sepsymbegin__) |>
#         replace(_, rs__sepsym__) |>
#         replace(_, rs__tuple__) |> 
#         replace(_, rs__list__) |> 
#         converge(t -> replace(t, rs__index__), _) |>
#         converge(t -> replace(t,  rs__call__), _) |> 
#         converge(t -> replace(t,   rs__dot__), _) |> 
#         to_line_tokens |> 
#         get_block!
# end

"""
    parser(str)

Parse source text `str` into a nested-tuple AST.

The text is rewritten by a sequence of regex substitutions and then handed to
`to_line_tokens` and `get_block!`:

1. an operator character at the start of the string or after whitespace gets a space
   after it;
2. an operator character (or `..`) directly after a non-space character gets a space on
   both sides;
3. bracket groups are rewritten, each rule repeated until the text stops changing, in
   the order tuple, list, index, call, dot, producing `( __tuple__ ... )`,
   `( __list__ ... )`, `( __index__ l ... )`, `( __call__ l ... )` and `( __dot__ l r )`.

Only the first top-level block of the text is returned, because `get_block!` is called
once.
"""
function parser(str)
    # Apply `f` repeatedly until its output equals its input.
    function converge(f, text)
        while true
            next = f(text)
            next == text && return next
            text = next
        end
    end

    rs__sepsym__ = r"(\S)([\+\-\*\/\^\@\=\>\<\:\;\,]|(\.\.))" => s"\1 \2 "
    rs__sepsymbegin__ = r"(^|\s)([\+\-\*\/\^\@\=\>\<\:\;\,])" => s"\1\2 "
    # The negative lookahead stops an already tagged `( __tuple__ ...` group from being wrapped again.
    rs__tuple__  = r"(^|[\(\[\s\.])(?<rec>\((?!\s__tuple__)(?<n>(?>[^\(\)]+|\g<rec>?)+)\))" => s"\1( __tuple__ \g<n> )"
    rs__list__   = r"(^|[\(\[\s\.])(?<rec>\[(?<n>(?>[^\[\]]+|\g<rec>?)+)\])" => s"\1( __list__ \g<n> )"
    rs__call__   = r"(?<l>(?<rec0>\((?>[^\(\)\[\]]+|\g<rec0>|\g<rec1>?)+\))|(?<rec1>\[(?>[^\(\)\[\]]+|\g<rec0>|\g<rec1>?)+\])|[^\(\)\[\]\s\.]+)(?<rec2>\((?<j>(?>[^\(\)]+|\g<rec2>?)+)\))" => s"( __call__ \g<l> \g<j> )"
    rs__index__  = r"(?<l>(?<rec0>\((?>[^\(\)\[\]]+|\g<rec0>|\g<rec1>?)+\))|(?<rec1>\[(?>[^\(\)\[\]]+|\g<rec0>|\g<rec1>?)+\])|[^\(\)\[\]\s\.]+)(?<rec2>\[(?<j>(?>[^\[\]]+|\g<rec2>?)+)\])" => s"( __index__ \g<l> \g<j> )"
    rs__dot__ = r"(?<l>(?<rec>\((?>[^\(\)]+|\g<rec>?)+\))|[^\(\)\s\.]+)\.(?<r>\g<rec>|[^\(\)\s\.]+)" => s"( __dot__ \g<l> \g<r> )"

    # Operator spacing is a single pass each.
    text = replace(str, rs__sepsymbegin__)
    text = replace(text, rs__sepsym__)

    # Bracket and dot rules are each run to a fixed point, in this order.
    for rule in (rs__tuple__, rs__list__, rs__index__, rs__call__, rs__dot__)
        text = converge(t -> replace(t, rule), text)
    end

    return get_block!(to_line_tokens(text))
end

# function parser(str)
#     converge(f, xₙ) = (xₙ₊₁ = f(xₙ); xₙ₊₁ == xₙ ? xₙ₊₁ : converge(f, xₙ₊₁))
#     rs__sepsym__ = r"(\S)([\+\-\*\/\^\@\=\>\<\:\;\,]|(\.\.))" => s"\1 \2 "
#     rs__sepsymbegin__ = r"(^|\s)([\+\-\*\/\^\@\=\>\<\:\;\,])" => s"\1\2 "
#     rs__tuple__  = r"(^|[\(\[\s\.])(?<rec>\((?<n>(?>[^\(\)]+|\g<rec>?)+)\))" => s"\1__bra__ __tuple__ \g<n> __ket__"
#     rs__list__   = r"(^|[\(\[\s\.])(?<rec>\[(?<n>(?>[^\[\]]+|\g<rec>?)+)\])" => s"\1__bra__ __list__ \g<n> __ket__"
#     rs__call__   = r"(?<l>(?<rec0>\((?>[^\(\)\[\]]+|\g<rec0>|\g<rec1>?)+\))|(?<rec1>\[(?>[^\(\)\[\]]+|\g<rec0>|\g<rec1>?)+\])|[^\(\)\[\]\s\.]+)(?<rec2>\((?<j>(?>[^\(\)]+|\g<rec2>?)+)\))" => s"__bra__ __call__ \g<l> \g<j> __ket__"
#     rs__index__  = r"(?<l>(?<rec0>\((?>[^\(\)\[\]]+|\g<rec0>|\g<rec1>?)+\))|(?<rec1>\[(?>[^\(\)\[\]]+|\g<rec0>|\g<rec1>?)+\])|[^\(\)\[\]\s\.]+)(?<rec2>\[(?<j>(?>[^\[\]]+|\g<rec2>?)+)\])" => s"__bra__ __index__ \g<l> \g<j> __ket__"
#     rs__dot__ = r"(?<l>(?<rec>\((?>[^\(\)]+|\g<rec>?)+\))|[^\(\)\s\.]+)\.(?<r>\g<rec>|[^\(\)\s\.]+)" => s"__bra__ __dot__ \g<l> \g<r> __ket__"
#     return @pipe str |> 
#         replace(_, rs__sepsymbegin__) |>
#         replace(_, rs__sepsym__) |>
#         converge(t -> replace(t, rs__tuple__), _) |> 
#         converge(t -> replace(t,  rs__list__), _) |> 
#         converge(t -> replace(t, rs__index__), _) |>
#         converge(t -> replace(t,  rs__call__), _) |> 
#         converge(t -> replace(t,   rs__dot__), _) |> 
#         to_line_tokens |> 
#         get_block!
# end

"""
    pretty_str(ast::Union{String, SubString}, height=0, indent=3)

Render a single token: `"__bra__"` becomes `"("`, `"__ket__"` becomes `")"`, and any
other token is returned unchanged. `height` and `indent` are accepted for signature
compatibility with the tuple method and are not used.
"""
function pretty_str(ast::Union{String, SubString}, height=0, indent=3)
    if ast == "__bra__"
        return "("
    elseif ast == "__ket__"
        return ")"
    end
    return ast
end

"""
    pretty_str(ast, height=0, indent=3)

Render a tagged AST node back to source-like text. `ast[1]` is the tag and selects the
layout:

- `"__nl__"`: a newline, `height` spaces, then the remaining elements rendered at
  `height + indent` and joined with single spaces;
- `"__call__"` / `"__index__"`: `ast[2]` followed by the remaining elements inside
  `(...)` / `[...]`;
- `"__tuple__"` / `"__list__"`: the remaining elements inside `(...)` / `[...]`;
- `"__dot__"`: `ast[2]`, a `.`, then `ast[3]`.

Tags other than these are not handled by any `@match` arm.
"""
function pretty_str(ast, height=0, indent=3)
    tag = ast[1]
    # Render `nodes` at nesting depth `level` and glue the pieces with single spaces.
    spaced(nodes, level=height) = join(pretty_str.(nodes, level, indent), ' ')
    return @match tag begin
        "__nl__" => '\n' * ' '^height * spaced(ast[2:end], height + indent)
        "__call__" => pretty_str(ast[2], height, indent) * "(" * spaced(ast[3:end]) * ")"
        "__index__" => pretty_str(ast[2], height, indent) * "[" * spaced(ast[3:end]) * "]"
        "__tuple__" => "(" * spaced(ast[2:end]) * ")"
        "__list__" => "[" * spaced(ast[2:end]) * "]"
        "__dot__" => pretty_str(ast[2], height, indent) * '.' * pretty_str(ast[3], height, indent)
    end
end
"""
    pretty_print(ast, height=0, indent=3)

Print the text produced by `pretty_str(ast, height, indent)` to standard output.
Returns `nothing`.
"""
function pretty_print(ast, height=0, indent=3)
    rendered = pretty_str(ast, height, indent)
    print(stdout, rendered)
    return nothing
end

pretty_print (generic function with 3 methods)

In [61]:
# Parse the call/index/dot sample and display the raw nested-tuple AST.
parser(TESTS[4])

("__nl__", "__begin__", ("__nl__", ("__call__", "a")), ("__nl__", ("__call__", "a", "b")), ("__nl__", ("__call__", "a", ("__call__", "b"))), ("__nl__", ("__call__", ("__call__", "a", "b"), "c")), ("__nl__", ("__call__", "a", ("__call__", "b", "c"))), ("__nl__", ("__call__", "a", ("__call__", "b", "c"), ",", "d")), ("__nl__", ("__dot__", "a", "b")), ("__nl__", ("__dot__", "a", ("__call__", "b"))), ("__nl__", ("__dot__", "a", ("__call__", "b", "c"))), ("__nl__", ("__dot__", ("__dot__", "a", "b"), "c")), ("__nl__", ("__dot__", ("__dot__", "a", ("__call__", "b")), "c")), ("__nl__", ("__dot__", ("__dot__", "a", ("__call__", "b", "c")), "d")), ("__nl__", ("__index__", "a")), ("__nl__", ("__index__", "a", "b")), ("__nl__", ("__index__", "a", ("__index__", "b"))), ("__nl__", ("__index__", "a", ("__index__", "b", "c"))), ("__nl__", ("__index__", "a", ("__index__", "b", "c"), ",", "d")), ("__nl__", ("__dot__", "a", "b")), ("__nl__", ("__dot__", "a", ("__index__", "b"))), ("__nl__", ("__dot__", "

In [62]:
# Round trip: parse the same sample, then render the AST back to text.
pretty_print(parser(TESTS[4]))


__begin__ 
   a() 
   a(b) 
   a(b()) 
   a(b)(c) 
   a(b(c)) 
   a(b(c) , d) 
   a.b 
   a.b() 
   a.b(c) 
   a.b.c 
   a.b().c 
   a.b(c).d 
   a[] 
   a[b] 
   a[b[]] 
   a[b[c]] 
   a[b[c] , d] 
   a.b 
   a.b[] 
   a.b[c] 
   a.b.c 
   a.b[].c 
   a.b[c].d 
   a(b[c]) 
   (a)(b) 
   [a](b) 
   (a)[b] 
   [a][b] 
   (x) 
   (lambda (x) 3) 
   a[b](c) 
   [[a]]

In [26]:
# Show the raw text of the sample whose first child is indented deeper than its siblings.
print(TESTS[3])

if a = 1
     println 3
   a = 2
     println(2)
   else
     println 1


Functional:

```lisp
> (define (fib n)
>     (let L ((a 1) (b 0) (c n))
>         (if (= c 0)
>              b
>              (L b (+ a b) (- c 1)))))
#<unspecified>
> (fib 3)
2
> (map fib (list 1 2 3 4 5 6 7 8 9))
(1 1 2 3 5 8 13 21 34)
```

In our notation:

```julia
function fib(n)
    loop rec(a: 1, b: 0, c: n)
        b if c = 0 else rec(b, a+b, c-1)
    
function fib(n)
    rec(a, b, c): b if c = 0 else rec(b, a+b, c-1)
    return rec(1, 0, n)


fib(3)
2
map(fib, [1 2 3 4 5 6 7 8 9]) # [1 1 2 3 5 8 13 21 34]

```

```julia
function fib(n)
    rec(a, b, c): if c = 0 then b else rec(b, a+b, c-1)
    return rec(1, 0, n)
```

```julia
function fib(n)
    return loop rec(a: 1, b: 0, c: n)
        if c = 0 then b else rec(b, a+b, c-1)
```

# Parser 2

In [53]:
# Matches one balanced parenthesised group; nested groups are handled by recursing into
# the named group `rec`. The text between the outermost parentheses is captured as `n`.
r_bracket = r"(?<rec>\((?<n>(?>([^\(\)]|\g<rec>)*))\))"
# Matches a run of one or more word characters.
r_word = r"\w+"

r"\w+"

In [54]:
# Rewrite only the first balanced (...) group (count=1) as "[ ... ]"; the later "(    )" is left alone.
replace("( asd asd( foo, bar() ) fasdf ).adsf(    )", r_bracket => s"[ \g<n> ]", count=1)

"[  asd asd( foo, bar() ) fasdf  ].adsf(    )"

In [55]:
# Simple dot rule: "<left>.<right>" -> "( __dot__ <left> <right> )", where each side is a run of non-dot characters.
rs_dot = r"([^\.]+)\.([^\.]+)" => s"( __dot__ \1 \2 )"

r"([^\.]+)\.([^\.]+)" => s"( __dot__ \1 \2 )"

In [56]:
# Applying the rule twice nests the chain to the left: the first pass wraps "foo.bar", the second wraps that result with "baz".
replace(replace("foo.bar.baz", rs_dot), rs_dot)

"( __dot__ ( __dot__ foo bar ) baz )"

In [None]:
# Parse source text `str` into a nested-tuple AST (re-definition of `parser` for the
# "Parser 2" section of this notebook).
#
# The text is rewritten by regex substitutions and then handed to `to_line_tokens` and
# `get_block!`:
#   1. an operator character at the start of the string or after whitespace gets a space
#      after it;
#   2. an operator character (or `..`) directly after a non-space character gets a space
#      on both sides;
#   3. bracket groups are rewritten, each rule repeated until the text stops changing, in
#      the order tuple, list, index, call, dot.
# Only the first top-level block is returned, because `get_block!` is called once.
function parser(str)
    # Apply `f` repeatedly until its output equals its input.
    function converge(f, text)
        while true
            next = f(text)
            next == text && return next
            text = next
        end
    end

    rs__sepsym__ = r"(\S)([\+\-\*\/\^\@\=\>\<\:\;\,]|(\.\.))" => s"\1 \2 "
    rs__sepsymbegin__ = r"(^|\s)([\+\-\*\/\^\@\=\>\<\:\;\,])" => s"\1\2 "
    # The negative lookahead stops an already tagged `( __tuple__ ...` group from being wrapped again.
    rs__tuple__  = r"(^|[\(\[\s\.])(?<rec>\((?!\s__tuple__)(?<n>(?>[^\(\)]+|\g<rec>?)+)\))" => s"\1( __tuple__ \g<n> )"
    rs__list__   = r"(^|[\(\[\s\.])(?<rec>\[(?<n>(?>[^\[\]]+|\g<rec>?)+)\])" => s"\1( __list__ \g<n> )"
    rs__call__   = r"(?<l>(?<rec0>\((?>[^\(\)\[\]]+|\g<rec0>|\g<rec1>?)+\))|(?<rec1>\[(?>[^\(\)\[\]]+|\g<rec0>|\g<rec1>?)+\])|[^\(\)\[\]\s\.]+)(?<rec2>\((?<j>(?>[^\(\)]+|\g<rec2>?)+)\))" => s"( __call__ \g<l> \g<j> )"
    rs__index__  = r"(?<l>(?<rec0>\((?>[^\(\)\[\]]+|\g<rec0>|\g<rec1>?)+\))|(?<rec1>\[(?>[^\(\)\[\]]+|\g<rec0>|\g<rec1>?)+\])|[^\(\)\[\]\s\.]+)(?<rec2>\[(?<j>(?>[^\[\]]+|\g<rec2>?)+)\])" => s"( __index__ \g<l> \g<j> )"
    rs__dot__ = r"(?<l>(?<rec>\((?>[^\(\)]+|\g<rec>?)+\))|[^\(\)\s\.]+)\.(?<r>\g<rec>|[^\(\)\s\.]+)" => s"( __dot__ \g<l> \g<r> )"

    # Operator spacing is a single pass each.
    text = replace(str, rs__sepsymbegin__)
    text = replace(text, rs__sepsym__)

    # Bracket and dot rules are each run to a fixed point, in this order.
    for rule in (rs__tuple__, rs__list__, rs__index__, rs__call__, rs__dot__)
        text = converge(t -> replace(t, rule), text)
    end

    return get_block!(to_line_tokens(text))
end