In [74]:
using Pipe

LoadError: importing Pipe into Main conflicts with an existing identifier

# Parse

Language syntax rules:
* each syntax rule translates to a typed block, i.e. a block with its type at the beginning:
    - types: 
        - __nl__: new line
        - __dot__: dot syntax
        - __call__: call syntax 
        - __index__: get index syntax
        - __parentheses__: parentheses 
        - __square_brackets__: square brackets
        - __curly_brackets__: curly brackets
* separated symbols:
    - example: 
        ```julia
        a+b -> a + b 
        a: 1 -> a : 1
        ```
* indents
    - example: 
        ```julia
        let fib(n)
            a: 1
            b: 0
            for _ in 0..n
                a, b: b, a + b
            return b
        
        (__nl__ let fib(n)
            (__nl__ a : 1)
            (__nl__ b : 0)
            (__nl__ for _ in 0..n
                (__nl__ a, b: b, a + b))
            (__nl__ return b))
        ```
    - special rule: 
        ```julia
        if pred
              this
           pred2
              that
           else
              and this
        
        # missing indent gets inserted
        
        if pred
          __begin__
            this
          pred2
            that
          else
            and this
        ```
* brackets
    - example: 
        ```julia
        ((1,2),(1,2)) -> ((1 2) (1 2))
        ```
* call:
    - example:
        ```julia
        asdf(...) -> (__call__ asdf ...)
        asdf[...] -> (__index__ asdf ...)
        ```
* dot syntax
    - example:
        ```julia
        foo.bar -> (__dot__ foo bar)
        foo.bar.baz -> (__dot__ (__dot__ foo bar) baz)
        ```
    - example with call:
        ```julia
        foo.bar(...) -1-> foo.(__call__ bar ...) -2-> (__dot__ foo (__call__ bar ...))
        ```

Special symbols:

In [107]:
# Sample programs in the toy language; each entry exercises one group of syntax rules.
TESTS = [
    # Separated symbols (`:` and `+`), including a line that starts with a symbol.
    """
    a: c+d
    : a c+d
    """,
    # Indentation-based nesting (function body containing a loop body).
    """
    let fib(n)
        a: 1
        b: 0
        for _ in 0..n
            a, b: b, a+b
        return b
    """,
    # `if` whose first branch body is indented deeper than the following
    # branch conditions (the "special rule" shape from the notes).
    """
    if a = 1
         println 3
       a = 2
         println 2
       else
         println 1
    """,
    # Call, dot and index syntax, alone and in combination.
    """
    a()
    a(b)
    a(b())
    a(b)(c)
    a(b(c))
    a(b(c),d) 
    
    a.b
    a.b()
    a.b(c)
    a.b.c
    a.b().c
    a.b(c).d
    
    a[]
    a[b]
    a[b[]]
    a[b[c]]
    a[b[c],d] 
    a.b
    a.b[]
    a.b[c]
    a.b.c
    a.b[].c
    a.b[c].d
    
    a(b[c])
    (a)(b)
    [a](b)
    (a)[b]
    [a][b]
    """,
]

4-element Vector{String}:
 "a: c+d\n: a c+d\n"
 "let fib(n)\n    a: 1\n    b: 0\n  " ⋯ 19 bytes ⋯ "     a, b: b, a+b\n    return b\n"
 "if a = 1\n     println 3\n   a = 2\n     println 2\n   else\n     println 1\n"
 "a()\na(b)\na(b())\na(b)(c)\na(b(c))" ⋯ 136 bytes ⋯ ")\n(a)(b)\n[a](b)\n(a)[b]\n[a][b]\n"

In [108]:
# Ordered table of `symbol => placeholder` pairs consumed by `separate_symbols`.
# Order matters because the pairs are applied one after another:
#   * `..` is listed before `.` so the two-character token is handled first;
#   * `-` is listed before `>` and `<`, whose placeholders themselves contain a `-`.
SEPARATED_SYMBOLS = [
    "+" => "__plus__",
    "-" => "__minus__",
    "*" => "__asterisk__",
    "/" => "__slash__",
    "^" => "__circumflex_accent__", 
    "=" => "__equal_sign__",
    ">" => "__greater-than_sign__",
    "<" => "__less-than_sign__",
    ":" => "__colon__", 
    ";" => "__semicolon__",
   ".." => "__double_dot__",
    "." => "__dot__", 
    "," => "__comma__", 
]

# Bracket kinds handled by `calls_indexes`, as (opening bracket, closing bracket, tag)
# triples; the tag becomes the head of the rewritten block.
CALLS = [
    ("(", ")", "__call__"),
    ("[", "]", "__index__"),
]

2-element Vector{Tuple{String, String, String}}:
 ("(", ")", "__call__")
 ("[", "]", "__index__")

In [109]:
"""
    separate_symbols(str, SEPARATED_SYMBOLS=SEPARATED_SYMBOLS)

Replace every operator symbol in `str` with its placeholder name, set off by spaces.

# Arguments
- `str`: the source text to rewrite.
- `SEPARATED_SYMBOLS`: ordered collection of `symbol => placeholder` pairs. Pairs are
  applied in order, so a longer symbol (such as `..`) must be listed before any shorter
  symbol it contains (such as `.`).

# Returns
A new string in which each symbol occurrence is replaced by its placeholder followed by
a space. A space is also inserted before the placeholder when the symbol was directly
preceded by a non-whitespace character.
"""
function separate_symbols(str, SEPARATED_SYMBOLS=SEPARATED_SYMBOLS)
    for (k, v) in SEPARATED_SYMBOLS
        # Quote the whole symbol for PCRE. Escaping only the first character would turn
        # ".." into "one literal dot followed by any character" and swallow the
        # character after a single dot.
        quoted = "\\Q" * k * "\\E"
        # Symbol glued to the preceding token: keep that character and add a space.
        str = replace(str, Regex("(\\S)" * quoted) => SubstitutionString("\\1 $v "))
        # Symbol at the start of the text or after whitespace: no extra leading space.
        str = replace(str, Regex("(^|\\s)" * quoted) => SubstitutionString("\\1$v "))
    end
    return str
end

"""
    calls_indexes(str, CALLS=CALLS)

Rewrite call-like syntax in `str` into prefix blocks, one bracket kind at a time.

`CALLS` is a collection of `(opener, closer, tag)` triples. For each triple, in order:

1. a space is inserted after every opener;
2. every run of non-whitespace characters immediately followed by the opener is
   replaced by `( tag run`, which drops that opener;
3. every closer is replaced by ` ) `.

Returns the rewritten string.
"""
function calls_indexes(str, CALLS=CALLS)
    rewritten = str
    for (opener, closer, tag) in CALLS
        # Step 1: detach the first argument from the opening bracket.
        padded = replace(rewritten, opener => "$opener ")
        # Step 2: move the token in front of the bracket inside a tagged block.
        headpattern = Regex("(\\S+)\\$opener")
        tagged = replace(padded, headpattern => SubstitutionString("( $tag \\1"))
        # Step 3: every closing bracket becomes a space-padded parenthesis.
        rewritten = replace(tagged, Regex("\\$closer") => " ) ")
    end
    return rewritten
end

calls_indexes (generic function with 2 methods)

In [110]:
# Show the symbol-separation pass applied to the first sample program.
print(separate_symbols(TESTS[1]))

a __colon__  c __plus__ d
__colon__  a c __plus__ d


In [111]:
# Pick the call / dot / index sample program as the working input.
test = TESTS[4]

"a()\na(b)\na(b())\na(b)(c)\na(b(c))\na(b(c),d) \n\na.b\na.b()\na.b(c)\na.b.c\na.b().c\na.b(c).d\n\na[]\na[b]\na[b[]]\na[b[c]]\na[b[c],d] \na.b\na.b[]\na.b[c]\na.b.c\na.b[].c\na.b[c].d\n\na(b[c])\n(a)(b)\n[a](b)\n(a)[b]\n[a][b]\n"

In [129]:
"""
    converge(f, xₙ)

Apply `f` repeatedly, starting from `xₙ`, until one application leaves the value
unchanged according to `==`, and return the result of that last application.

There is no iteration limit: if `f` never reaches a fixed point this does not return.
"""
function converge(f, xₙ)
    current = xₙ
    while true
        next = f(current)
        next == current && return next
        current = next
    end
end

# Regex-based rewrite rules, each stored as a `pattern => substitution` pair that can
# be passed straight to `replace`.
# rs__lb__ = r"([\(\[])" => s"\1 "
# A separator character (+ - * / ^ @ = > < : ; ,) directly after a non-whitespace
# character: put a space on both sides of it. `.` is not in this class.
rs__sepsym__ = r"(\S)([\+\-\*\/\^\@\=\>\<\:\;\,])" => s"\1 \2 "
# The same separator characters directly after whitespace: only add the trailing space.
rs__sepsymbegin__ = r"(\s)([\+\-\*\/\^\@\=\>\<\:\;\,])" => s"\1\2 "
# Call syntax `l(j)` -> `( __call__ l j )`.
#   l: a parenthesised group (rec0), a square-bracketed group (rec1), or a run of
#      characters containing no bracket, whitespace, `.` or `,`.
#   j: the text between the parentheses that immediately follow `l`.
# rec0 / rec1 / rec2 are referenced recursively via \g<...> so that nested brackets
# are consumed as part of the enclosing group.
rs__call__   = r"(?<l>(?<rec0>\((?>[^\(\)\[\]]+|\g<rec0>|\g<rec1>?)+\))|(?<rec1>\[(?>[^\(\)\[\]]+|\g<rec0>|\g<rec1>?)+\])|[^\(\)\[\]\s\.\,]+)(?<rec2>\((?<j>(?>[^\(\)]+|\g<rec2>?)+)\))" => s"( __call__ \g<l> \g<j> )"
# Index syntax `l[j]` -> `( __index__ l j )`; same shape as rs__call__ but the trailing
# group is delimited by square brackets.
rs__index__  = r"(?<l>(?<rec0>\((?>[^\(\)\[\]]+|\g<rec0>|\g<rec1>?)+\))|(?<rec1>\[(?>[^\(\)\[\]]+|\g<rec0>|\g<rec1>?)+\])|[^\(\)\[\]\s\.\,]+)(?<rec2>\[(?<j>(?>[^\[\]]+|\g<rec2>?)+)\])" => s"( __index__ \g<l> \g<j> )"
# Dot syntax `l.r` -> `( __dot__ l r )`, where each side is either a parenthesised
# group or a run of characters without parentheses, whitespace, `.` or `,`.
rs__dot__ = r"(?<l>(?<rec>\((?>[^\(\)]+|\g<rec>?)+\))|[^\(\)\s\.\,]+)\.(?<r>\g<rec>|[^\(\)\s\.\,]+)" => s"( __dot__ \g<l> \g<r> )"
# Print the raw input, then a 100-character dashed separator line.
println(test)
println("-"^100, '\n')
# Rewrite pipeline: the two symbol-spacing rules are applied once each; the index,
# call and dot rules (in that order) are each re-applied until the text stops
# changing; the final text is printed.
@pipe test |> 
    # replace(_, rs__lb__) |>
    replace(_, rs__sepsymbegin__) |>
    replace(_, rs__sepsym__) |>
    converge(t -> replace(t, rs__index__), _) |>
    converge(t -> replace(t, rs__call__), _) |> 
    converge(t -> replace(t,  rs__dot__), _) |> 
    print

a()
a(b)
a(b())
a(b)(c)
a(b(c))
a(b(c),d) 

a.b
a.b()
a.b(c)
a.b.c
a.b().c
a.b(c).d

a[]
a[b]
a[b[]]
a[b[c]]
a[b[c],d] 
a.b
a.b[]
a.b[c]
a.b.c
a.b[].c
a.b[c].d

a(b[c])
(a)(b)
[a](b)
(a)[b]
[a][b]

----------------------------------------------------------------------------------------------------

( __call__ a  )
( __call__ a b )
( __call__ a ( __call__ b  ) )
( __call__ ( __call__ a b ) c )
( __call__ a ( __call__ b c ) )
( __call__ a ( __call__ b c ) , d ) 

( __dot__ a b )
( __dot__ a ( __call__ b  ) )
( __dot__ a ( __call__ b c ) )
( __dot__ ( __dot__ a b ) c )
( __dot__ ( __dot__ a ( __call__ b  ) ) c )
( __dot__ ( __dot__ a ( __call__ b c ) ) d )

( __index__ a  )
( __index__ a b )
( __index__ a ( __index__ b  ) )
( __index__ a ( __index__ b c ) )
( __index__ a ( __index__ b c ) , d ) 
( __dot__ a b )
( __dot__ a ( __index__ b  ) )
( __dot__ a ( __index__ b c ) )
( __dot__ ( __dot__ a b ) c )
( __dot__ ( __dot__ a ( __index__ b  ) ) c )
( __dot__ ( __dot__ a ( __index__ b c ) ) 