Skip to content

Commit

Permalink
transform with iter + more regexp
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewcooke committed Mar 4, 2016
1 parent c4fe807 commit 840b415
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 51 deletions.
2 changes: 1 addition & 1 deletion REQUIRE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
julia 0.3
Compat 0.7.6
Compat 0.7.12
AutoHashEquals 0.0.8

2 changes: 1 addition & 1 deletion src/ParserCombinator.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Epsilon, Insert, Dot, Fail, Drop, Equal,
Repeat, Depth, Breadth, Depth!, Breadth!, ALL,
Series, Seq, And, Seq!, And!, Alt, Alt!, Lookahead, Not, Pattern, Delayed, Eos,
ParserError, Error,
Transform, App, Appl,
Transform, App, Appl, ITransform, IApp, IAppl,
@p_str, @P_str, @e_str, @E_str, Opt, Opt!,
Parse, PUInt, PUInt8, PUInt16, PUInt32, PUInt64,
PInt, PInt8, PInt16, PInt32, PInt64, PFloat32, PFloat64,
Expand Down
2 changes: 0 additions & 2 deletions src/core/matchers.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@


# TODO - why aren't State instances immutable?

# some basic definitions for generic matches

# generic fallback: reached only when no more specific execute method exists
# for this matcher / state combination, so report the mismatch as an error.
function execute(k::Config, m, s, i)
    error("$m did not expect to be called with state $s")
end
Expand Down
28 changes: 26 additions & 2 deletions src/core/transforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,27 @@ always_print(::Transform) = false
# when the wrapped matcher succeeds, wrap its state `t` in a TransformState,
# pass `i` through unchanged, and replace the result with `m.f(r)`.
function success(k::Config, m::Transform, s, t, i, r::Value)
    wrapped = TransformState(t)
    return Success(wrapped, i, m.f(r))
end


# as above, but function also takes iterator

# simplified version for transforming Success (remove and re-add the Success
# wrapper).
# a Delegate that stores a matcher together with a function `f`.  the
# `success` method for ITransform calls `f(i, r)`, i.e. `f` receives the
# `i` argument as well as the result.
@auto_hash_equals type ITransform<:Delegate
name::Symbol        # always :ITransform (set by the inner constructor)
matcher::Matcher    # the matcher this transform delegates to
f::Function         # called as f(i, r) by success() for ITransform
# the only constructor: callers supply matcher and function, name is fixed
ITransform(matcher, f) = new(:ITransform, matcher, f)
end

# state type paired with ITransform: an immutable wrapper around a single
# State value.  success() for ITransform builds one from its `t` argument.
@auto_hash_equals immutable ITransformState<:DelegateState
state::State    # the wrapped state
end

# ITransform instances answer `false` to always_print.
function always_print(::ITransform)
    return false
end

# when the wrapped matcher succeeds, wrap its state `t` in an
# ITransformState, pass `i` through unchanged, and replace the result with
# `m.f(i, r)` - the function sees `i` as well as the result.
function success(k::Config, m::ITransform, s, t, i, r::Value)
    wrapped = ITransformState(t)
    return Success(wrapped, i, m.f(i, r))
end



# simplified versions for transforming Success (remove and re-add the
# Success wrapper).

# build a Transform whose function calls `f` on the whole result `x` (as one
# argument) and wraps the returned value in a one-element Any array.
function Appl(m::Matcher, f::Applicable)
    wrap = x -> Any[f(x)]
    return Transform(m, wrap)
end

Expand All @@ -34,3 +52,9 @@ function App(m::Matcher, f::Applicable)
Transform(m, x -> Any[f(x...)])
end
end

# build an ITransform whose function calls `f(i, x)` - `x` is passed as a
# single argument - and wraps the returned value in a one-element Any array.
function IAppl(m::Matcher, f::Applicable)
    wrap = (i, x) -> Any[f(i, x)]
    return ITransform(m, wrap)
end

# build an ITransform whose function calls `f(i, x...)` - `x` is splatted
# into separate arguments after `i` - and wraps the returned value in a
# one-element Any array.
IApp(m::Matcher, f::Applicable) = ITransform(m, (i, x) -> Any[f(i, x...)])
59 changes: 32 additions & 27 deletions src/core/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,33 +22,38 @@ abstract Config{S,I}

# important notes on mutability / hash / equality

# 1 - immutable types in julia are not "just" immutable. they are effectively
# values - they are passed by value. so do not use "immutable" just because the data should not change. think about details.

# 2 - immutable types have an automatic equality and hash based on content
# (which is copied). mutable types have an automatic equality and hash based
# on address. so default hash and equality for immutable types that contain
# mutable types, and for mutable types, may not be what is required.

# 3 - caching within the parser REQUIRES that both Matcher and State instances
# have 'useful' equality and hash values.

# 4 - for matchers, which are created when the grammar is defined, and then
# unchanged, the default hash and equality are likely just fine, even for
# mutable objects (in fact, mutable may be slightly faster since equality is
# just a comparison of an Int64 address, presumably).

# 5 - for states, which often includes mutable result objects, more care is
# needed:

# 5a - whether or not State instances are mutable or immutable, they, and
# their contents, must not change during matching. so all arrays, for
# example, must be copied when new instances are created with different
# values.

# 5b - structurally identical states must be equal, and hash equally. this is
# critical for efficient caching. so it is likely that custom hash and
# equality methods will be needed (see above and auto.jl).
# 1 - immutable types in julia are not "just" immutable. they are
# effectively values - they are passed by value. so do not use
# "immutable" just because the data should not change. think about
# details.

# 2 - immutable types have an automatic equality and hash based on
# content (which is copied). mutable types have an automatic equality
# and hash based on address. so default hash and equality for
# immutable types that contain mutable types, and for mutable types,
# may not be what is required.

# 3 - caching within the parser REQUIRES that both Matcher and State
# instances have 'useful' equality and hash values.

# 4 - for matchers, which are created when the grammar is defined, and
# then unchanged, the default hash and equality are likely just fine,
# even for mutable objects (in fact, mutable may be slightly faster
# since equality is just a comparison of an Int64 address,
# presumably).

# 5 - for states, which often includes mutable result objects, more
# care is needed:

# 5a - whether or not State instances are mutable or immutable, they,
# and their contents, must not change during matching. so all arrays,
# for example, must be copied when new instances are created with
# different values.

# 5b - structurally identical states must be equal, and hash equally.
# this is critical for efficient caching. so it is likely that
# custom hash and equality methods will be needed (see above and
# auto.jl).


# defaults for mismatching types and types with no content
Expand Down
32 changes: 24 additions & 8 deletions src/regex/Regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,24 @@ end

function make_pattern()

group_count = 0
function make_group(pattern)
group_count += 1
Group(group_count, pattern)
# group numbering is tricky - we need to number from the left and
# avoid repetitions on backtracking. so push entries (keyed by
# iter) to a stack on entering the group, pop on leaving.
group_count = Dict{Any, Int}()
group_stack = []
group_popped = Dict{Any, Int}()
# the first time a given key `i` is seen, assign it the next group number
# (one more than the number of keys recorded so far) and push that number
# onto group_stack.  repeat calls with the same key change nothing.  `p` is
# always returned unchanged.
function pre_group(i, p)
    haskey(group_count, i) && return p
    number = length(group_count) + 1
    group_count[i] = number
    push!(group_stack, number)
    return p
end
# the first time a given key `i` is seen, pop the top of group_stack and
# remember it under that key; repeat calls with the same key reuse the
# remembered number.  the result is `p` wrapped in a Group with that number.
function post_group(i, p)
    haskey(group_popped, i) || (group_popped[i] = pop!(group_stack))
    return Group(group_popped[i], p)
end

make_sequence(p) = length(p) == 1 ? p[1] : Sequence(p)
Expand All @@ -51,22 +65,24 @@ function make_pattern()

literal = p"[^[\].*+\\|(){}?]" > Literal
any = E"." > Dot

outseq = Delayed()

atom = literal | any | outseq
plus = atom + E"+" > make_rpt(1)
star = atom + E"*" > make_rpt(0)
opt = atom + E"?" > make_rpt(0, 1)
once = atom + !(E"*"|E"+"|E"?")

inseq = Plus(plus | star | opt | once) |> make_sequence
choice = PlusList(inseq, E"|") |> make_choice
gchoice = E"(" + !(e"?") + choice + E")" > make_group

open = ITransform(E"("+ !(e"?") , pre_group)
gchoice = IApp(open + choice + E")", post_group)
nchoice = E"(?:" + choice + E")"

outseq.matcher = Plus(gchoice | nchoice | literal) |> make_sequence

return choice + Eos()
end

pattern = make_pattern()

end
4 changes: 0 additions & 4 deletions test/dot/examples.jl
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@

using ParserCombinator.Parsers.DOT; D = ParserCombinator.Parsers.DOT

if VERSION < v"0.5-"
readstring = readall
end

# from http://graphs.grevian.org/example

d = parse_dot(open(readstring, "dot/simple.dot"))[1]
Expand Down
4 changes: 0 additions & 4 deletions test/gml/errors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,6 @@ for (text, msg) in [("a 1 ]", "Expected key"),
end


if VERSION < v"0.5-"
readstring = readall
end

s = open(readstring, "gml/error.gml")
try
parse_raw(s)
Expand Down
14 changes: 12 additions & 2 deletions test/regex/pattern.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,19 @@ for (s, r) in [("abc",
[R.Sequence([R.Literal('a'), R.Literal('b'), R.Literal('c')])]),
("a|b",
[R.Choice([R.Literal('a'), R.Literal('b')])]),
("aa|b",
[R.Choice([R.Sequence([R.Literal('a'), R.Literal('a')]), R.Literal('b')])]),
("(a|b)",
[R.Group(1, R.Choice([R.Literal('a'), R.Literal('b')]))]),
("(a|(b))",
[R.Group(1, R.Choice([R.Literal('a'), R.Group(2, R.Literal('b'))]))]),
("(?:a|b)",
[R.Choice([R.Literal('a'), R.Literal('b')])]),
("a|b(?:c|d)",
[R.Choice([R.Literal('a'), R.Sequence([R.Literal('b'), R.Choice([R.Literal("c"), R.Literal("d")])])])])
]
@test parse_dbg(s, R.pattern) == r
println("$s ok")
print("$s...")
pattern = R.make_pattern()
@test parse_dbg(s, pattern; debug=true) == r
println(" ok")
end

0 comments on commit 840b415

Please sign in to comment.