/
parse.jl
213 lines (189 loc) · 5.8 KB
/
parse.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
module Parse
import DataStructures: OrderedDict
using ..Commands
using ..Motions
using ..Util
import ..Util.@debug
export well_formed, matched_rule, parse_command, synonym, partial_well_formed
# Auxiliary regexes used by the helper extractors in this module.
# `text_object` accepts at most one arbitrary leading character followed by a
# text object (`a` or `i`, then one of `w`, `W`, `s`, `p`) and captures only the
# text-object part.
const REGS = (
text_object=r"^.?([ai][wWsp])$",
# complete =
)
"""
Regex fragment for the optional repeat count that may precede a command, an
operator or a motion: either empty, or a decimal number that does not start
with zero.

Overview of the Vim command language recognised by this module:

1. Insert commands:
   a, A, i, O
2. Single-key motions:
   w, E, ^, h, j, k, l
3. Multi-key motions:
   gg, gE, fx, Fx
4. Operator followed by a motion or a text object:
   dw, cW, yE, ciw
5. Operator followed by the same operator (acts on whole lines):
   yy, dd, 3dd
"""
const REPEAT = "(?:[1-9]\\d*)?"
# Regex character class matching any implemented single-key motion.
# `0` is left out here and handled by a dedicated rule, since it cannot be told
# apart from a digit when it appears inside a count.
const MOTION = begin
    # `\`, `]`, `[`, `^` and `-` have a special meaning inside a character class
    # (escape, terminator, POSIX-class opener, negation, range). They are
    # backslash-escaped so every motion key is matched literally, whatever order
    # `keys(simple_motions)` happens to yield them in.
    implemented_keys = join(
        (k in ('\\', ']', '[', '^', '-') ? "\\$k" : string(k))
        for k in keys(simple_motions) if k != '0'
    )
    "[$implemented_keys]"
end
# Motions that need more than one keystroke, e.g. 'fx'.
"""
    complex_motion(partial::Bool=false)

Return one regex source string that is the `|`-alternation of the patterns of
all multi-key motions. With `partial=true` the patterns come from
`partial_complex_motions`; otherwise from the keys of `complex_motions`.
"""
function complex_motion(partial::Bool=false)::String
    sources = partial ? partial_complex_motions : collect(keys(complex_motions))
    return join((re.pattern for re in sources), "|")
end
# Regex source fragments that the rule tables below are assembled from.

# Undo (`u`) or redo (`\x12`, the Ctrl-R control character). The branch-reset
# group `(?|...)` makes both alternatives share a single capture slot.
const UNDO_REDO = "(?|(u)|(\x12))"
# Text object: optional count, `a` or `i`, then one of `w`, `W`, `s`, `p`.
const TEXTOBJECT = "$REPEAT[ai][wWsp]"
# Prefix of a text object: the trailing object letter may still be missing.
const PARTIALTEXTOBJECT = "$REPEAT[ai]([wWsp])?"
# Single-key commands matched by the `SynonymCommand` rule.
const DELETECHARS = "[xXDCS]"
# Keys matched by the `InsertCommand` rule.
const INSERTCHARS = "[aAiIoO]"
# Operator keys (`y`, `d`, `c`).
const OPERATOR = "[ydc]"
# Table of complete-command regexes and the command type each one produces.
# Every pattern is anchored at both ends, so a rule only accepts a whole command.
# NOTE(review): lookups return the first key that matches while iterating
# `keys(RULES)`; presumably that follows declaration order — confirm against
# `TupleDict`.
const RULES = TupleDict(
    # Insert commands.
    Regex("^(?<c>$INSERTCHARS)\$") => InsertCommand,
    # Special case: `0` is a motion command.
    Regex("^0\$") => ZeroCommand,
    # Optional count followed by one of the single-key synonym commands.
    Regex("^(?<n1>$REPEAT)(?<c>$DELETECHARS)\$") => SynonymCommand,
    # Optional count followed by undo or redo.
    Regex("^(?<n1>$REPEAT)$UNDO_REDO\$") => HistoryCommand,
    # Optional count followed by a single-key motion.
    Regex("^(?<n1>$REPEAT)($MOTION)\$") => SimpleMotionCommand,
    # Optional count followed by a multi-key motion.
    Regex("^(?<n1>$REPEAT)((?|$(complex_motion())))\$") => CompositeMotionCommand,
    # Operator applied to a text object or a single-key motion.
    Regex("^(?<n1>$REPEAT)(?<op>$OPERATOR)(?<n2>$REPEAT)(?|($TEXTOBJECT)|($MOTION))\$") => OperatorCommand,
    # Operator applied to a multi-key motion.
    Regex("^(?<n1>$REPEAT)(?<op>$OPERATOR)(?<n2>$REPEAT)((?|$(complex_motion())))\$") => OperatorCommand,
    # Operator repeated twice (back-reference to `op`), e.g. `dd`.
    Regex("^(?<n1>$REPEAT)(?<op>$OPERATOR)(\\k<op>)\$") => LineOperatorCommand,
    # `r` followed by the replacement character.
    Regex("^(?<n1>$REPEAT)r(.)\$") => ReplaceCommand,
)
# Counterparts of the complete-command rules that also accept every prefix of a
# command, i.e. input that is still being typed. They are used to decide when
# the key stack should be cleared. Many of these overlap with one another; all
# of them are spelled out so the list mirrors the complete rules one-to-one.
const PARTIAL_RULES = (
    # InsertCommand
    Regex("^(?<c>$INSERTCHARS)?\$"),
    # ZeroCommand
    Regex("^0?\$"),
    # SynonymCommand
    Regex("^(?<n1>$REPEAT)(?<c>$DELETECHARS)?\$"),
    # HistoryCommand
    Regex("^(?<n1>$REPEAT)($UNDO_REDO)?\$"),
    # SimpleMotionCommand
    Regex("^(?<n1>$REPEAT)($MOTION)?\$"),
    # CompositeMotionCommand
    Regex("^(?<n1>$REPEAT)((?|$(complex_motion(true))))?\$"),
    # OperatorCommand
    Regex("^(?<n1>$REPEAT)((?<op>$OPERATOR)((?<n2>$REPEAT)((?|($PARTIALTEXTOBJECT)|($MOTION)))?)?)?\$"),
    # OperatorCommand (2)
    Regex("^(?<n1>$REPEAT)((?<op>$OPERATOR)((?<n2>$REPEAT)((?|$(complex_motion(true))))?)?)?\$"),
    # LineOperatorCommand
    Regex("^(?<n1>$REPEAT)((?<op>$OPERATOR)(\\k<op>)?)?\$"),
    # ReplaceCommand
    Regex("^(?<n1>$REPEAT)(r(.)?)?\$"),
)
"""
    well_formed(cmd::AbstractString)

Return `true` when `cmd` is accepted as a complete Vim command, i.e. when at
least one regex among `keys(RULES)` matches it, and `false` otherwise.

Any `AbstractString` is accepted (not only `String`), matching the signature of
`parse_command`.
"""
function well_formed(cmd::AbstractString)::Bool
    return any(rule -> occursin(rule, cmd), keys(RULES))
end
"""
    partial_well_formed(cmd::AbstractString)

Return `true` when `cmd` could still become an accepted command if more keys
were pressed, i.e. when at least one regex in `PARTIAL_RULES` matches it, and
`false` otherwise.

Any `AbstractString` is accepted (not only `String`), matching the signature of
`parse_command`.
"""
function partial_well_formed(cmd::AbstractString)::Bool
    return any(rule -> occursin(rule, cmd), PARTIAL_RULES)
end
"""
    matched_rule(cmd::AbstractString)

Return the first key of `RULES` (in iteration order) whose regex matches `cmd`,
or `nothing` when no rule matches.

Any `AbstractString` is accepted because `parse_command`, which takes an
`AbstractString`, forwards its argument here.
"""
function matched_rule(cmd::AbstractString)
    for rule in keys(RULES)
        occursin(rule, cmd) && return rule
    end
    return nothing
end
"""
    parse_value(item)

Wrap a regex capture in a `ParseValue` holding its typed value:

- `nothing` or an empty string gives `ParseValue(nothing)`;
- a string made only of digits gives the parsed `Int`;
- any other single-character string gives that `Char`;
- everything else is kept as the string itself.
"""
function parse_value(item::Union{Nothing,AbstractString})::ParseValue
    (item === nothing || isempty(item)) && return ParseValue(nothing)
    # Digits are checked before the length test so that "5" becomes 5, not '5'.
    occursin(r"^\d+$", item) && return ParseValue(parse(Int, item))
    return length(item) == 1 ? ParseValue(first(item)) : ParseValue(item)
end
"""
    parse_command(s::AbstractString)

Attempt to parse `s` into a command.

The first rule of `RULES` matching `s` selects the command type; the rule's
captures are converted with `parse_value` and passed to `command_constructor`.

Returns `nothing` (after logging a warning) when no rule matches `s`.
"""
function parse_command(s::AbstractString)::Union{Command,Nothing}
    # Normalise to `String` so every `AbstractString` accepted by the signature
    # can be handed to the rule lookup and the regex engine. This is a no-op
    # when `s` already is a `String`.
    cmd = String(s)
    rule = matched_rule(cmd)
    if rule === nothing
        # Attach the offending input as a key-value pair rather than building a
        # `(message, value)` tuple as the log message.
        @warn "command not well formed" command = cmd
        return nothing
    end
    m = match(rule::Regex, cmd)
    m === nothing && return nothing
    dtype = RULES[rule]
    return command_constructor(dtype, parse_value.(m.captures)...)
end
"""
    synonym(command::SynonymCommand)

Expand a single-key synonym command into the operator command it abbreviates
and return the result of parsing that expansion. The count stored in
`command.r1` is placed in front of the expansion.

Throws a `KeyError` when `command.operator` has no known expansion, and an
`ArgumentError` when the expansion is not accepted by `parse_command`.
"""
function synonym(command::SynonymCommand)::Command
    # One entry for every key admitted by `DELETECHARS`, following the
    # equivalences given in Vim's documentation.
    # NOTE(review): the `D` and `C` expansions only parse if the end-of-line
    # motion is registered in `simple_motions` — confirm.
    synonyms = Dict(
        'x' => "dl",
        'X' => "dh",
        'D' => "d\$",
        'C' => "c\$",
        'S' => "cc",
    )
    expansion = string(command.r1, synonyms[command.operator])
    parsed = parse_command(expansion)
    if parsed === nothing
        throw(ArgumentError("synonym expansion \"$expansion\" is not a valid command"))
    end
    return parsed
end
# Placeholder with no implementation yet: accepts a count and a key and always
# returns `nothing`.
lookup_synonym(n::Integer, c::Char) = nothing
# function parse(cmd :: String)
# rule = matched_rule(cmd)
# @assert rule !== nothing
# m = match(rule, cmd)
# return m
# end
# Collect the captures of a regex match into an `OrderedDict{Symbol,Any}`, in
# capture order. Named groups are keyed by their name; unnamed groups are keyed
# by their index converted to a `Symbol`.
# NOTE(review): this adds a method to `Base.Dict` for the Base type
# `RegexMatch` (type piracy) and returns an `OrderedDict`, not a `Dict`.
function Base.Dict(m::RegexMatch)
    group_names = Base.PCRE.capture_names(m.regex.regex)
    result = OrderedDict{Symbol,Any}()
    for (idx, captured) in enumerate(m.captures)
        result[Symbol(get(group_names, idx, idx))] = captured
    end
    return result
end
"""
    text_object_part(cmd::AbstractString)

Return the text-object portion of `cmd` as captured by `REGS.text_object`, or
`nothing` when `cmd` does not match that regex.
"""
function text_object_part(cmd::AbstractString)::Union{String,Nothing}
    found = match(REGS.text_object, cmd)
    return found === nothing ? nothing : found.captures[1]
end
"""
    verb_part(cmd::AbstractString)

Return the operator character found in `cmd` (optional digits, then one of the
`OPERATOR` keys), or `nothing` when `cmd` contains no operator key.
"""
function verb_part(cmd::AbstractString)::Union{Char,Nothing}
    # NOTE(review): the pattern is not anchored, so the operator key may be
    # found anywhere in `cmd` — confirm that this is intended.
    found = match(Regex("\\d*($OPERATOR).*"), cmd)
    found === nothing && return nothing
    return first(found.captures[1])
end
end