# simple_parser.rb
require 'strscan'
module SimpleParser
class Tokenizer
  # Table-driven, longest-match ("maximal munch") tokenizer.
  #
  # Rules are Arrays of the form [pattern, *action]:
  #   [pattern]                  -> matched text is discarded (e.g. whitespace)
  #   [pattern, callable]        -> token = callable.call(text)
  #   [pattern, :name]           -> token = [:name, text]
  #   [pattern, :name, :to_i]    -> token = [:name, text.to_i]
  #   [pattern, :name, callable] -> token = [:name, callable.call(text)]
  # String patterns are escaped and matched literally.
  include Enumerable

  # rules - Array of rule Arrays as documented above.
  def initialize(rules)
    @rules = rules.map { |r|
      r = r.dup
      pattern = r.shift
      # Treat bare strings as literal text, not regexp source.
      pattern = /#{Regexp.quote(pattern)}/ if pattern.is_a?(String)
      [pattern, compile_rule(r)]
    }
  end

  # Start tokenizing +text+ and prime the one-token lookahead.
  def reset(text)
    @scanner = StringScanner.new(text)
    @token = fetch_next_token
    nil
  end

  # True once every token has been consumed via #next_token.
  def eos?
    @token.nil?
  end

  # Return the current token and advance the lookahead.
  def next_token
    token = @token
    @token = fetch_next_token
    token
  end

  # Yield each remaining token (Enumerable support).
  def each
    yield next_token until eos?
  end

  # Tokenize +text+ from the beginning, returning all tokens as an Array.
  def scan(text)
    reset(text)
    to_a
  end

  private

  # Scan the longest match among all rules at the current position and
  # return its token; rules that compiled to nil are skipped silently.
  # Returns nil at end of input. Raises RuntimeError on unmatchable input.
  def fetch_next_token
    until @scanner.eos?
      rule, token = nil, nil
      @rules.each do |pattern, r|
        next unless (text = @scanner.scan(pattern))
        # Keep the longest match; the first rule wins length ties.
        rule, token = r, text if token.nil? || text.length > token.length
        @scanner.unscan
      end
      if token
        # FIX: StringScanner#pos counts bytes, so advance by bytesize —
        # advancing by character length corrupted multibyte input.
        @scanner.pos += token.bytesize
        return rule.call(token) unless rule.nil?
      else
        # FIX: post_match is nil/stale after unscan; rest is the actual
        # untokenized remainder. (Also fixes the "remaing" typo.)
        raise "scan error; remaining text:\n\n#{@scanner.rest}"
      end
    end
    nil
  end

  # Compile the action portion of a rule into a token-building lambda,
  # or nil when the rule has no action (discard the match).
  def compile_rule(rule)
    p1, p2 = rule.shift, rule.shift
    if p1.nil?
      nil
    elsif p1.respond_to?(:call)
      p1
    elsif p1.respond_to?(:to_sym)
      if p2
        if p2.respond_to?(:call)
          lambda { |t| [p1, p2.call(t)] }
        elsif p2.respond_to?(:to_sym)
          lambda { |t| [p1, t.send(p2.to_sym)] }
        else
          raise "second rule param must respond to :call or :to_sym"
        end
      else
        lambda { |t| [p1, t] }
      end
    else
      raise "first rule param must respond to :call or :to_sym"
    end
  end
end
class Parser
  # Abstract recursive-descent parser base. Subclasses implement
  # #parse_program in terms of the #accept / #token_name / #token_value
  # primitives; token rules are handed straight to the Tokenizer.
  def initialize(token_rules)
    @token_rules = token_rules
  end

  # Tokenize +text+, parse it, and return the parse result.
  # Raises unless the entire input is consumed.
  def parse(text)
    reset(text)
    accept
    result = parse_program
    stop
    result
  end

  # Convenience entry point; identical to #parse.
  def eval(text)
    parse(text)
  end

  # The memoized tokenizer instance for this parser.
  def tokenizer
    @tokenizer = build_tokenizer if @tokenizer.nil?
    @tokenizer
  end

  # Construct a fresh Tokenizer from the configured rules.
  def build_tokenizer
    Tokenizer.new(@token_rules)
  end

  private

  # Hook for subclasses: parse one program and return its value.
  def parse_program
    raise "implement me"
  end

  # Name (first element) of the current token, or nil at EOF.
  def token_name
    return nil unless @token
    @token[0]
  end

  # Value (second element) of the current token, or nil at EOF.
  def token_value
    return nil unless @token
    @token[1]
  end

  # Consume the current token and return its value, optionally
  # enforcing that its name equals +token+.
  def accept(token = nil)
    if token && token != token_name
      raise "parse error, expecting #{token.inspect}"
    end
    value = token_value
    @token = tokenizer.next_token
    value
  end

  # Assert that all input has been consumed.
  def stop
    return if @token.nil?
    raise "expecting EOF"
  end

  # Point the tokenizer at fresh input.
  def reset(text)
    tokenizer.reset(text)
  end
end
end