-
-
Notifications
You must be signed in to change notification settings - Fork 3k
/
grammar.rb
135 lines (128 loc) · 4.66 KB
/
grammar.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# encoding: utf-8
module Rubocop
  module Cop
    # Correlates a flat list of lexer tokens with the nested s-expression
    # describing the same source code. The result (see #correlate) maps
    # token indexes to "grammar paths", i.e. the chain of s-expression
    # node types leading down to the construct the token belongs to.
    class Grammar
      # Node types whose children appear in the s-expression in the
      # reverse of their order in the source (body before condition).
      REVERSED_MODIFIERS = [:if_mod, :unless_mod, :while_mod,
                            :until_mod].freeze

      # tokens - an array of objects responding to #type, #text and #pos,
      #          where #pos responds to #lineno and #column.
      def initialize(tokens)
        @tokens_without_pos = tokens.map { |t| [t.type, t.text] }
        process_embedded_expressions

        # [type, text] => ascending list of indexes where that token occurs.
        # Built after process_embedded_expressions so it sees the relabelled
        # token types.
        @token_indexes = {}
        @tokens_without_pos.each_with_index do |t, i|
          (@token_indexes[t] ||= []) << i
        end

        @ix = 0
        @table = {}

        # [lineno, column] => token index. Filled in one entry at a time
        # rather than through Hash[*flat_list], because splatting two
        # arguments per token can exhaust the VM stack on very large files.
        @index_by_pos = {}
        tokens.each_with_index do |t, i|
          @index_by_pos[[t.pos.lineno, t.pos.column]] = i
        end

        # S-expression node types that have no leaf of their own but are
        # represented by one specific token.
        @special = {
          assign:      [:on_op, '='],
          brace_block: [:on_lbrace, '{']
        }
      end

      # The string "#{x}" may give the tokens
      #   [:on_tstring_beg, '"'], [:on_embexpr_beg, '#{'], [:on_ident, 'x'],
      #   [:on_rbrace, '}'], [:on_tstring_end, '"']
      # which is not so good for us. We want to distinguish between a
      # right brace that ends an embedded expression inside a string
      # and an ordinary right brace, so we replace that :on_rbrace with
      # :on_embexpr_end.
      #
      # If the token stream already contains :on_embexpr_end for the end
      # of an embedded expression, that token is taken as the end of the
      # expression too. Otherwise the scan would stay in the :inside_expr
      # state and could later relabel an unrelated right brace.
      def process_embedded_expressions
        state = :outside
        brace_depth = 0
        @tokens_without_pos.each do |token|
          type = token[0]
          case state
          when :outside
            state = :inside_string if type == :on_tstring_beg
          when :inside_string
            case type
            when :on_tstring_end
              state = :outside
            when :on_embexpr_beg
              brace_depth = 1
              state = :inside_expr
            end
          when :inside_expr
            case type
            when :on_lbrace
              brace_depth += 1
            when :on_rbrace
              if brace_depth == 1
                # This brace closes the '#{' that opened the expression.
                token[0] = :on_embexpr_end
                state = :inside_string
              end
              brace_depth -= 1
            when :on_embexpr_end
              # Already labelled by the lexer; nothing to rewrite.
              brace_depth = 0
              state = :inside_string
            end
          end
        end
      end

      # Returns a hash mapping indexes in the token array to grammar
      # paths, e.g.:
      # {  0 => [:program, :assign, :var_field, :@ident],
      #    1 => [:program, :assign],
      #    2 => [:program, :assign, :@int],
      #    4 => [:program, :assign, :var_field, :@ident],
      #    5 => [:program, :assign],
      #    7 => [:program, :assign, :@int],
      #    9 => [:program, :assign, :var_field, :@ident],
      #   11 => [:program, :assign],
      #   12 => [:program, :assign, :@int] }
      #
      # sexp - the s-expression (nested arrays) to walk. Leaf nodes are
      #        arrays whose first element is a symbol starting with "@"
      #        and whose last element responds to #lineno and #column.
      # path - the grammar path of the enclosing node (used when
      #        recursing).
      #
      # Raises RuntimeError if a leaf's position matches no token.
      def correlate(sexp, path = [])
        case sexp
        when Array
          case sexp[0]
          when /^@/
            # Leaves in the grammar have a corresponding token with a
            # position, which we search for and advance @ix.
            leaf_ix = @index_by_pos[[sexp[-1].lineno, sexp[-1].column]]
            fail "#{sexp}\n#{@index_by_pos}" unless leaf_ix
            @table[leaf_ix] = path + [sexp[0]]
            @ix = leaf_ix + 1
          when *@special.keys
            # Here we don't advance @ix because there may be other
            # tokens in between the current one and the one we get from
            # @special.
            find(path, sexp, @special[sexp[0]])
          when :block_var # "{ |...|" or "do |...|"
            # A block written with empty "||" parameters has no single
            # '|' token, so the opening pipe may be missing.
            opening_pipe = find(path, sexp, [:on_op, '|'])
            if opening_pipe
              @ix = opening_pipe + 1
              find(path, sexp, [:on_op, '|'])
            end
          end

          path += [sexp[0]] if Symbol === sexp[0]

          # Compensate for reverse order of if/unless/while/until modifier.
          children = REVERSED_MODIFIERS.include?(sexp[0]) ? sexp.reverse : sexp

          children.each do |elem|
            case elem
            when Array
              correlate(elem, path) # Dive deeper
            when Symbol
              unless elem.to_s =~ /^@?[a-z_]+$/
                # There's a trailing @ in some symbols in sexp,
                # e.g. :-@, that don't appear in tokens. That's why we
                # chomp it off.
                find(path, [elem], [:on_op, elem.to_s.chomp('@')])
              end
            end
          end
        end
        @table
      end

      private

      # Records the grammar path for the first occurrence of token_to_find
      # at or after @ix. For a left brace the matching right brace gets
      # the same path. Returns the token index, or nil if there is no such
      # token.
      def find(path, sexp, token_to_find)
        indices = @token_indexes[token_to_find]
        return unless indices

        ix = indices.find { |i| i >= @ix }
        return unless ix

        @table[ix] = path + [sexp[0]]
        add_matching_rbrace(ix) if token_to_find == [:on_lbrace, '{']
        ix
      end

      # Gives the right brace that balances the left brace at index ix the
      # same table entry as that left brace. Does nothing if no balancing
      # right brace is found.
      def add_matching_rbrace(ix)
        brace_depth = 0
        rbrace_offset = @tokens_without_pos[ix..-1].index do |t|
          brace_depth += 1 if t == [:on_lbrace, '{']
          brace_depth -= 1 if t == [:on_rbrace, '}']
          brace_depth == 0 && t == [:on_rbrace, '}']
        end
        @table[ix + rbrace_offset] = @table[ix] if rbrace_offset
      end
    end
  end
end