-
-
Notifications
You must be signed in to change notification settings - Fork 3k
/
redundant_regexp_escape.rb
136 lines (117 loc) · 4.31 KB
/
redundant_regexp_escape.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# frozen_string_literal: true
module RuboCop
module Cop
module Style
# Checks for redundant escapes inside Regexp literals.
#
# @example
# # bad
# %r{foo\/bar}
#
# # good
# %r{foo/bar}
#
# # good
# /foo\/bar/
#
# # good
# %r/foo\/bar/
#
# # good
# %r!foo\!bar!
#
# # bad
# /a\-b/
#
# # good
# /a-b/
#
# # bad
# /[\+\-]\d/
#
# # good
# /[+\-]\d/
class RedundantRegexpEscape < Base
  include RangeHelp
  extend AutoCorrector

  MSG_REDUNDANT_ESCAPE = 'Redundant escape inside regexp literal'

  # Characters whose escape is accepted both inside and outside a character class.
  ALLOWED_ALWAYS_ESCAPES = " \n[]^\\#".chars.freeze
  # Extra metacharacters whose escape is accepted inside a character class.
  ALLOWED_WITHIN_CHAR_CLASS_METACHAR_ESCAPES = '-'.chars.freeze
  # Extra metacharacters whose escape is accepted outside a character class.
  ALLOWED_OUTSIDE_CHAR_CLASS_METACHAR_ESCAPES = '.*+?{}()|$'.chars.freeze

  # Walks every escape in the regexp literal and registers an offense for each
  # one that is not an allowed escape (or whose escaped character is not validly
  # encoded). Autocorrection drops the first character of the two-character
  # escape, i.e. the leading backslash.
  def on_regexp(node)
    each_escape(node) do |char, index, within_character_class|
      necessary = char.valid_encoding? &&
                  allowed_escape?(node, char, index, within_character_class)
      next if necessary

      offense_range = escape_range_at_index(node, index)
      add_offense(offense_range, message: MSG_REDUNDANT_ESCAPE) do |corrector|
        corrector.remove_leading(offense_range, 1)
      end
    end
  end

  private

  # Decides whether escaping `char` at `index` should be left alone.
  #
  # Alphanumeric escapes are always accepted: a handful of letters (e.g. i, E, F)
  # currently mean nothing special when escaped, but listing them is awkward and
  # a future Ruby could give them meaning, so that /\i/ would differ from /i/.
  def allowed_escape?(node, char, index, within_character_class)
    return true if /[[:alnum:]]/.match?(char) ||
                   ALLOWED_ALWAYS_ESCAPES.include?(char) ||
                   delimiter?(node, char)

    unless within_character_class
      return ALLOWED_OUTSIDE_CHAR_CLASS_METACHAR_ESCAPES.include?(char)
    end
    return false unless ALLOWED_WITHIN_CHAR_CLASS_METACHAR_ESCAPES.include?(char)

    !char_class_begins_or_ends_with_escaped_hyphen?(node, index)
  end

  # Escaping a hyphen inside a character class is normally fine, yet it is
  # pointless when the hyphen is the first or the last member of the class.
  # Returns true in exactly those cases, e.g. for "[0-9\\-]" or "[\\-0-9]".
  def char_class_begins_or_ends_with_escaped_hyphen?(node, index)
    source = contents_range(node).source

    # The escape occupies two characters, so the one right after it sits at +2.
    return true if source[index + 2] == ']'
    return false unless source[index - 1] == '['

    # A preceding '[' only opens the class when it is not itself escaped.
    index < 2 || source[index - 2] != '\\'
  end

  # True when `char` equals the last character of the literal's opening
  # delimiter or the first character of its closing delimiter.
  def delimiter?(node, char)
    opening = node.loc.begin.source[-1]
    closing = node.loc.end.source[0]

    [opening, closing].include?(char)
  end

  if Gem::Version.new(Regexp::Parser::VERSION) >= Gem::Version.new('2.0')
    # Yields the escaped character, the escape's position (`ts`) and a flag
    # telling whether it lies inside a character class, for every `:escape`
    # expression of the parsed tree. The running depth grows on entering a
    # `:set` expression and shrinks on any other `:set` event.
    def each_escape(node)
      node.parsed_tree&.traverse&.reduce(0) do |depth, (event, expr)|
        yield(expr.text[1], expr.ts, !depth.zero?) if expr.type == :escape
        next depth unless expr.type == :set

        event == :enter ? depth + 1 : depth - 1
      end
    end
  # Please remove this `else` branch once support for regexp_parser 1.8 is dropped.
  # It exists only for compatibility with regexp_parser 1.8 and will never be maintained.
  else
    # Same traversal as the branch above, reading the position from
    # `start_index` instead of `ts`.
    def each_escape(node)
      node.parsed_tree&.traverse&.reduce(0) do |depth, (event, expr)|
        yield(expr.text[1], expr.start_index, !depth.zero?) if expr.type == :escape
        next depth unless expr.type == :set

        event == :enter ? depth + 1 : depth - 1
      end
    end
  end

  # Builds the two-character source range of the escape located `index`
  # characters past the end of the literal's opening delimiter.
  def escape_range_at_index(node, index)
    escape_start = node.loc.begin.end_pos + index
    range_between(escape_start, escape_start + 2)
  end
end
end
end
end