diff --git a/CHANGELOG.md b/CHANGELOG.md index 9de5479..eac51b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- fixed `NoMethodError` when calling `#starts_at` or `#ts` on empty sequences + * e.g. `Regexp::Parser.parse(/|/)[0].starts_at` + * e.g. `Regexp::Parser.parse(/[&&]/)[0][0].starts_at` - fixed nested comment groups breaking local x-options * e.g. in `/(?x:(?#hello)) /`, the x-option wrongly applied to the whitespace - fixed nested comment groups breaking conditionals diff --git a/lib/regexp_parser/expression/classes/character_set/range.rb b/lib/regexp_parser/expression/classes/character_set/range.rb index affc972..df362f4 100644 --- a/lib/regexp_parser/expression/classes/character_set/range.rb +++ b/lib/regexp_parser/expression/classes/character_set/range.rb @@ -1,10 +1,9 @@ module Regexp::Expression class CharacterSet < Regexp::Expression::Subexpression class Range < Regexp::Expression::Subexpression - def starts_at - expressions.first.starts_at + def ts + (head = expressions.first) ? head.ts : @ts end - alias :ts :starts_at def <<(exp) complete? and raise Regexp::Parser::Error, diff --git a/lib/regexp_parser/expression/classes/conditional.rb b/lib/regexp_parser/expression/classes/conditional.rb index 13a0198..598c331 100644 --- a/lib/regexp_parser/expression/classes/conditional.rb +++ b/lib/regexp_parser/expression/classes/conditional.rb @@ -35,9 +35,9 @@ def <<(exp) expressions.last << exp end - def add_sequence(active_opts = {}) + def add_sequence(active_opts = {}, params = { ts: 0 }) raise TooManyBranches.new if branches.length == 2 - params = { conditional_level: conditional_level + 1 } + params = params.merge({ conditional_level: conditional_level + 1 }) Branch.add_to(self, params, active_opts) end alias :branch :add_sequence diff --git a/lib/regexp_parser/expression/sequence.rb b/lib/regexp_parser/expression/sequence.rb index 6ee4fd8..a284346 100644 --- a/lib/regexp_parser/expression/sequence.rb +++ b/lib/regexp_parser/expression/sequence.rb @@ -12,6 +12,7 @@ def add_to(exp, params = {}, active_opts = {}) level: exp.level, set_level: exp.set_level, conditional_level: params[:conditional_level] || exp.conditional_level, + ts: params[:ts], ) sequence.options = active_opts exp.expressions << sequence @@ -19,10 +20,9 @@ def add_to(exp, params = {}, active_opts = {}) end end - def starts_at - expressions.first.starts_at + def ts + (head = expressions.first) ? head.ts : @ts end - alias :ts :starts_at def quantify(*args) target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) } diff --git a/lib/regexp_parser/expression/sequence_operation.rb b/lib/regexp_parser/expression/sequence_operation.rb index 31b6485..4ef26e8 100644 --- a/lib/regexp_parser/expression/sequence_operation.rb +++ b/lib/regexp_parser/expression/sequence_operation.rb @@ -5,17 +5,16 @@ class SequenceOperation < Regexp::Expression::Subexpression alias :operands :expressions alias :operator :text - def starts_at - expressions.first.starts_at + def ts + (head = expressions.first) ? head.ts : @ts end - alias :ts :starts_at def <<(exp) expressions.last << exp end - def add_sequence(active_opts = {}) - self.class::OPERAND.add_to(self, {}, active_opts) + def add_sequence(active_opts = {}, params = { ts: 0 }) + self.class::OPERAND.add_to(self, params, active_opts) end def parts diff --git a/lib/regexp_parser/parser.rb b/lib/regexp_parser/parser.rb index 1fb0032..20b6740 100644 --- a/lib/regexp_parser/parser.rb +++ b/lib/regexp_parser/parser.rb @@ -272,9 +272,9 @@ def conditional(token) nest_conditional(Conditional::Expression.new(token, active_opts)) when :condition conditional_nesting.last.condition = Conditional::Condition.new(token, active_opts) - conditional_nesting.last.add_sequence(active_opts) + conditional_nesting.last.add_sequence(active_opts, { ts: token.te }) when :separator - conditional_nesting.last.add_sequence(active_opts) + conditional_nesting.last.add_sequence(active_opts, { ts: token.te }) self.node = conditional_nesting.last.branches.last when :close conditional_nesting.pop @@ -381,12 +381,12 @@ def meta(token) def sequence_operation(klass, token) unless node.instance_of?(klass) operator = klass.new(token, active_opts) - sequence = operator.add_sequence(active_opts) + sequence = operator.add_sequence(active_opts, { ts: token.ts }) sequence.expressions = node.expressions node.expressions = [] nest(operator) end - node.add_sequence(active_opts) + node.add_sequence(active_opts, { ts: token.te }) end def posixclass(token) diff --git a/spec/parser/alternation_spec.rb b/spec/parser/alternation_spec.rb index 0bfb581..a26d9de 100644 --- a/spec/parser/alternation_spec.rb +++ b/spec/parser/alternation_spec.rb @@ -26,4 +26,20 @@ [0, 0, 0, 0, 0, 1] => [:literal, to_s: 'b??' ], [0, 1] => [Alternative, text: '', count: 1, quantified?: false], [0, 1, 0] => [:capture, count: 1, quantified?: true ] + + # test correct ts values for empty sequences + include_examples 'parse', /|||/, + [0] => [Alternation, text: '|', count: 4, starts_at: 0], + [0, 0] => [Alternative, to_s: '', count: 0, starts_at: 0], + [0, 1] => [Alternative, to_s: '', count: 0, starts_at: 1], + [0, 2] => [Alternative, to_s: '', count: 0, starts_at: 2], + [0, 3] => [Alternative, to_s: '', count: 0, starts_at: 3] + + # test correct ts values for non-empty sequences + include_examples 'parse', /ab|cd|ef|gh/, + [0] => [Alternation, text: '|', count: 4, starts_at: 0], + [0, 0] => [Alternative, to_s: 'ab', count: 1, starts_at: 0], + [0, 1] => [Alternative, to_s: 'cd', count: 1, starts_at: 3], + [0, 2] => [Alternative, to_s: 'ef', count: 1, starts_at: 6], + [0, 3] => [Alternative, to_s: 'gh', count: 1, starts_at: 9] end diff --git a/spec/parser/conditionals_spec.rb b/spec/parser/conditionals_spec.rb index 7582f9c..8be0142 100644 --- a/spec/parser/conditionals_spec.rb +++ b/spec/parser/conditionals_spec.rb @@ -2,20 +2,20 @@ RSpec.describe('Conditional parsing') do include_examples 'parse', /(?a)(?()T|F)/, - [1] => [:conditional, :open, Conditional::Expression, to_s: '(?()T|F)', reference: 'A'], - [1, 0] => [:conditional, :condition, Conditional::Condition, to_s: '()', reference: 'A'], - [1, 1] => [:expression, :sequence, Conditional::Branch, to_s: 'T'], - [1, 1, 0] => [:literal, text: 'T'], - [1, 2] => [:expression, :sequence, Conditional::Branch, to_s: 'F'], - [1, 2, 0] => [:literal, text: 'F'] + [1] => [:conditional, :open, Conditional::Expression, to_s: '(?()T|F)', reference: 'A', ts: 7], + [1, 0] => [:conditional, :condition, Conditional::Condition, to_s: '()', reference: 'A', ts: 9], + [1, 1] => [:expression, :sequence, Conditional::Branch, to_s: 'T', ts: 14], + [1, 1, 0] => [:literal, text: 'T', ts: 14], + [1, 2] => [:expression, :sequence, Conditional::Branch, to_s: 'F', ts: 16], + [1, 2, 0] => [:literal, text: 'F', ts: 16] include_examples 'parse', /(a)(?(1)T|F)/, - [1] => [:conditional, :open, Conditional::Expression, to_s: '(?(1)T|F)', reference: 1], - [1, 0] => [:conditional, :condition, Conditional::Condition, to_s: '(1)', reference: 1], - [1, 1] => [:expression, :sequence, Conditional::Branch, to_s: 'T'], - [1, 1, 0] => [:literal, text: 'T'], - [1, 2] => [:expression, :sequence, Conditional::Branch, to_s: 'F'], - [1, 2, 0] => [:literal, text: 'F'] + [1] => [:conditional, :open, Conditional::Expression, to_s: '(?(1)T|F)', reference: 1, ts: 3], + [1, 0] => [:conditional, :condition, Conditional::Condition, to_s: '(1)', reference: 1, ts: 5], + [1, 1] => [:expression, :sequence, Conditional::Branch, to_s: 'T', ts: 8], + [1, 1, 0] => [:literal, text: 'T', ts: 8], + [1, 2] => [:expression, :sequence, Conditional::Branch, to_s: 'F', ts: 10], + [1, 2, 0] => [:literal, text: 'F', ts: 10] include_examples 'parse', /(foo)(?(1)\d+|(\w)){42}/, [1] => [Conditional::Expression, quantified?: true, to_s: '(?(1)\d+|(\w)){42}'], @@ -65,5 +65,5 @@ # test empty branch include_examples 'parse', /(?a)(?()T|)/, [1] => [Conditional::Expression, count: 3, to_s: '(?()T|)'], - [1, 2] => [Conditional::Branch, to_s: ''] + [1, 2] => [Conditional::Branch, to_s: '', ts: 16] end diff --git a/spec/parser/set/intersections_spec.rb b/spec/parser/set/intersections_spec.rb index 1c32919..8ff5c43 100644 --- a/spec/parser/set/intersections_spec.rb +++ b/spec/parser/set/intersections_spec.rb @@ -45,6 +45,19 @@ [0, 0, 2, 1] => [:literal, text: 'f'], [0, 0, 2, 2] => [:literal, text: 'g'] + # test correct ts values for empty sequences + include_examples 'parse', /[&&]/, + [0, 0] => [CharacterSet::Intersection, text: '&&', count: 2, ts: 1], + [0, 0, 0] => [CharacterSet::IntersectedSequence, count: 0, ts: 1], + [0, 0, 1] => [CharacterSet::IntersectedSequence, count: 0, ts: 3] + + # test correct ts values for non-empty sequences + include_examples 'parse', /[ab&&cd&&ef]/, + [0, 0] => [CharacterSet::Intersection, count: 3, text: '&&', ts: 1], + [0, 0, 0] => [CharacterSet::IntersectedSequence, count: 2, to_s: 'ab', ts: 1], + [0, 0, 1] => [CharacterSet::IntersectedSequence, count: 2, to_s: 'cd', ts: 5], + [0, 0, 2] => [CharacterSet::IntersectedSequence, count: 2, to_s: 'ef', ts: 9] + # Some edge-case patterns are evaluated with #match to make sure that # their matching behavior still reflects the way they are parsed. # #capturing_stderr is used to skip any warnings generated by this.