Skip to content

Commit

Permalink
Add support for Perl quoted and Python named capturing group syntax variants
Browse files Browse the repository at this point in the history
  • Loading branch information
asmblah committed May 10, 2024
1 parent 086fbd4 commit 683b72f
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 5 deletions.
11 changes: 8 additions & 3 deletions src/spec/parserGrammar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -368,9 +368,14 @@ export default {
},
'N_NAMED_CAPTURING_GROUP': {
components: [
/\(\?</,
{ name: 'groupName', what: /[^>]+/ },
/>/,
{
oneOf: [
// Chevron-bracketed Perl or Python named capturing group.
[/\(\?P?</, { name: 'groupName', what: /[^>]+/ }, />/],
// Quoted Perl named capturing group.
[/\(\?'/, { name: 'groupName', what: /[^']+/ }, /'/],
],
},
{ name: 'components', zeroOrMoreOf: 'N_COMPONENT' },
/\)/,
],
Expand Down
28 changes: 27 additions & 1 deletion test/integration/match/group/namedCapturingGroupTest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import { expect } from 'chai';

describe('Named capturing group match integration', () => {
describe('in optimised mode', () => {
it('should be able to capture a named capturing group', () => {
it('should be able to capture a single chevron-bracketed Perl named capturing group', () => {
const matcher = emulator.compile('my (?<grabbed>captured) text');

const match = matcher.matchOne('my captured text');
Expand All @@ -25,6 +25,32 @@ describe('Named capturing group match integration', () => {
expect(match?.getNamedCapture('grabbed')).to.equal('captured');
});

// Covers the quoted named-group spelling: (?'name'...).
it('should be able to capture a single-quoted Perl named capturing group', () => {
    const pattern = "my (?'grabbed'captured) text";
    const subject = 'my captured text';

    const result = emulator.compile(pattern).matchOne(subject);

    expect(result).not.to.be.null;
    expect(result?.getCaptureCount()).to.equal(2);
    // The named group must be reachable by position as well as by name.
    expect(result?.getNumberedCapture(0)).to.equal(subject);
    expect(result?.getNumberedCapture(1)).to.equal('captured');
    expect(result?.getNamedCapture('grabbed')).to.equal('captured');
});

// Covers the P-prefixed named-group spelling: (?P<name>...).
it('should be able to capture a single Python named capturing group', () => {
    const wholeText = 'my captured text';
    const groupText = 'captured';
    const compiled = emulator.compile('my (?P<grabbed>captured) text');

    const found = compiled.matchOne(wholeText);

    expect(found).not.to.be.null;
    expect(found?.getCaptureCount()).to.equal(2);
    // Index 0 is compared against the full subject; index 1 and the name
    // 'grabbed' are both expected to yield the same group text.
    expect(found?.getNumberedCapture(0)).to.equal(wholeText);
    expect(found?.getNumberedCapture(1)).to.equal(groupText);
    expect(found?.getNamedCapture('grabbed')).to.equal(groupText);
});

it('should be able to backtrack into a capturing group', () => {
const matcher = emulator.compile('my (?<grabbed>a+)aa text');

Expand Down
60 changes: 59 additions & 1 deletion test/integration/parser/group/namedCapturingGroupTest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ describe('Parser named capturing group integration', () => {
parser = emulator.createParser();
});

it('should be able to parse a regex pattern containing a single capturing group', () => {
it('should be able to parse a regex pattern containing a single chevron-bracketed Perl named capturing group', () => {
const ast = parser.parse('my (?<found>stuff inside) here');

expect(ast.getPattern()).to.equal('my (?<found>stuff inside) here');
Expand Down Expand Up @@ -47,6 +47,64 @@ describe('Parser named capturing group integration', () => {
});
});

// Parser-level check for the quoted named-group spelling: (?'name'...).
it('should be able to parse a regex pattern containing a single-quoted Perl named capturing group', () => {
    const pattern = "my (?'found'stuff inside) here";
    // Expected parsing AST: the group node carries only the extracted name,
    // with no trace of which delimiter style was used in the pattern.
    const expectedAst = {
        'name': 'N_PATTERN',
        'components': [
            { 'name': 'N_LITERAL', 'text': 'my ' },
            {
                'name': 'N_NAMED_CAPTURING_GROUP',
                'groupName': 'found',
                'components': [{ 'name': 'N_LITERAL', 'text': 'stuff inside' }],
            },
            { 'name': 'N_LITERAL', 'text': ' here' },
        ],
    };

    const ast = parser.parse(pattern);

    expect(ast.getPattern()).to.equal(pattern);
    expect(ast.getParsingAst()).to.deep.equal(expectedAst);
});

// Parser-level check for the P-prefixed named-group spelling: (?P<name>...).
it('should be able to parse a regex pattern containing a single Python named capturing group', () => {
    const source = 'my (?P<found>stuff inside) here';
    const literal = (text: string) => ({ 'name': 'N_LITERAL', 'text': text });

    const parsed = parser.parse(source);

    // The original pattern text is expected to be retained verbatim.
    expect(parsed.getPattern()).to.equal(source);
    expect(parsed.getParsingAst()).to.deep.equal({
        'name': 'N_PATTERN',
        'components': [
            literal('my '),
            {
                'name': 'N_NAMED_CAPTURING_GROUP',
                'groupName': 'found',
                'components': [literal('stuff inside')],
            },
            literal(' here'),
        ],
    });
});

it('should be able to parse a regex pattern containing a named capturing group nested inside a numbered one', () => {
const ast = parser.parse('my (stuff (?<inner>goes) inside) here');

Expand Down

0 comments on commit 683b72f

Please sign in to comment.