Skip to content

Commit

Permalink
Add new SpanScanner.eager().
Browse files Browse the repository at this point in the history
This is more efficient for uses like the YAML parser, which uses the
current line and especially column information frequently while parsing
a file.

R=rnystrom@google.com

Review URL: https://codereview.chromium.org//1318603008 .
  • Loading branch information
nex3 committed Sep 2, 2015
1 parent 494684f commit ad926d6
Show file tree
Hide file tree
Showing 6 changed files with 185 additions and 38 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,8 @@
## 0.1.4

* Add `new SpanScanner.eager()` for creating a `SpanScanner` that eagerly
computes its current line and column numbers.

## 0.1.3+2

* Fix `LineScanner`'s handling of carriage returns to match that of
Expand Down
115 changes: 115 additions & 0 deletions lib/src/eager_span_scanner.dart
@@ -0,0 +1,115 @@
// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

library string_scanner.eager_span_scanner;

import 'package:charcode/ascii.dart';

import 'line_scanner.dart';
import 'span_scanner.dart';

// TODO(nweiz): Currently this duplicates code in line_scanner.dart. Once
// sdk#23770 is fully complete, we should move the shared code into a mixin.

/// A regular expression matching newlines across platforms.
///
/// Matches a `\r\n` pair as a single newline, as well as a lone `\r` or a
/// lone `\n`.
final _newlineRegExp = new RegExp(r"\r\n?|\n");

/// A [SpanScanner] that tracks the line and column eagerly, like [LineScanner].
///
/// [line] and [column] are kept up to date by [readChar], [scan] and the
/// [position] and [state] setters, so reading them is a plain field access.
///
/// A `\r\n` pair counts as a single newline that is completed by the `\n`:
/// while the scanner sits between the `\r` and the `\n`, the `\r` is treated
/// as an ordinary character of the current line.
class EagerSpanScanner extends SpanScanner {
  /// The zero-based line number of the current position.
  @override
  int get line => _line;
  int _line = 0;

  /// The zero-based column number of the current position.
  @override
  int get column => _column;
  int _column = 0;

  /// A snapshot of the current position, line and column.
  @override
  LineScannerState get state =>
      new _EagerSpanScannerState(this, position, line, column);

  /// Whether the current position is between a CR and an LF character.
  bool get _betweenCRLF => peekChar(-1) == $cr && peekChar() == $lf;

  /// Restores a snapshot previously returned by [state].
  ///
  /// Throws an [ArgumentError] if [state] was not created by this scanner.
  @override
  set state(LineScannerState state) {
    if (state is! _EagerSpanScannerState ||
        !identical((state as _EagerSpanScannerState)._scanner, this)) {
      throw new ArgumentError("The given LineScannerState was not returned by "
          "this LineScanner.");
    }

    // Bypass our own position setter: the snapshot already carries the
    // matching line and column, so nothing needs to be recomputed.
    super.position = state.position;
    _line = state.line;
    _column = state.column;
  }

  /// Moves the scanner to [newPosition], adjusting [line] and [column] by the
  /// newlines found between the old and the new position.
  @override
  set position(int newPosition) {
    var oldPosition = position;

    // Update the underlying position first; the helpers below read it.
    super.position = newPosition;
    if (newPosition == oldPosition) return;

    if (newPosition == 0) {
      // The start of the string is always line 0, column 0.
      _line = 0;
      _column = 0;
    } else if (newPosition > oldPosition) {
      var newlines = _newlinesIn(
          string.substring(oldPosition, newPosition), newPosition);
      _line += newlines.length;
      if (newlines.isEmpty) {
        _column += newPosition - oldPosition;
      } else {
        // The matches are relative to the substring, which starts at
        // [oldPosition] in the full string.
        _column = newPosition - (oldPosition + newlines.last.end);
      }
    } else {
      // Moving backwards undoes exactly the newlines that a forward move from
      // [newPosition] to [oldPosition] would have counted.
      var newlines = _newlinesIn(
          string.substring(newPosition, oldPosition), oldPosition);
      _line -= newlines.length;
      if (newlines.isEmpty) {
        _column -= oldPosition - newPosition;
      } else {
        _column = newPosition - _currentLineStart;
      }
    }
  }

  EagerSpanScanner(String string, {sourceUrl, int position})
      : super(string, sourceUrl: sourceUrl, position: position);

  /// Consumes a single character and updates [line] and [column] for it.
  @override
  int readChar() {
    var char = super.readChar();
    // A CR that is immediately followed by an LF doesn't end the line yet;
    // the LF does.
    if (char == $lf || (char == $cr && peekChar() != $lf)) {
      _line += 1;
      _column = 0;
    } else {
      _column += 1;
    }
    return char;
  }

  /// Scans [pattern] at the current position and, on success, advances
  /// [line] and [column] past the matched text.
  @override
  bool scan(Pattern pattern) {
    if (!super.scan(pattern)) return false;

    var matched = lastMatch[0];
    var newlines = _newlinesIn(matched, position);
    _line += newlines.length;
    if (newlines.isEmpty) {
      _column += matched.length;
    } else {
      _column = matched.length - newlines.last.end;
    }

    return true;
  }

  /// The offset of the first character of the line containing [position].
  int get _currentLineStart {
    // A CR directly before the position doesn't end a line while the LF of
    // the same CRLF pair is still ahead, so step over it.
    var offset = _betweenCRLF ? position - 1 : position;
    while (offset > 0) {
      var previous = string.codeUnitAt(offset - 1);
      if (previous == $lf || previous == $cr) break;
      offset--;
    }
    return offset;
  }

  /// Returns a list of [Match]es describing all the newlines in [text], which
  /// is a section of [string] that ends just before [endPosition].
  ///
  /// Match offsets are relative to [text]. A trailing `\r` is not reported
  /// when the character at [endPosition] is a `\n`, since the newline is only
  /// complete once that `\n` has been consumed.
  List<Match> _newlinesIn(String text, int endPosition) {
    var newlines = _newlineRegExp.allMatches(text).toList();
    if (newlines.isNotEmpty &&
        endPosition < string.length &&
        text.codeUnitAt(text.length - 1) == $cr &&
        string.codeUnitAt(endPosition) == $lf) {
      newlines.removeLast();
    }
    return newlines;
  }
}

/// The [LineScannerState] handed out by [EagerSpanScanner.state].
///
/// It records the scanner that produced it alongside the position, line and
/// column at the time it was taken.
class _EagerSpanScannerState implements LineScannerState {
  _EagerSpanScannerState(this._scanner, this.position, this.line, this.column);

  /// The scanner this state was captured from.
  final EagerSpanScanner _scanner;

  /// The scanner's position when this state was captured.
  final int position;

  /// The scanner's line number when this state was captured.
  final int line;

  /// The scanner's column number when this state was captured.
  final int column;
}

2 changes: 2 additions & 0 deletions lib/src/line_scanner.dart
Expand Up @@ -8,6 +8,8 @@ import 'package:charcode/ascii.dart';

import 'string_scanner.dart';

// Note that much of this code is duplicated in eager_span_scanner.dart.

/// A regular expression matching newlines across platforms.
final _newlineRegExp = new RegExp(r"\r\n?|\n");

Expand Down
15 changes: 15 additions & 0 deletions lib/src/span_scanner.dart
Expand Up @@ -6,6 +6,7 @@ library string_scanner.span_scanner;

import 'package:source_span/source_span.dart';

import 'eager_span_scanner.dart';
import 'exception.dart';
import 'line_scanner.dart';
import 'string_scanner.dart';
Expand Down Expand Up @@ -56,6 +57,20 @@ class SpanScanner extends StringScanner implements LineScanner {
: _sourceFile = new SourceFile(string, url: sourceUrl),
super(string, sourceUrl: sourceUrl, position: position);

/// Creates a new [SpanScanner] that eagerly computes line and column numbers.
///
/// This constructor redirects to [EagerSpanScanner].
///
/// In general [new SpanScanner] will be more efficient, since it avoids extra
/// computation on every scan. However, eager scanning can be useful for
/// situations where the normal course of parsing frequently involves
/// accessing the current line and column numbers.
///
/// Note that *only* the `line` and `column` fields on the `SpanScanner`
/// itself and its `LineScannerState` are eagerly computed. To limit their
/// memory footprint, returned spans and locations will still lazily compute
/// their line and column numbers.
factory SpanScanner.eager(String string, {sourceUrl, int position}) =
EagerSpanScanner;

/// Creates a [FileSpan] representing the source range between [startState]
/// and the current position.
FileSpan spanFrom(LineScannerState startState, [LineScannerState endState]) {
Expand Down
2 changes: 1 addition & 1 deletion pubspec.yaml
@@ -1,5 +1,5 @@
name: string_scanner
version: 0.1.3+2
version: 0.1.4
author: "Dart Team <misc@dartlang.org>"
homepage: https://github.com/dart-lang/string_scanner
description: >
Expand Down
84 changes: 47 additions & 37 deletions test/span_scanner_test.dart
Expand Up @@ -8,53 +8,63 @@ import 'package:string_scanner/string_scanner.dart';
import 'package:test/test.dart';

void main() {
var scanner;
setUp(() {
scanner = new SpanScanner('foo\nbar\nbaz', sourceUrl: 'source');
testForImplementation("lazy", () {
return new SpanScanner('foo\nbar\nbaz', sourceUrl: 'source');
});

test("tracks the span for the last match", () {
scanner.scan('fo');
scanner.scan('o\nba');
testForImplementation("eager", () {
return new SpanScanner.eager('foo\nbar\nbaz', sourceUrl: 'source');
});
}

var span = scanner.lastSpan;
expect(span.start.offset, equals(2));
expect(span.start.line, equals(0));
expect(span.start.column, equals(2));
expect(span.start.sourceUrl, equals(Uri.parse('source')));
void testForImplementation(String name, SpanScanner create()) {
group("for a $name scanner", () {
var scanner;
setUp(() => scanner = create());

expect(span.end.offset, equals(6));
expect(span.end.line, equals(1));
expect(span.end.column, equals(2));
expect(span.start.sourceUrl, equals(Uri.parse('source')));
test("tracks the span for the last match", () {
scanner.scan('fo');
scanner.scan('o\nba');

expect(span.text, equals('o\nba'));
});
var span = scanner.lastSpan;
expect(span.start.offset, equals(2));
expect(span.start.line, equals(0));
expect(span.start.column, equals(2));
expect(span.start.sourceUrl, equals(Uri.parse('source')));

test(".spanFrom() returns a span from a previous state", () {
scanner.scan('fo');
var state = scanner.state;
scanner.scan('o\nba');
scanner.scan('r\nba');
expect(span.end.offset, equals(6));
expect(span.end.line, equals(1));
expect(span.end.column, equals(2));
expect(span.start.sourceUrl, equals(Uri.parse('source')));

var span = scanner.spanFrom(state);
expect(span.text, equals('o\nbar\nba'));
});
expect(span.text, equals('o\nba'));
});

test(".spanFrom() returns a span from a previous state", () {
scanner.scan('fo');
var state = scanner.state;
scanner.scan('o\nba');
scanner.scan('r\nba');

var span = scanner.spanFrom(state);
expect(span.text, equals('o\nbar\nba'));
});

test(".emptySpan returns an empty span at the current location", () {
scanner.scan('foo\nba');
test(".emptySpan returns an empty span at the current location", () {
scanner.scan('foo\nba');

var span = scanner.emptySpan;
expect(span.start.offset, equals(6));
expect(span.start.line, equals(1));
expect(span.start.column, equals(2));
expect(span.start.sourceUrl, equals(Uri.parse('source')));
var span = scanner.emptySpan;
expect(span.start.offset, equals(6));
expect(span.start.line, equals(1));
expect(span.start.column, equals(2));
expect(span.start.sourceUrl, equals(Uri.parse('source')));

expect(span.end.offset, equals(6));
expect(span.end.line, equals(1));
expect(span.end.column, equals(2));
expect(span.start.sourceUrl, equals(Uri.parse('source')));
expect(span.end.offset, equals(6));
expect(span.end.line, equals(1));
expect(span.end.column, equals(2));
expect(span.start.sourceUrl, equals(Uri.parse('source')));

expect(span.text, equals(''));
expect(span.text, equals(''));
});
});
}

0 comments on commit ad926d6

Please sign in to comment.