Skip to content
Permalink
Browse files

Document and fix HTML-escape.

Update documentation and behavior of HtmlEscape.

BUG= http://dartbug.com/21913, http://dartbug.com/13097
R=floitsch@google.com, sgjesse@google.com

Review URL: https://codereview.chromium.org//1019853002

git-svn-id: https://dart.googlecode.com/svn/branches/bleeding_edge/dart@45003 260f80e4-7a28-3924-810f-c04153c831b5
  • Loading branch information...
lrhn committed Apr 9, 2015
1 parent 0a1f0d3 commit 8b8223d15ce4d55528ab8dffe680b56e1634a38b
Showing with 133 additions and 38 deletions.
  1. +122 −31 sdk/lib/convert/html_escape.dart
  2. +11 −7 tests/lib/convert/html_escape_test.dart
@@ -7,69 +7,160 @@ part of dart.convert;
// TODO(floitsch) - Document - Issue 13097
const HtmlEscape HTML_ESCAPE = const HtmlEscape();

/**
* HTML escape modes.
*
* Allows specifying a mode for HTML escaping that depends on the context
* where the escaped result is going to be used.
* The relevant contexts are:
*
* * as text content of an HTML element.
* * as value of a (single- or double-) quoted attribute value.
*
* All modes require escaping of `&` (ampersand) characters, and may
* enable escaping of more characters.
*/
class HtmlEscapeMode {
final String _name;
/** Whether to escape '<' and '>'. */
final bool escapeLtGt;
/** Whether to escape '"' (quote). */
final bool escapeQuot;
/** Whether to escape "'" (apostrophe). */
final bool escapeApos;
final bool escapeSlash;

// TODO(floitsch) - Document - Issue 13097
/**
* Default escaping mode which escapes all characters.
*
* The result of such an escaping is usable both in element content and
* in any attribute value.
*
* The escaping only works for elements with normal HTML content,
* and not for, for example, script or style element content,
* which require escapes matching their particular content syntax.
*/
static const HtmlEscapeMode UNKNOWN =
const HtmlEscapeMode._('unknown', true, true, true, true);

// TODO(floitsch) - Document - Issue 13097
const HtmlEscapeMode._('unknown', true, true, true);

/**
* Escaping mode for text going into double-quoted HTML attribute values.
*
* The result should not be used as the content of an unquoted
* or single-quoted attribute value.
*
* Escapes only double quotes (`"`) but not single quotes (`'`).
*/
static const HtmlEscapeMode ATTRIBUTE =
const HtmlEscapeMode._('attribute', false, true, false, false);

// TODO(floitsch) - Document - Issue 13097
const HtmlEscapeMode._('attribute', false, true, false);

/**
* Escaping mode for text going into single-quoted HTML attribute values.
*
* The result should not be used as the content of an unquoted
* or double-quoted attribute value.
*
* Escapes only single quotes (`'`) but not double quotes (`"`).
*/
static const HtmlEscapeMode SQ_ATTRIBUTE =
const HtmlEscapeMode._('attribute', false, false, true);

/**
* Escaping mode for text going into HTML element content.
*
* The escaping only works for elements with normal HTML content,
* and not for, for example, script or style element content,
* which require escapes matching their particular content syntax.
*
* Escapes `<` and `>` characters.
*/
static const HtmlEscapeMode ELEMENT =
const HtmlEscapeMode._('element', true, false, false, true);

// TODO(floitsch) - Document - Issue 13097
const HtmlEscapeMode._(this._name, this.escapeLtGt, this.escapeQuot,
this.escapeApos, this.escapeSlash);
const HtmlEscapeMode._('element', true, false, false);

const HtmlEscapeMode._(
this._name, this.escapeLtGt, this.escapeQuot, this.escapeApos);

/**
* Create a custom escaping mode.
*
* All modes escape `&`.
* The mode can further be set to escape `<` and `>` ([escapeLtGt]),
* `"` ([escapeQuot]) and/or `'` ([escapeApos]).
*/
const HtmlEscapeMode({String name: "custom",
this.escapeLtGt: false,
this.escapeQuot: false,
this.escapeApos: false}) : _name = name;

String toString() => _name;
}

// TODO(floitsch) - Document - Issue 13097
/**
* Converter which escapes characters with special meaning in HTML.
*
* The converter finds characters that are significant in HTML source and
* replaces them with corresponding HTML entities.
*
* The characters that need escaping in HTML are:
*
* * `&` (ampersand) always needs to be escaped.
* * `<` (less than) and `>` (greater than) when inside an element.
* * `"` (quote) when inside a double-quoted attribute value.
* * `'` (apostrophe) when inside a single-quoted attribute value.
* Apostrophe is escaped as `&#39;` instead of `&apos;` since
* not all browsers understand `&apos;`.
*
* Escaping `>` (greater than) isn't necessary, but the result is often
* found to be easier to read if greater-than is also escaped whenever
* less-than is.
*/
class HtmlEscape extends Converter<String, String> {

// TODO(floitsch) - Document - Issue 13097
/** The [HtmlEscapeMode] used by the converter. */
final HtmlEscapeMode mode;

// TODO(floitsch) - Document - Issue 13097
/**
* Create converter that escapes HTML characters.
*
* If [mode] is provided as either [HtmlEscapeMode.ATTRIBUTE] or
* [HtmlEscapeMode.ELEMENT], only the corresponding subset of HTML
* characters are escaped.
* The default is to escape all HTML characters.
*/
const HtmlEscape([this.mode = HtmlEscapeMode.UNKNOWN]);

/**
 * Escapes [text] according to [mode].
 *
 * Returns [text] itself when the internal conversion reports that
 * nothing needed to be replaced.
 */
String convert(String text) {
  final escaped = _convert(text, 0, text.length);
  if (escaped != null) return escaped;
  return text;
}

/**
* Converts the substring of text from start to end.
*
* Returns `null` if no changes were necessary, otherwise returns
* the converted string.
*/
String _convert(String text, int start, int end) {
StringBuffer result = null;
for (int i = start; i < end; i++) {
var ch = text[i];
String replace = null;
String replacement = null;
switch (ch) {
case '&': replace = '&amp;'; break;
case '\u00A0'/*NO-BREAK SPACE*/: replace = '&nbsp;'; break;
case '"': if (mode.escapeQuot) replace = '&quot;'; break;
case "'": if (mode.escapeApos) replace = '&#x27;'; break;
case '<': if (mode.escapeLtGt) replace = '&lt;'; break;
case '>': if (mode.escapeLtGt) replace = '&gt;'; break;
case '/': if (mode.escapeSlash) replace = '&#x2F;'; break;
case '&': replacement = '&amp;'; break;
case '"': if (mode.escapeQuot) replacement = '&quot;'; break;
case "'": if (mode.escapeApos) replacement = '&#39;'; break;
case '<': if (mode.escapeLtGt) replacement = '&lt;'; break;
case '>': if (mode.escapeLtGt) replacement = '&gt;'; break;
}
if (replace != null) {
if (result == null) result = new StringBuffer(text.substring(start, i));
result.write(replace);
} else if (result != null) {
result.write(ch);
if (replacement != null) {
if (result == null) result = new StringBuffer();
if (i > start) result.write(text.substring(start, i));
result.write(replacement);
start = i + 1;
}
}

return result != null ? result.toString() : null;
if (result == null) return null;
if (end > start) result.write(text.substring(start, end));
return result.toString();
}

StringConversionSink startChunkedConversion(Sink<String> sink) {
@@ -8,16 +8,19 @@ import 'dart:convert';

const _NOOP = 'Nothing_to_escape';

const _TEST_INPUT = '<A </test> of \u00A0 "double" & \'single\' values>';
const _TEST_INPUT = """<A </test> of \xA0 "double" & 'single' values>""";

const _OUTPUT_UNKNOWN = '&lt;A &lt;&#x2F;test&gt; of &nbsp; &quot;double&quot; &amp; '
'&#x27;single&#x27; values&gt;';
const _OUTPUT_UNKNOWN = '&lt;A &lt;/test&gt; of \xA0 &quot;double&quot; &amp; '
'&#39;single&#39; values&gt;';

const _OUTPUT_ATTRIBUTE = "<A </test> of &nbsp; &quot;double&quot; &amp; "
"\'single\' values>";
const _OUTPUT_ATTRIBUTE =
"<A </test> of \xA0 &quot;double&quot; &amp; 'single' values>";

const _OUTPUT_ELEMENT = '&lt;A &lt;&#x2F;test&gt; of &nbsp; "double" &amp; '
'\'single\' values&gt;';
const _OUTPUT_SQ_ATTRIBUTE =
'<A </test> of \xA0 "double" &amp; &#39;single&#39; values>';

const _OUTPUT_ELEMENT =
"""&lt;A &lt;/test&gt; of \xA0 "double" &amp; 'single' values&gt;""";

void _testMode(HtmlEscape escape, String input, String expected) {
_testConvert(escape, input, expected);
@@ -80,6 +83,7 @@ void main() {
_testMode(const HtmlEscape(), _TEST_INPUT, _OUTPUT_UNKNOWN);
_testMode(const HtmlEscape(HtmlEscapeMode.UNKNOWN), _TEST_INPUT, _OUTPUT_UNKNOWN);
_testMode(const HtmlEscape(HtmlEscapeMode.ATTRIBUTE), _TEST_INPUT, _OUTPUT_ATTRIBUTE);
_testMode(const HtmlEscape(HtmlEscapeMode.SQ_ATTRIBUTE), _TEST_INPUT, _OUTPUT_SQ_ATTRIBUTE);
_testMode(const HtmlEscape(HtmlEscapeMode.ELEMENT), _TEST_INPUT, _OUTPUT_ELEMENT);
_testMode(HTML_ESCAPE, _NOOP, _NOOP);
}

0 comments on commit 8b8223d

Please sign in to comment.
You can’t perform that action at this time.