Skip to content

Commit

Permalink
Document skips in more detail, #116.
Browse files Browse the repository at this point in the history
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
  • Loading branch information
ezyang committed Mar 7, 2017
1 parent 4047a62 commit 353c96f
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 3 deletions.
58 changes: 56 additions & 2 deletions library/HTMLPurifier/Strategy/MakeWellFormed.php
Expand Up @@ -165,7 +165,7 @@ public function execute($tokens, $config, $context)
if (empty($zipper->front)) break;
$token = $zipper->prev($token);
// indicate that other injectors should not process this token,
// but we need to reprocess it
// but we need to reprocess it. See Note [Injector skips]
unset($token->skip[$i]);
$token->rewind = $i;
if ($token instanceof HTMLPurifier_Token_Start) {
Expand Down Expand Up @@ -210,6 +210,7 @@ public function execute($tokens, $config, $context)
if ($token instanceof HTMLPurifier_Token_Text) {
foreach ($this->injectors as $i => $injector) {
if (isset($token->skip[$i])) {
// See Note [Injector skips]
continue;
}
if ($token->rewind !== null && $token->rewind !== $i) {
Expand Down Expand Up @@ -367,6 +368,7 @@ public function execute($tokens, $config, $context)
if ($ok) {
foreach ($this->injectors as $i => $injector) {
if (isset($token->skip[$i])) {
// See Note [Injector skips]
continue;
}
if ($token->rewind !== null && $token->rewind !== $i) {
Expand Down Expand Up @@ -422,6 +424,7 @@ public function execute($tokens, $config, $context)
$token->start = $current_parent;
foreach ($this->injectors as $i => $injector) {
if (isset($token->skip[$i])) {
// See Note [Injector skips]
continue;
}
if ($token->rewind !== null && $token->rewind !== $i) {
Expand Down Expand Up @@ -566,7 +569,12 @@ protected function processToken($token, $injector = -1)
list($old, $r) = $this->zipper->splice($this->token, $delete, $token);

if ($injector > -1) {
// determine appropriate skips
// See Note [Injector skips]
// Determine appropriate skips. Here's what the code does:
// *If* we deleted one or more tokens, copy the skips
// of those tokens into the skips of the new tokens (in $token).
// Also, mark the newly inserted tokens as having come from
// $injector.
$oldskip = isset($old[0]) ? $old[0]->skip : array();
foreach ($token as $object) {
$object->skip = $oldskip;
Expand Down Expand Up @@ -602,4 +610,50 @@ private function remove()
}
}

// Note [Injector skips]
// ~~~~~~~~~~~~~~~~~~~~~
// When I originally designed this class, the idea behind the 'skip'
// property of HTMLPurifier_Token was to help avoid infinite loops
// in injector processing. For example, suppose you wrote an injector
// that bolded swear words. Naively, you might write it so that
// whenever you saw ****, you replaced it with <strong>****</strong>.
//
// When this happens, we will reprocess all of the tokens with the
// other injectors. Now there is an opportunity for infinite loop:
// if we rerun the swear-word injector on these tokens, we might
// see **** and then reprocess again to get
// <strong><strong>****</strong></strong> ad infinitum.
//
// Thus, the idea of a skip is that once we process a token with
// an injector, we mark all of those tokens as having "come from"
// the injector, and we never run the injector again on these
// tokens.
//
// There were two more complications, however:
//
// - With HTMLPurifier_Injector_RemoveEmpty, we noticed that if
// you had <b><i></i></b>, after you removed the <i></i>, you
// really would like this injector to go back and reprocess
// the <b> tag, discovering that it is now empty and can be
// removed. So we reintroduced the possibility of infinite looping
// by adding a "rewind" function, which let you go back to an
// earlier point in the token stream and reprocess it with injectors.
// Needless to say, we need to UN-skip the token so it gets
// reprocessed.
//
// - Suppose that you successfuly process a token, replace it with
// one with your skip mark, but now another injector wants to
// process the skipped token with another token. Should you continue
// to skip that new token, or reprocess it? If you reprocess,
// you can end up with an infinite loop where one injector converts
// <a> to <b>, and then another injector converts it back. So
// we inherit the skips, but for some reason, I thought that we
// should inherit the skip from the first token of the token
// that we deleted. Why? Well, it seems to work OK.
//
// If I were to redesign this functionality, I would absolutely not
// go about doing it this way: the semantics are just not very well
// defined, and in any case you probably wanted to operate on trees,
// not token streams.

// vim: et sw=4 sts=4
2 changes: 1 addition & 1 deletion library/HTMLPurifier/Token.php
Expand Up @@ -26,7 +26,7 @@ abstract class HTMLPurifier_Token
public $armor = array();

/**
* Used during MakeWellFormed.
* Used during MakeWellFormed. See Note [Injector skips]
* @type
*/
public $skip;
Expand Down

0 comments on commit 353c96f

Please sign in to comment.