Skip to content

Commit

Permalink
Latest TagFilter.
Browse files Browse the repository at this point in the history
  • Loading branch information
cubiclesoft committed Mar 25, 2018
1 parent 41cc443 commit ba9ccd1
Showing 1 changed file with 190 additions and 15 deletions.
205 changes: 190 additions & 15 deletions support/tag_filter.php
@@ -1,6 +1,6 @@
<?php
// CubicleSoft PHP Tag Filter class. Can repair broken HTML.
// (C) 2017 CubicleSoft. All Rights Reserved.
// (C) 2018 CubicleSoft. All Rights Reserved.

class TagFilterStream
{
Expand Down Expand Up @@ -110,7 +110,9 @@ public function Process($content)

if ($this->options["keep_comments"])
{
$content2 = "<!-- " . htmlspecialchars(substr($content, $pos + 3, $pos2)) . " -->";
$content2 = substr($content, $pos + 3, $pos2);
if ($this->options["charset"] === "UTF-8" && !self::IsValidUTF8($content2)) $content2 = self::MakeValidUTF8($content2);
$content2 = "<!-- " . htmlspecialchars($content2, ENT_COMPAT | ENT_HTML5, $this->options["charset"]) . " -->";

// Let a callback handle any necessary changes.
if (isset($this->options["content_callback"]) && is_callable($this->options["content_callback"])) call_user_func_array($this->options["content_callback"], array($this->stack, $result, &$content2, $this->options));
Expand Down Expand Up @@ -147,7 +149,9 @@ public function Process($content)
{
// Unknown. Encode it.
$data = substr($content, $cx, strpos($content, $prefix, $cx) + strlen($prefix) - $cx);
$content2 = htmlspecialchars($data);
$content2 = $data;
if ($this->options["charset"] === "UTF-8" && !self::IsValidUTF8($content2)) $content2 = self::MakeValidUTF8($content2);
$content2 = htmlspecialchars($content2, ENT_COMPAT | ENT_HTML5, $this->options["charset"]);

// Let a callback handle any necessary changes.
if (isset($this->options["content_callback"]) && is_callable($this->options["content_callback"])) call_user_func_array($this->options["content_callback"], array($this->stack, $result, &$content2, $this->options));
Expand All @@ -174,7 +178,7 @@ public function Process($content)
$tagname = substr($content, $startpos, $cx - $startpos);
if ($parse)
{
if ($this->options["charset_tags"] && $this->options["charset"] === "UTF-8") $tagname = self::MakeValidUTF8($tagname);
if ($this->options["charset_tags"] && $this->options["charset"] === "UTF-8") $tagname = (self::IsValidUTF8($tagname) ? $tagname : self::MakeValidUTF8($tagname));
else $tagname = preg_replace(($this->options["allow_namespaces"] ? '/[^A-Za-z0-9:._-]/' : '/[^A-Za-z0-9._-]/'), "", $tagname);
}
$tagname = rtrim($tagname, "._-:");
Expand Down Expand Up @@ -232,8 +236,6 @@ public function Process($content)
$attrs = array();
do
{
//echo "State: " . $state . "\n";
//echo "Content:\n" . $content . "\n";
if ($state === "name")
{
// Find attribute key/property.
Expand Down Expand Up @@ -309,8 +311,15 @@ public function Process($content)
}

$keyname = substr($content, $x, $cx - $x);
if ($parse && $this->options["charset_attrs"] && $this->options["charset"] === "UTF-8") $keyname = self::MakeValidUTF8(preg_replace(($this->options["allow_namespaces"] ? '/[^A-Za-z0-9:._\-\x80-\xFF]/' : '/[^A-Za-z0-9._\-\x80-\xFF]/'), "", $keyname));
else $keyname = preg_replace(($this->options["allow_namespaces"] ? '/[^A-Za-z0-9:._-]/' : '/[^A-Za-z0-9._-]/'), "", $keyname);
if ($parse && $this->options["charset_attrs"] && $this->options["charset"] === "UTF-8")
{
$keyname = preg_replace(($this->options["allow_namespaces"] ? '/[^A-Za-z0-9:._\-\x80-\xFF]/' : '/[^A-Za-z0-9._\-\x80-\xFF]/'), "", $keyname);
if (!self::IsValidUTF8($keyname)) $keyname = self::MakeValidUTF8($keyname);
}
else
{
$keyname = preg_replace(($this->options["allow_namespaces"] ? '/[^A-Za-z0-9:._-]/' : '/[^A-Za-z0-9._-]/'), "", $keyname);
}
$keyname = rtrim($keyname, "._-:");
if (!isset($this->options["untouched_tag_attr_keys"][$tagname]) && $this->options["lowercase_attrs"]) $keyname = strtolower($keyname);

Expand Down Expand Up @@ -445,6 +454,7 @@ public function Process($content)

if ($state === "name")
{
if ($this->options["charset"] === "UTF-8" && !self::IsValidUTF8($value)) $value = self::MakeValidUTF8($value);
$value = html_entity_decode($value, ENT_QUOTES | ENT_HTML5, $this->options["charset"]);

// Decode remaining entities.
Expand Down Expand Up @@ -600,7 +610,11 @@ public function Process($content)
$opentag .= " " . $key;

if (is_array($val)) $val = implode(" ", $val);
if (is_string($val)) $opentag .= "=\"" . htmlspecialchars($val) . "\"";
if (is_string($val))
{
if ($this->options["charset"] === "UTF-8" && !self::IsValidUTF8($val)) $val = self::MakeValidUTF8($val);
$opentag .= "=\"" . htmlspecialchars($val, ENT_COMPAT | ENT_HTML5, $this->options["charset"]) . "\"";
}
}
if (($voidtag || isset($this->options["void_tags"][$tagname])) && $this->options["output_mode"] === "xml")
{
Expand Down Expand Up @@ -808,7 +822,7 @@ public static function MakeValidUTF8($data)
else $tempchr3 = 0x00;
if ($y - $x > 3) $tempchr4 = ord($data[$x + 3]);
else $tempchr4 = 0x00;
if ($tempchr == 0x09 || $tempchr == 0x0A || $tempchr == 0x0D || ($tempchr >= 0x20 && $tempchr <= 0x7E))
if (($tempchr >= 0x20 && $tempchr <= 0x7E) || $tempchr == 0x09 || $tempchr == 0x0A || $tempchr == 0x0D)
{
// ASCII minus control and special characters.
$result .= chr($tempchr);
Expand Down Expand Up @@ -878,12 +892,59 @@ public static function MakeValidUTF8($data)
return $result;
}

// Checks whether a string is already "clean" UTF-8 so callers can skip the more expensive MakeValidUTF8() pass.
//
// $data is the byte string to inspect.
//
// Returns true only when every byte belongs to either:
//   - printable ASCII (0x20-0x7E), tab, line feed, or carriage return, or
//   - a well-formed 2, 3, or 4 byte UTF-8 sequence.
// All other ASCII control characters, 0x7F, stray continuation bytes, overlong encodings,
// UTF-16 surrogates (ED A0..BF xx), code points above U+10FFFF, and truncated sequences return false.
// An empty string returns true.
public static function IsValidUTF8($data)
{
	$x = 0;
	$y = strlen($data);
	while ($x < $y)
	{
		// Square-bracket offsets are required here:  The curly-brace form is a parse error as of PHP 8.0.
		$tempchr = ord($data[$x]);
		if (($tempchr >= 0x20 && $tempchr <= 0x7E) || $tempchr == 0x09 || $tempchr == 0x0A || $tempchr == 0x0D) $x++;
		else if ($tempchr < 0xC2) return false;  // Disallowed control character, 0x7F, continuation byte, or overlong lead byte (0xC0/0xC1).
		else
		{
			// Every multibyte sequence needs at least one trailing byte.
			$left = $y - $x;
			if ($left > 1) $tempchr2 = ord($data[$x + 1]);
			else return false;

			// Two byte sequence (U+0080 to U+07FF).
			if (($tempchr >= 0xC2 && $tempchr <= 0xDF) && ($tempchr2 >= 0x80 && $tempchr2 <= 0xBF)) $x += 2;
			else
			{
				if ($left > 2) $tempchr3 = ord($data[$x + 2]);
				else return false;

				// The third byte is always a plain continuation byte.
				if ($tempchr3 < 0x80 || $tempchr3 > 0xBF) return false;

				// Three byte sequences.  The second byte range is narrowed for 0xE0 (no overlongs) and 0xED (no surrogates).
				if ($tempchr == 0xE0 && ($tempchr2 >= 0xA0 && $tempchr2 <= 0xBF)) $x += 3;
				else if ((($tempchr >= 0xE1 && $tempchr <= 0xEC) || $tempchr == 0xEE || $tempchr == 0xEF) && ($tempchr2 >= 0x80 && $tempchr2 <= 0xBF)) $x += 3;
				else if ($tempchr == 0xED && ($tempchr2 >= 0x80 && $tempchr2 <= 0x9F)) $x += 3;
				else
				{
					if ($left > 3) $tempchr4 = ord($data[$x + 3]);
					else return false;

					// The fourth byte is always a plain continuation byte.
					if ($tempchr4 < 0x80 || $tempchr4 > 0xBF) return false;

					// Four byte sequences.  The second byte range is narrowed for 0xF0 (no overlongs) and 0xF4 (nothing above U+10FFFF).
					if ($tempchr == 0xF0 && ($tempchr2 >= 0x90 && $tempchr2 <= 0xBF)) $x += 4;
					else if (($tempchr >= 0xF1 && $tempchr <= 0xF3) && ($tempchr2 >= 0x80 && $tempchr2 <= 0xBF)) $x += 4;
					else if ($tempchr == 0xF4 && ($tempchr2 >= 0x80 && $tempchr2 <= 0x8F)) $x += 4;
					else return false;
				}
			}
		}
	}

	return true;
}

public static function UTF8Chr($num)
{
if ($num < 0 || ($num >= 0xD800 && $num <= 0xDFFF) || ($num >= 0xFDD0 && $num <= 0xFDEF) || ($num & 0xFFFE) == 0xFFFE) return "";

if ($num <= 0x7F) $result = chr($num);
else if ($num <= 0x7FF) $result = chr(0xC0 | (($num & 0x7C0) >> 6)) . chr(0x80 | ($num & 0x3F));
else if ($num <= 0xFFFF) $result = chr(0xE0 | (($num & 0xF000) >> 6)) . chr(0x80 | (($num & 0xFC0) >> 6)) . chr(0x80 | ($num & 0x3F));
else if ($num <= 0x1FFFFF) $result = chr(0xF0 | (($num & 0x1C0000) >> 6)) . chr(0x80 | (($num & 0x3F000) >> 6)) . chr(0x80 | (($num & 0xFC0) >> 6)) . chr(0x80 | ($num & 0x3F));
else if ($num <= 0x7FF) $result = chr(0xC0 | ($num >> 6)) . chr(0x80 | ($num & 0x3F));
else if ($num <= 0xFFFF) $result = chr(0xE0 | ($num >> 12)) . chr(0x80 | (($num >> 6) & 0x3F)) . chr(0x80 | ($num & 0x3F));
else if ($num <= 0x10FFFF) $result = chr(0xF0 | ($num >> 18)) . chr(0x80 | (($num >> 12) & 0x3F)) . chr(0x80 | (($num >> 6) & 0x3F)) . chr(0x80 | ($num & 0x3F));
else $result = "";

return $result;
Expand Down Expand Up @@ -1621,7 +1682,11 @@ public function Implode($id, $options = array())
$result .= " " . $key;

if (is_array($val)) $val = implode(" ", $val);
if (is_string($val)) $result .= "=\"" . htmlspecialchars($val) . "\"";
if (is_string($val))
{
if ($this->options["charset"] === "UTF-8" && !self::IsValidUTF8($val)) $val = self::MakeValidUTF8($val);
$result .= "=\"" . htmlspecialchars($val, ENT_COMPAT | ENT_HTML5, $this->options["charset"]) . "\"";
}
}
$result .= (!$maxpos && $options["output_mode"] === "xml" ? " />" : ">");
}
Expand All @@ -1641,7 +1706,7 @@ public function Implode($id, $options = array())

if ($pos >= $maxpos)
{
if ($maxpos && $this->nodes[$id]["type"] === "element")
if ($this->nodes[$id]["type"] === "element" && is_array($this->nodes[$id]["children"]))
{
if (($include || $rootid != $id) && isset($types[$this->nodes[$id]["type"]])) $result .= "</" . $this->nodes[$id]["tag"] . ">";
}
Expand Down Expand Up @@ -1932,6 +1997,116 @@ public function Replace($id, $src, $inneronly = false)
return $this->Move($src, $newpid, $newpos);
}

// Internal helper for SplitAt().  Appends a shallow copy of $node as the last child of node $pid inside $nodes.
//
// $nodes is the destination node set (its 'nodes' array and 'nextid' counter are modified).
// $pid is passed by reference:  On entry it is the ID of the parent to attach to, on exit it is the ID of the new copy,
// which lets the caller keep descending without extra bookkeeping.
// $node is the source node record.  Its child list is NOT carried over:  An array becomes an empty array
// and any other set value becomes false.
private static function SplitAt_CopyNode($nodes, &$pid, $node)
{
	$newid = $nodes->nextid;

	// Re-home the copy as the next child of the destination parent.
	$node["parent"] = $pid;
	$node["parentpos"] = count($nodes->nodes[$pid]["children"]);

	// Children are cloned separately by the caller, so start with an empty container of the same kind.
	if (isset($node["children"]))
	{
		if (is_array($node["children"])) $node["children"] = array();
		else $node["children"] = false;
	}

	// Register the copy and link it into the parent.
	$nodes->nodes[$newid] = $node;
	$nodes->nodes[$pid]["children"][] = $newid;

	// The copy becomes the current parent and the ID counter moves on.
	$pid = $newid;
	$nodes->nextid++;
}

// Splits this node tree into multiple independent TagFilterNodes objects at the specified node IDs.
//
// $ids is a single node ID or an array of node IDs to split at.  Values are cast to integers and the root (0) is ignored.
// $keepidparents, when true, recreates the ancestors of a matching node (as empty shells via SplitAt_CopyNode())
// ahead of that node in the set the match begins.  When false, the matching node is attached directly to the new root.
//
// Returns an array of TagFilterNodes objects in document order.  The array always contains at least one entry.
// The source tree is only read from.
public function SplitAt($ids, $keepidparents = false)
{
	// Build an integer-keyed lookup table of the split points.
	$splitids = array();
	foreach ((is_array($ids) ? $ids : array($ids)) as $splitid)
	{
		$splitids[(int)$splitid] = true;
	}
	unset($splitids[0]);

	$result = array();

	// Depth-first walk over every node, cloning into $newnodes until a split point is encountered (if any).
	$newnodes = new TagFilterNodes();
	$newpid = 0;
	$id = 0;
	$pos = 0;
	if (isset($this->nodes[$id]["children"]) && is_array($this->nodes[$id]["children"])) $maxpos = count($this->nodes[$id]["children"]);
	else $maxpos = 0;

	while (true)
	{
		$issplit = isset($splitids[$id]);

		// A position of zero means the node is being entered for the first time.
		if ($pos == 0)
		{
			// Only cut here when the current set already has something in it.
			if ($issplit && count($newnodes->nodes[0]["children"]) > 0)
			{
				$result[] = $newnodes;
				$newnodes = new TagFilterNodes();
				$newpid = 0;

				if ($keepidparents)
				{
					// Recreate the ancestor chain, outermost first.
					$chain = array();
					for ($aid = $this->nodes[$id]["parent"]; $aid; $aid = $this->nodes[$aid]["parent"]) array_unshift($chain, $aid);

					foreach ($chain as $aid) self::SplitAt_CopyNode($newnodes, $newpid, $this->nodes[$aid]);
				}
			}

			// The root already exists in every new set, so only real nodes are copied.
			if ($id) self::SplitAt_CopyNode($newnodes, $newpid, $this->nodes[$id]);
		}

		// Descend into the next unvisited child, if there is one.
		if ($pos < $maxpos)
		{
			$id = $this->nodes[$id]["children"][$pos];
			$pos = 0;
			if (isset($this->nodes[$id]["children"]) && is_array($this->nodes[$id]["children"])) $maxpos = count($this->nodes[$id]["children"]);
			else $maxpos = 0;

			continue;
		}

		// All children processed.  Leaving the root finishes the walk.
		if (!$id) break;

		if ($issplit)
		{
			// Leaving a split point:  Whatever follows goes into a fresh set seeded with the ancestor chain.
			$result[] = $newnodes;
			$newnodes = new TagFilterNodes();
			$newpid = 0;

			$chain = array();
			for ($aid = $this->nodes[$id]["parent"]; $aid; $aid = $this->nodes[$aid]["parent"]) array_unshift($chain, $aid);

			foreach ($chain as $aid) self::SplitAt_CopyNode($newnodes, $newpid, $this->nodes[$aid]);
		}
		else
		{
			// Step the clone's insertion point back up alongside the source walk.
			$newpid = $newnodes->nodes[$newpid]["parent"];
		}

		// Resume in the parent just after this node.
		$pos = $this->nodes[$id]["parentpos"] + 1;
		$id = $this->nodes[$id]["parent"];
		$maxpos = count($this->nodes[$id]["children"]);
	}

	// Keep the trailing set when it has content or when nothing else was produced.
	if (count($result) === 0 || count($newnodes->nodes[0]["children"]) > 0) $result[] = $newnodes;

	return $result;
}

public function GetOuterHTML($id, $mode = "html")
{
return $this->Implode($id, array("output_mode" => $mode));
Expand Down

0 comments on commit ba9ccd1

Please sign in to comment.