Skip to content

Commit

Permalink
run name/content dedupe before munging HTML
Browse files Browse the repository at this point in the history
fix for #53
  • Loading branch information
aaronpk committed Jan 12, 2018
1 parent 4477039 commit 66adfbe
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 8 deletions.
10 changes: 9 additions & 1 deletion lib/XRay/Formats/Format.php
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,16 @@ protected static function sanitizeHTML($html, $allowImg=true) {
return trim($sanitized);
}

// Return a plaintext version of the input HTML.
//
// The markup is run through HTMLPurifier with only <br> allowed, so every
// other element is stripped. The remaining <br> elements are turned into
// newlines, HTML entities are decoded, and the result is trimmed.
//
// $html - HTML string to convert
// Returns the trimmed plaintext string.
protected static function stripHTML($html) {
  $config = HTMLPurifier_Config::createDefault();
  // Turn off the purifier's definition cache
  $config->set('Cache.DefinitionImpl', null);
  $config->set('HTML.AllowedElements', ['br']);
  $purifier = new HTMLPurifier($config);
  $sanitized = $purifier->purify($html);

  // Carriage returns can come back from the purifier as "&#xD;" entities
  $sanitized = str_replace("&#xD;", "\r", $sanitized);

  // Convert line breaks before decoding entities, so that text which merely
  // mentions a tag (e.g. "&lt;br&gt;") is kept as literal text.
  // NOTE(review): the exact serialization (<br> vs <br />) presumably depends
  // on the purifier's doctype setting, so all common spellings are handled.
  $sanitized = str_replace(['<br>', '<br/>', '<br />'], "\n", $sanitized);

  return trim(html_entity_decode($sanitized));
}


Expand Down
29 changes: 24 additions & 5 deletions lib/XRay/Formats/Mf2.php
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,8 @@ private static function parseHTMLValue($property, $item) {
$allowImg = true;

$htmlContent = trim(self::sanitizeHTML($content['html'], $allowImg));
$textContent = trim(str_replace("&#xD;","\r",$content['value']));
#$textContent = trim(str_replace("&#xD;","\r",$content['value']));
$textContent = trim(self::stripHTML($htmlContent));
} else {
$textContent = trim($content['value']);
}
Expand Down Expand Up @@ -339,10 +340,13 @@ private static function determineNameAndContent($item, &$data) {
$textContent = null;
$htmlContent = null;

$content = self::parseHTMLValue('content', $item);
if($content) {
$content = self::getHTMLValue($item, 'content');

if(is_string($content)) {
$textContent = $content;
} elseif($content) {
$htmlContent = array_key_exists('html', $content) ? $content['html'] : null;
$textContent = array_key_exists('text', $content) ? $content['text'] : null;
$textContent = array_key_exists('value', $content) ? $content['value'] : null;
}

if($content) {
Expand All @@ -365,8 +369,9 @@ private static function determineNameAndContent($item, &$data) {

// If there is content, always return the plaintext content, and return HTML content if it's different
if($content) {
$content = self::parseHTMLValue('content', $item);
$data['content']['text'] = $content['text'];
if(array_key_exists('html', $content))
if(isset($content['html']))
$data['content']['html'] = $content['html'];
}
}
Expand Down Expand Up @@ -762,6 +767,20 @@ private static function getPlaintext($mf2, $k, $fallback=null) {
return $fallback;
}

// Look up the first value of property $k in a parsed mf2 item.
//
// Returns that value when it is a plain string, or when it has an 'html'
// key (in which case the whole value is returned as-is). Returns $fallback
// when the property is missing or empty, is not an array, or its first
// value has neither shape.
private static function getHTMLValue($mf2, $k, $fallback=null) {
  // Nothing usable under this property name
  if(empty($mf2['properties'][$k]) || !is_array($mf2['properties'][$k])) {
    return $fallback;
  }

  // Only the first value of the property is considered
  $first = $mf2['properties'][$k][0];

  if(is_string($first) || isset($first['html'])) {
    return $first;
  }

  return $fallback;
}

private static function getPlaintextValues($mf2, $k, $values=[]) {
if(!empty($mf2['properties'][$k]) and is_array($mf2['properties'][$k])) {
foreach($mf2['properties'][$k] as $value) {
Expand Down
8 changes: 6 additions & 2 deletions tests/SanitizeTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ public function testSanitizeEmailAuthorURL() {
$this->assertEquals('http://sanitize.example/photo.jpg', $data->data->author->photo);
}

public function testPhotoInContent() {
public function testPhotoInContentNoAlt() {
// https://github.com/aaronpk/XRay/issues/52

$url = 'http://sanitize.example/photo-in-content';
Expand All @@ -161,7 +161,11 @@ public function testPhotoInContent() {
$this->assertEquals(200, $response->getStatusCode());
$data = json_decode($body);

#print_r($data->data);
$this->assertObjectNotHasAttribute('name', $data->data);
$this->assertEquals('http://target.example.com/photo.jpg', $data->data->photo[0]);
$this->assertEquals('This is a photo post with an img tag inside the content.', $data->data->content->text);
$this->assertEquals('This is a photo post with an <code>img</code> tag inside the content.', $data->data->content->html);
}

$this->assertObjectNotHasAttribute('name', $data->data);
$this->assertEquals('http://target.example.com/photo.jpg', $data->data->photo[0]);
Expand Down

0 comments on commit 66adfbe

Please sign in to comment.