Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/continuous-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
run: "mkdir -p build/logs"

- name: "Run PHPUnit"
run: "php vendor/bin/simple-phpunit -v"
run: "php vendor/bin/simple-phpunit -v --filter=testWithWipedBody"

phpunit-coverage:
name: "PHPUnit coverage (PHP ${{ matrix.php }})"
Expand Down
4 changes: 4 additions & 0 deletions rector.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@

use Rector\Core\Configuration\Option;
use Rector\Core\ValueObject\PhpVersion;
use Rector\PHPUnit\Set\PHPUnitSetList;
use Rector\Set\ValueObject\LevelSetList;
use Rector\Set\ValueObject\SetList;
use Symfony\Component\DependencyInjection\Loader\Configurator\ContainerConfigurator;

return static function (ContainerConfigurator $containerConfigurator): void {
Expand All @@ -26,6 +28,8 @@

// Define what rule sets will be applied
$containerConfigurator->import(LevelSetList::UP_TO_PHP_72);
$containerConfigurator->import(PHPUnitSetList::PHPUNIT_80);
$containerConfigurator->import(SetList::CODE_QUALITY);

// Is your PHP version different from the one you refactor to?
$parameters->set(Option::PHP_VERSION_FEATURES, PhpVersion::PHP_72);
Expand Down
4 changes: 2 additions & 2 deletions src/JSLikeHTMLElement.php
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public function __set($name, $value)
}

// first, empty the element
if (isset($this->childNodes)) {
if (null !== $this->childNodes) {
for ($x = $this->childNodes->length - 1; $x >= 0; --$x) {
$this->removeChild($this->childNodes->item($x));
}
Expand Down Expand Up @@ -114,7 +114,7 @@ public function __get($name)
if ('innerHTML' === $name) {
$inner = '';

if (isset($this->childNodes)) {
if (null !== $this->childNodes) {
foreach ($this->childNodes as $child) {
$inner .= $this->ownerDocument->saveXML($child);
}
Expand Down
95 changes: 48 additions & 47 deletions src/Readability.php
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ public function init(): bool
{
$this->loadHtml();

if (!isset($this->dom->documentElement)) {
if (!(property_exists($this->dom, 'documentElement') && null !== $this->dom->documentElement)) {
return false;
}

Expand All @@ -236,7 +236,7 @@ public function init(): bool
}
}

if ($bodyElems->length > 0 && null === $this->body) {
if ($bodyElems->length > 0 && !$this->body instanceof \DOMElement) {
$this->body = $bodyElems->item(0);
}

Expand Down Expand Up @@ -264,7 +264,12 @@ public function init(): bool
$overlay->appendChild($innerDiv);

// without tidy the body can (sometimes) be wiped, so re-create it
if (false === isset($this->body->childNodes)) {
try {
var_dump($this->body, property_exists($this->body, 'childNodes'), isset($this->body->childNodes), $this->body->childNodes);
if (!(property_exists($this->body, 'childNodes') && null !== $this->body->childNodes)) {
$this->body = $this->dom->createElement('body');
}
} catch (\Exception $e) {
$this->body = $this->dom->createElement('body');
}

Expand Down Expand Up @@ -313,7 +318,7 @@ public function addFootnotes(DOMElement $articleContent): void
$refLink = $this->dom->createElement('a');
$footnote = $this->dom->createElement('li');
$linkDomain = @parse_url($footnoteLink->getAttribute('href'), \PHP_URL_HOST);
if (!$linkDomain && isset($this->url)) {
if (!$linkDomain && null !== $this->url) {
$linkDomain = @parse_url($this->url, \PHP_URL_HOST);
}

Expand Down Expand Up @@ -433,7 +438,7 @@ public function prepArticle(\DOMNode $articleContent): void
}

// add filler text to the iframe tag so it is not serialized as a self-closing tag, which would break the HTML code
if ($iframeCount) {
if (0 !== $iframeCount) {
$iframe = $item->getElementsByTagName('iframe');
$iframe->item(0)->nodeValue = ' ';

Expand Down Expand Up @@ -463,7 +468,7 @@ public function prepArticle(\DOMNode $articleContent): void
*/
public function getInnerText($e, bool $normalizeSpaces = true, bool $flattenLines = false): string
{
if (null === $e || !isset($e->textContent) || '' === $e->textContent) {
if (!$e instanceof \DOMNode || !(property_exists($e, 'textContent') && null !== $e->textContent) || '' === $e->textContent) {
return '';
}

Expand Down Expand Up @@ -679,29 +684,27 @@ public function cleanConditionally(DOMElement $e, string $tag): void
$this->logger->debug(' more than 3 embeds');
$toRemove = true;
}
} else {
if ($img > $p) {
$this->logger->debug(' more image elements than paragraph elements');
$toRemove = true;
} elseif (!$isList && $li > $p) {
$this->logger->debug(' too many <li> elements, and parent is not <ul> or <ol>');
$toRemove = true;
} elseif ($input > floor($p / 3)) {
$this->logger->debug(' too many <input> elements');
$toRemove = true;
} elseif (!$isList && $contentLength < 10 && (0 === $img || $img > 2)) {
$this->logger->debug(' content length less than 10 chars and 0 images, or more than 2 images');
$toRemove = true;
} elseif (!$isList && $weight < 25 && $linkDensity > 0.2) {
$this->logger->debug(' weight is ' . $weight . ' lower than 0 and link density is ' . sprintf('%.2f', $linkDensity) . ' > 0.2');
$toRemove = true;
} elseif ($weight >= 25 && $linkDensity > 0.5) {
$this->logger->debug(' weight above 25 but link density is ' . sprintf('%.2f', $linkDensity) . ' > 0.5');
$toRemove = true;
} elseif ((1 === $embedCount && $contentLength < 75) || $embedCount > 1) {
$this->logger->debug(' 1 embed and content length smaller than 75 chars, or more than one embed');
$toRemove = true;
}
} elseif ($img > $p) {
$this->logger->debug(' more image elements than paragraph elements');
$toRemove = true;
} elseif (!$isList && $li > $p) {
$this->logger->debug(' too many <li> elements, and parent is not <ul> or <ol>');
$toRemove = true;
} elseif ($input > floor($p / 3)) {
$this->logger->debug(' too many <input> elements');
$toRemove = true;
} elseif (!$isList && $contentLength < 10 && (0 === $img || $img > 2)) {
$this->logger->debug(' content length less than 10 chars and 0 images, or more than 2 images');
$toRemove = true;
} elseif (!$isList && $weight < 25 && $linkDensity > 0.2) {
$this->logger->debug(' weight is ' . $weight . ' lower than 0 and link density is ' . sprintf('%.2f', $linkDensity) . ' > 0.2');
$toRemove = true;
} elseif ($weight >= 25 && $linkDensity > 0.5) {
$this->logger->debug(' weight above 25 but link density is ' . sprintf('%.2f', $linkDensity) . ' > 0.5');
$toRemove = true;
} elseif ((1 === $embedCount && $contentLength < 75) || $embedCount > 1) {
$this->logger->debug(' 1 embed and content length smaller than 75 chars, or more than one embed');
$toRemove = true;
}

if ($toRemove) {
Expand Down Expand Up @@ -741,15 +744,15 @@ public function flagIsActive(int $flag): bool
*/
public function addFlag(int $flag): void
{
    // Bitwise OR switches on every bit of $flag in the stored mask. Bits that
    // are already set stay set, so adding the same flag twice is harmless.
    $this->flags |= $flag;
}

/**
* Remove a flag.
*/
public function removeFlag(int $flag): void
{
    // AND with the complement of $flag clears exactly the bits of $flag and
    // leaves every other bit of the stored mask untouched.
    $this->flags &= ~$flag;
}

/**
Expand Down Expand Up @@ -829,7 +832,7 @@ protected function prepDocument(): void
*/
protected function initializeNode(DOMElement $node): void
{
if (!isset($node->tagName)) {
if (!(property_exists($node, 'tagName') && null !== $node->tagName)) {
return;
}

Expand Down Expand Up @@ -901,14 +904,14 @@ protected function initializeNode(DOMElement $node): void
*/
protected function grabArticle(DOMElement $page = null)
{
if (!$page) {
if (null === $page) {
$page = $this->dom;
}

$xpath = null;
$nodesToScore = [];

if ($page instanceof \DOMDocument && isset($page->documentElement)) {
if ($page instanceof \DOMDocument && (property_exists($page, 'documentElement') && null !== $page->documentElement)) {
$xpath = new \DOMXPath($page);
}

Expand Down Expand Up @@ -1019,16 +1022,13 @@ protected function grabArticle(DOMElement $page = null)
* A score is determined by things like number of commas, class names, etc.
* Maybe eventually link density.
*/
for ($pt = 0, $scored = \count($nodesToScore); $pt < $scored; ++$pt) {
$ancestors = $this->getAncestors($nodesToScore[$pt], 5);

foreach ($nodesToScore as $pt => $singleNodesToScore) {
$ancestors = $this->getAncestors($singleNodesToScore, 5);
// No parent node? Move on...
if (0 === \count($ancestors)) {
if ([] === $ancestors) {
continue;
}

$innerText = $this->getInnerText($nodesToScore[$pt]);

$innerText = $this->getInnerText($singleNodesToScore);
// If this paragraph is less than MIN_PARAGRAPH_LENGTH (default:20) characters, don't even count it.
if (mb_strlen($innerText) < self::MIN_PARAGRAPH_LENGTH) {
continue;
Expand Down Expand Up @@ -1074,7 +1074,7 @@ protected function grabArticle(DOMElement $page = null)
for ($c = $candidates->length - 1; $c >= 0; --$c) {
$node = $candidates->item($c);
// node should be readable but not inside of an article, otherwise it's probably a non-readable block
if ($node->hasAttribute('readability') && (int) $node->getAttributeNode('readability')->value < 40 && ($node->parentNode ? 0 !== strcasecmp($node->parentNode->tagName, 'article') : true)) {
if ($node->hasAttribute('readability') && (int) $node->getAttributeNode('readability')->value < 40 && (null !== $node->parentNode ? 0 !== strcasecmp($node->parentNode->tagName, 'article') : true)) {
$this->logger->debug('Removing unlikely candidate (using note) ' . $node->getNodePath() . ' by "' . $node->tagName . '" with readability ' . ($node->hasAttribute('readability') ? (int) $node->getAttributeNode('readability')->value : 0));
$node->parentNode->removeChild($node);
}
Expand All @@ -1093,7 +1093,7 @@ protected function grabArticle(DOMElement $page = null)
* and find the one with the highest score.
*/
$topCandidates = array_fill(0, 5, null);
if ($xpath) {
if (null !== $xpath) {
// Using array of DOMElements after deletion is a path to DOOMElement.
$candidates = $xpath->query('.//*[@data-candidate]', $page->documentElement);
$this->logger->debug('Candidates: ' . $candidates->length);
Expand Down Expand Up @@ -1135,7 +1135,7 @@ protected function grabArticle(DOMElement $page = null)
$topCandidate = $this->dom->createElement('div');

if ($page instanceof \DOMDocument) {
if (!isset($page->documentElement)) {
if (!(property_exists($page, 'documentElement') && null !== $page->documentElement)) {
// we don't have a body either? what a mess! :)
$this->logger->debug('The page has no body!');
} else {
Expand Down Expand Up @@ -1165,7 +1165,8 @@ protected function grabArticle(DOMElement $page = null)
$parentOfTopCandidate = $topCandidate->parentNode;
while ('body' !== $parentOfTopCandidate->nodeName) {
$listsContainingThisAncestor = 0;
for ($ancestorIndex = 0; $ancestorIndex < \count($alternativeCandidateAncestors) && $listsContainingThisAncestor < 3; ++$ancestorIndex) {
$alternativeCandidateAncestorsCount = \count($alternativeCandidateAncestors);
for ($ancestorIndex = 0; $ancestorIndex < $alternativeCandidateAncestorsCount && $listsContainingThisAncestor < 3; ++$ancestorIndex) {
$listsContainingThisAncestor += (int) \in_array($parentOfTopCandidate, $alternativeCandidateAncestors[$ancestorIndex], true);
}
if ($listsContainingThisAncestor >= 3) {
Expand Down Expand Up @@ -1374,7 +1375,7 @@ protected function weightAttribute(DOMElement $element, string $attribute): int
*/
protected function reinitBody(): void
{
if (!isset($this->body->childNodes)) {
if (!(property_exists($this->body, 'childNodes') && null !== $this->body->childNodes)) {
$this->body = $this->dom->createElement('body');
$this->body->setInnerHtml($this->bodyCache);
}
Expand Down Expand Up @@ -1482,7 +1483,7 @@ private function hasSingleTagInsideElement(DOMElement $node, string $tag): bool
preg_match($this->regexps['hasContent'], $this->getInnerText($childNode));
});

return 0 === \count($a);
return [] === $a;
}

/**
Expand Down