Skip to content

Commit

Permalink
Iterate more node lists with foreach
Browse files Browse the repository at this point in the history
PHP follows the DOM specification, which requires that NodeList objects
in the DOM are live. As a result, any operation that removes a NodeList
node from its parent (e.g. removeChild, replaceChild or appendChild)
will cause the next node in the iterator to be skipped.

Let’s convert those node lists to arrays to make them static to allow
us to use foreach and drop the index decrementing.

This will make the code a bit clearer
at the cost of slightly more work being performed.
  • Loading branch information
jtojnar committed Mar 18, 2024
1 parent 5166622 commit 01e9b26
Showing 1 changed file with 2 additions and 11 deletions.
13 changes: 2 additions & 11 deletions src/Readability.php
Original file line number Diff line number Diff line change
Expand Up @@ -903,8 +903,7 @@ protected function grabArticle(?\DOMElement $page = null)

$allElements = $page->getElementsByTagName('*');

for ($nodeIndex = 0; $allElements->item($nodeIndex); ++$nodeIndex) {
$node = $allElements->item($nodeIndex);
foreach (iterator_to_array($allElements) as $node) {
$tagName = $node->tagName;

$nodeContent = $node->getInnerHTML();
Expand All @@ -917,7 +916,6 @@ protected function grabArticle(?\DOMElement $page = null)
if (!$this->isNodeVisible($node)) {
$this->logger->debug('Removing invisible node ' . $node->getNodePath());
$node->parentNode->removeChild($node);
--$nodeIndex;
continue;
}

Expand All @@ -930,7 +928,6 @@ protected function grabArticle(?\DOMElement $page = null)
) {
$this->logger->debug('Removing unlikely candidate (using conf) ' . $node->getNodePath() . ' by "' . $unlikelyMatchString . '"');
$node->parentNode->removeChild($node);
--$nodeIndex;
continue;
}

Expand All @@ -949,7 +946,6 @@ protected function grabArticle(?\DOMElement $page = null)
$newNode->setInnerHtml($nodeContent);

$node->parentNode->replaceChild($newNode, $node);
--$nodeIndex;
$nodesToScore[] = $newNode;
} catch (\Exception $e) {
$this->logger->error('Could not alter div/article to p, reverting back to div: ' . $e->getMessage());
Expand Down Expand Up @@ -1216,8 +1212,7 @@ protected function grabArticle(?\DOMElement $page = null)
$parentOfTopCandidate = $topCandidate->parentNode;
$siblingNodes = $parentOfTopCandidate->childNodes;

for ($s = 0, $sl = $siblingNodes->length; $s < $sl; ++$s) {
$siblingNode = $siblingNodes->item($s);
foreach (iterator_to_array($siblingNodes) as $siblingNode) {
$siblingNodeName = $siblingNode->nodeName;
$append = false;
$this->logger->debug('Looking at sibling node: ' . $siblingNode->getNodePath() . ((\XML_ELEMENT_NODE === $siblingNode->nodeType && $siblingNode->hasAttribute('readability')) ? (' with score ' . $siblingNode->getAttribute('readability')) : ''));
Expand Down Expand Up @@ -1260,13 +1255,9 @@ protected function grabArticle(?\DOMElement $page = null)
} catch (\Exception $e) {
$this->logger->debug('Could not alter siblingNode "' . $siblingNodeName . '" to "div", reverting to original.');
$nodeToAppend = $siblingNode;
--$s;
--$sl;
}
} else {
$nodeToAppend = $siblingNode;
--$s;
--$sl;
}

// To ensure a node does not interfere with readability styles, remove its classnames & ids.
Expand Down

0 comments on commit 01e9b26

Please sign in to comment.