Skip to content
This repository has been archived by the owner on Mar 19, 2020. It is now read-only.

Commit

Permalink
More tests, fixes + partial dom.find()
Browse files Browse the repository at this point in the history
  • Loading branch information
TonyBogdanov committed Jan 9, 2018
1 parent 0bbc920 commit ce21743
Show file tree
Hide file tree
Showing 61 changed files with 5,645 additions and 1,105 deletions.
5 changes: 3 additions & 2 deletions bin/generate-docs.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env bash
#
# Generate the API documentation with ApiGen.
#
# The script switches to the project root (the parent of this script's
# directory) so that the relative paths below ("vendor/bin/apigen", "docs",
# "classes") resolve the same way regardless of the caller's working directory.

dir=$(dirname "$0")

# Abort if the project root cannot be entered; otherwise apigen would run
# against whatever directory the caller happened to be in.
cd "$dir/.." || exit 1

# Single generator invocation: sources are read from "classes", output is
# written to "docs".
./vendor/bin/apigen generate --destination docs -- classes
130 changes: 123 additions & 7 deletions classes/Dom.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
use SDom\Node\Element;
use SDom\Node\NodeInterface;
use SDom\Node\Text;
use Symfony\Component\CssSelector\Node;
use Symfony\Component\CssSelector\Parser\Parser;

/**
* A class to represent a DOM structure - a collection of nodes.
Expand All @@ -21,6 +23,20 @@ class Dom implements
\IteratorAggregate,
\Countable
{
/**
 * Shared CSS selector parser, stored statically so every Dom instance reuses the same object.
 * It stays unset until find() creates it on first use.
 *
 * @var Parser
 */
protected static $selectorParser;

/**
 * Shared CSS selector matcher, stored statically so every Dom instance reuses the same object.
 * It stays unset until find() creates it on first use; traverseMatch() reads it directly.
 *
 * @var SelectorMatcher
 */
protected static $selectorMatcher;

/**
* Collection of nodes.
*
Expand Down Expand Up @@ -57,17 +73,60 @@ protected static function createInvalidContentException($content): \InvalidArgum
));
}

/**
 * Walk the supplied node and, recursively, all of its children, collecting every Element node that matches
 * at least one of the supplied selector tokens.
 *
 * Any node type may be passed in, but only Element nodes are matched and descended into; everything else is
 * ignored. Matching nodes are added to the supplied Dom collection, which is also the return value.
 *
 * The $effectiveRoot node is handed to the matcher as the root of the tree, even if it has ancestors of its own,
 * so its immediate children are matched as if they had no parent.
 *
 * Relies on the static selector matcher being initialised by the caller (find() does this).
 *
 * @param Dom $dom collection receiving the matched nodes
 * @param array $selectorTokens parsed selector tokens, any one of which may match
 * @param NodeInterface $node node at which traversal starts
 * @param NodeInterface $effectiveRoot node to be treated as the tree root while matching
 * @return Dom the same collection that was passed in
 */
protected static function traverseMatch(
    Dom $dom,
    array $selectorTokens,
    NodeInterface $node,
    NodeInterface $effectiveRoot
): Dom {
    if ($node instanceof Element) {
        // A single matching token is enough; stop testing as soon as one is found.
        $isMatched = false;

        /** @var Node\NodeInterface $token */
        foreach ($selectorTokens as $token) {
            if (self::$selectorMatcher->match($token, $node, $effectiveRoot)) {
                $isMatched = true;
                break;
            }
        }

        if ($isMatched) {
            $dom->add($node);
        }

        // Descend into the children regardless of whether this node matched.
        /** @var NodeInterface $descendant */
        foreach ($node as $descendant) {
            static::traverseMatch($dom, $selectorTokens, $descendant, $effectiveRoot);
        }
    }

    return $dom;
}

/**
* Dom constructor.
* Create new collection from the specified content.
*
* If the content is NULL, create empty collection.
* If the content is a Dom instance, copy the nodes into the new collection.
* If the content is a NodeInterface, add it to the collection.
* If the content is a Token, convert it to NodeInterface and add it to the collection.
* If the content is a Token, convert it to the corresponding NodeInterface (tree) and add it to the collection.
* If the content is a TokenCollection, convert it to NodeInterface instances and add them to the collection.
* If the content is a string, parse as HTML and treat as TokenCollection.
*
* Strings with all whitespace produce empty collections. Strings with invalid HTML throw an exception.
*
* @param mixed $content
*/
public function __construct($content = null)
Expand Down Expand Up @@ -105,6 +164,8 @@ public function __construct($content = null)
$node = new Element($content->getName());

/**
* Inherit attributes.
*
* @var string $name
* @var string|bool $value
*/
Expand All @@ -113,6 +174,15 @@ public function __construct($content = null)
$node->setAttribute($name, true === $value ? '' : $value);
}

/**
* Inherit children.
*
* @var Tokens\Token $child
*/
foreach ($content->getChildren() as $child) {
$node->insertAfter((new static($child))->get(0));
}

$this->nodes = [$node];
break;

Expand Down Expand Up @@ -283,8 +353,15 @@ public function children(): Dom
}

/**
* Insert content, specified by the parameter, to the end of immediate child nodes of all Element nodes in the
* collection.
* Insert content after all immediate child nodes of each Element node in the collection.
*
* If any node derived from the content already has a parent node, a cloned copy will be used instead.
* This means that, if appended to more than one Element node, references to each appended node will only point
* to the very first insertion.
*
* E.g. if the same node is appended to two or more Element nodes, its reference will point to the node with the
* first Element as parent. Nodes appended to all other Element nodes will be cloned copies. The same rule applies
* to child nodes of appended nodes, at any depth, as the whole sub-tree is cloned recursively.
*
* @param $content
* @return Dom
Expand All @@ -296,7 +373,7 @@ public function append($content): Dom
if ($node instanceof Element) {
/** @var NodeInterface $child */
foreach ((new static($content))->nodes as $child) {
$node->insertAfter(null === $child->parent() ? $child : clone $child);
$node->insertAfter(null === $child->parent() ? $child : $child->clone());
}
}
}
Expand All @@ -305,8 +382,17 @@ public function append($content): Dom
}

/**
* Insert content, specified by the parameter, to the beginning of immediate child nodes of all Element nodes in
* the collection.
* Insert content before all immediate child nodes of each Element node in the collection.
*
* If any node derived from the content already has a parent node, a cloned copy will be used instead.
* This means that, if prepended to more than one Element node, references to each prepended node will only point
* to the very first insertion.
*
* E.g. if the same node is prepended to two or more Element nodes, its reference will point to the node with the
* first Element as parent. Nodes prepended to all other Element nodes will be cloned copies. The same rule applies
* to child nodes of prepended nodes, at any depth, as the whole sub-tree is cloned recursively.
*
* If the supplied content resolves to a collection of nodes, they will be prepended as a group, keeping the order.
*
* @param $content
* @return Dom
Expand All @@ -318,11 +404,41 @@ public function prepend($content): Dom
if ($node instanceof Element) {
/** @var NodeInterface $child */
foreach (array_reverse((new static($content))->nodes) as $child) {
$node->insertBefore($child);
$node->insertBefore(null === $child->parent() ? $child : $child->clone());
}
}
}

return $this;
}

/**
 * Return a new Dom collection of all the descendants of each Element node in the current collection,
 * filtered by the specified CSS selector.
 *
 * Nodes in the collection that are not Element instances are skipped: they cannot act as the effective root
 * the selector matcher expects.
 *
 * The selector parser and matcher are created on first use and then shared statically.
 *
 * @param string $selector CSS selector to match descendants against
 * @return Dom new collection holding the matching descendant Element nodes
 */
public function find(string $selector): Dom
{
    if (!isset(self::$selectorParser)) {
        self::$selectorParser = new Parser();
    }

    if (!isset(self::$selectorMatcher)) {
        self::$selectorMatcher = new SelectorMatcher();
    }

    $dom = new static();
    $selectorTokens = self::$selectorParser->parse($selector);

    foreach ($this->get() as $rootNode) {
        // Only Element nodes are searched; this mirrors the instanceof guard in traverseMatch().
        if (!$rootNode instanceof Element) {
            continue;
        }

        // Start from the children so the root itself is never part of the result.
        /** @var NodeInterface $childNode */
        foreach ($rootNode as $childNode) {
            // Late static binding, consistent with the recursive call inside traverseMatch().
            static::traverseMatch($dom, $selectorTokens, $childNode, $rootNode);
        }
    }

    return $dom;
}
}
53 changes: 49 additions & 4 deletions classes/Node/Element.php
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,16 @@ public function __clone()
throw new \BadMethodCallException('Native cloning is not allowed, use clone() instead.');
}

/**
 * Get the tag name this element holds in its $tag property.
 *
 * @return string the element's tag name
 */
public function getTag(): string
{
    $tagName = $this->tag;

    return $tagName;
}

/**
* Return TRUE if the specified name exists as attribute.
* The attribute name is lowercased.
Expand Down Expand Up @@ -306,20 +316,55 @@ public function isChild(NodeInterface $node): bool
}

/**
 * Retrieve a NodeInterface instance (immediate child node) for the specified index.
 *
 * @param int $index zero-based position in the list of immediate child nodes
 * @return NodeInterface the child node stored at that position
 * @throws \OutOfBoundsException if the index is negative or not smaller than the number of child nodes
 */
public function get(int $index): NodeInterface
{
    $count = count($this);

    if ($index < 0 || $index >= $count) {
        // The range text already carries its own delimiters, so the template must not add another pair.
        throw new \OutOfBoundsException(sprintf(
            'The requested node index %d is out of the child list bounds %s.',
            $index,
            0 < $count ? '[0; ' . ($count - 1) . ']' : '(empty child list)'
        ));
    }

    return $this->children[$index];
}

/**
 * Retrieve the positional index of the specified NodeInterface in the list of immediate child nodes.
 *
 * The lookup is by identity (strict comparison), so only the very same node instance is found.
 *
 * @param NodeInterface $node node to locate among the immediate child nodes
 * @return int position of the node in the child list
 * @throws \InvalidArgumentException if the node is not an immediate child node of this one
 */
public function index(NodeInterface $node): int
{
    $index = array_search($node, $this->children, true);

    // array_search() signals "not found" with FALSE; compare strictly so that position 0 is not mistaken for it.
    if (false === $index) {
        throw new \InvalidArgumentException('The specified node is not an immediate child node.');
    }

    return $index;
}

/**
* Remove the specified node from the list of immediate children of this node.
* If the target node is not an immediate child node of this one, an exception will be thrown.
* The node's detach() method will also be called to release the parent reference if such is set.
*
* @param NodeInterface $node
* @return Element
*/
public function removeChild(NodeInterface $node): Element
{
$index = $this->index($node);
$child = $this->children[$index];

array_splice($this->children, $index, 1);
Expand Down
83 changes: 83 additions & 0 deletions classes/SelectorMatcher.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
<?php

namespace SDom;

use SDom\Node as Dom;
use SDom\SelectorMatcher\AttributeNodeTrait;
use SDom\SelectorMatcher\ClassNodeTrait;
use SDom\SelectorMatcher\CombinedSelectorNodeTrait;
use SDom\SelectorMatcher\ElementNodeTrait;
use SDom\SelectorMatcher\HashNodeTrait;
use Symfony\Component\CssSelector\Node as Css;

/**
 * A class for matching DOM element nodes against parsed CSS selector tokens.
 *
 * match() dispatches on the concrete token type to a dedicated match*Node() method; those methods are not defined
 * in this class body and are presumably supplied by the traits used below.
 *
 * Class SelectorMatcher
 * @package SDom
 */
class SelectorMatcher
{
    use ElementNodeTrait;
    use AttributeNodeTrait;
    use ClassNodeTrait;
    use HashNodeTrait;
    use CombinedSelectorNodeTrait;

    /**
     * Check if the supplied word is found in the supplied sentence.
     *
     * The sentence is split on runs of whitespace and the word must be exactly equal to one of the resulting
     * pieces, e.g. the sentence begins with the word followed by whitespace, ends with the word preceded by
     * whitespace, contains the word surrounded by whitespace or is equal to the word.
     *
     * The comparison is strict, so numeric-looking strings are compared as text: "10" is not found in "1e1".
     *
     * The word itself may not contain whitespace.
     *
     * @param string $word
     * @param string $sentence
     * @return bool
     */
    public static function containsWord(string $word, string $sentence): bool
    {
        // Strict mode prevents PHP's loose numeric-string comparison from reporting false positives.
        return in_array($word, preg_split('/\s+/', $sentence), true);
    }

    /**
     * Match the supplied CSS token against the supplied Element node and return TRUE if it is matched.
     *
     * SelectorNode tokens are unwrapped and their tree is matched recursively; every other supported token type
     * is forwarded to its dedicated matcher method.
     *
     * The $effectiveRoot specifies an Element node part of the hierarchy that is to be considered as root of the
     * tree. It is passed through unchanged to the token-specific matchers.
     *
     * @param Css\NodeInterface $token
     * @param Dom\Element $node
     * @param Dom\Element|null $effectiveRoot
     * @return bool
     * @throws \RuntimeException if the token type is not supported
     */
    public function match(Css\NodeInterface $token, Dom\Element $node, Dom\Element $effectiveRoot = null): bool
    {
        switch (true) {
            case $token instanceof Css\SelectorNode:
                return $this->match($token->getTree(), $node, $effectiveRoot);

            case $token instanceof Css\ElementNode:
                return $this->matchElementNode($token, $node, $effectiveRoot);

            case $token instanceof Css\AttributeNode:
                return $this->matchAttributeNode($token, $node, $effectiveRoot);

            case $token instanceof Css\ClassNode:
                return $this->matchClassNode($token, $node, $effectiveRoot);

            case $token instanceof Css\HashNode:
                return $this->matchHashNode($token, $node, $effectiveRoot);

            case $token instanceof Css\CombinedSelectorNode:
                return $this->matchCombinedSelectorNode($token, $node, $effectiveRoot);

            default:
                throw new \RuntimeException(sprintf(
                    'Selector token %s is not supported yet.',
                    get_class($token)
                ));
        }
    }
}
Loading

0 comments on commit ce21743

Please sign in to comment.