diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..5600ccc Binary files /dev/null and b/.pylintrc differ diff --git a/python/__init__.py b/python/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/python/dom/NotImplemented/Attr.py b/python/dom/NotImplemented/Attr.py deleted file mode 100644 index e9d9b08..0000000 --- a/python/dom/NotImplemented/Attr.py +++ /dev/null @@ -1,2 +0,0 @@ -class Attr: - pass \ No newline at end of file diff --git a/python/dom/NotImplemented/CDATASection.py b/python/dom/NotImplemented/CDATASection.py deleted file mode 100644 index b9e6035..0000000 --- a/python/dom/NotImplemented/CDATASection.py +++ /dev/null @@ -1,2 +0,0 @@ -class CDATASection: - pass \ No newline at end of file diff --git a/python/dom/NotImplemented/CharacterData.py b/python/dom/NotImplemented/CharacterData.py deleted file mode 100644 index 7d8dbeb..0000000 --- a/python/dom/NotImplemented/CharacterData.py +++ /dev/null @@ -1,2 +0,0 @@ -class CharacterData: - pass \ No newline at end of file diff --git a/python/dom/NotImplemented/Comment.py b/python/dom/NotImplemented/Comment.py deleted file mode 100644 index 65d2c49..0000000 --- a/python/dom/NotImplemented/Comment.py +++ /dev/null @@ -1,2 +0,0 @@ -class Comment: - pass \ No newline at end of file diff --git a/python/dom/NotImplemented/DOMException.py b/python/dom/NotImplemented/DOMException.py deleted file mode 100644 index 2d49d6a..0000000 --- a/python/dom/NotImplemented/DOMException.py +++ /dev/null @@ -1,2 +0,0 @@ -class DOMException: - pass \ No newline at end of file diff --git a/python/dom/NotImplemented/DOMImplementation.py b/python/dom/NotImplemented/DOMImplementation.py deleted file mode 100644 index 9b49ccd..0000000 --- a/python/dom/NotImplemented/DOMImplementation.py +++ /dev/null @@ -1,2 +0,0 @@ -class DOMImplementation: - pass \ No newline at end of file diff --git a/python/dom/NotImplemented/DOMString.py b/python/dom/NotImplemented/DOMString.py deleted file mode 
100644 index e24a09b..0000000 --- a/python/dom/NotImplemented/DOMString.py +++ /dev/null @@ -1,2 +0,0 @@ -class DOMString: - pass \ No newline at end of file diff --git a/python/dom/NotImplemented/Document.py b/python/dom/NotImplemented/Document.py deleted file mode 100644 index ed2c476..0000000 --- a/python/dom/NotImplemented/Document.py +++ /dev/null @@ -1,2 +0,0 @@ -class Document: - pass \ No newline at end of file diff --git a/python/dom/NotImplemented/DocumentFragment.py b/python/dom/NotImplemented/DocumentFragment.py deleted file mode 100644 index be955b6..0000000 --- a/python/dom/NotImplemented/DocumentFragment.py +++ /dev/null @@ -1,2 +0,0 @@ -class DocumentFragment: - pass \ No newline at end of file diff --git a/python/dom/NotImplemented/DocumentType.py b/python/dom/NotImplemented/DocumentType.py deleted file mode 100644 index 881d240..0000000 --- a/python/dom/NotImplemented/DocumentType.py +++ /dev/null @@ -1,2 +0,0 @@ -class DocumentType: - pass \ No newline at end of file diff --git a/python/dom/NotImplemented/Element.py b/python/dom/NotImplemented/Element.py deleted file mode 100644 index f88fa1a..0000000 --- a/python/dom/NotImplemented/Element.py +++ /dev/null @@ -1,2 +0,0 @@ -class Element: - pass \ No newline at end of file diff --git a/python/dom/NotImplemented/Entity.py b/python/dom/NotImplemented/Entity.py deleted file mode 100644 index c39e2ed..0000000 --- a/python/dom/NotImplemented/Entity.py +++ /dev/null @@ -1,2 +0,0 @@ -class Entity: - pass \ No newline at end of file diff --git a/python/dom/NotImplemented/EntityReference.py b/python/dom/NotImplemented/EntityReference.py deleted file mode 100644 index 4324696..0000000 --- a/python/dom/NotImplemented/EntityReference.py +++ /dev/null @@ -1,2 +0,0 @@ -class EntityReference: - pass \ No newline at end of file diff --git a/python/dom/NotImplemented/NamedNodeMap.py b/python/dom/NotImplemented/NamedNodeMap.py deleted file mode 100644 index a846d98..0000000 --- 
a/python/dom/NotImplemented/NamedNodeMap.py +++ /dev/null @@ -1,2 +0,0 @@ -class NamedNodeMap: - pass \ No newline at end of file diff --git a/python/dom/NotImplemented/Node.py b/python/dom/NotImplemented/Node.py deleted file mode 100644 index e5d5f12..0000000 --- a/python/dom/NotImplemented/Node.py +++ /dev/null @@ -1,2 +0,0 @@ -class Node: - pass \ No newline at end of file diff --git a/python/dom/NotImplemented/NodeList.py b/python/dom/NotImplemented/NodeList.py deleted file mode 100644 index ff5b1cd..0000000 --- a/python/dom/NotImplemented/NodeList.py +++ /dev/null @@ -1,2 +0,0 @@ -class NodeList: - pass \ No newline at end of file diff --git a/python/dom/NotImplemented/Notation.py b/python/dom/NotImplemented/Notation.py deleted file mode 100644 index fbf275f..0000000 --- a/python/dom/NotImplemented/Notation.py +++ /dev/null @@ -1,2 +0,0 @@ -class Notation: - pass \ No newline at end of file diff --git a/python/dom/NotImplemented/ProcessingInstruction.py b/python/dom/NotImplemented/ProcessingInstruction.py deleted file mode 100644 index 77a39c2..0000000 --- a/python/dom/NotImplemented/ProcessingInstruction.py +++ /dev/null @@ -1,2 +0,0 @@ -class ProcessingInstruction: - pass \ No newline at end of file diff --git a/python/dom/NotImplemented/Text.py b/python/dom/NotImplemented/Text.py deleted file mode 100644 index 26d222c..0000000 --- a/python/dom/NotImplemented/Text.py +++ /dev/null @@ -1,2 +0,0 @@ -class Text: - pass \ No newline at end of file diff --git a/python/dom/NotImplemented/__init__.py b/python/dom/NotImplemented/__init__.py deleted file mode 100644 index 63df183..0000000 --- a/python/dom/NotImplemented/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from __future__ import absolute_import - -from python.dom.NotImplemented.Attr import Attr -from python.dom.NotImplemented.CDATASection import CDATASection -from python.dom.NotImplemented.CharacterData import CharacterData -from python.dom.NotImplemented.Comment import Comment -from 
python.dom.NotImplemented.Document import Document -from python.dom.NotImplemented.DocumentFragment import DocumentFragment -from python.dom.NotImplemented.DocumentType import DocumentType -from python.dom.NotImplemented.DOMException import DOMException -from python.dom.NotImplemented.DOMImplementation import DOMImplementation -from python.dom.NotImplemented.DOMString import DOMString -from python.dom.NotImplemented.Element import Element -from python.dom.NotImplemented.Entity import Entity -from python.dom.NotImplemented.EntityReference import EntityReference -from python.dom.NotImplemented.NamedNodeMap import NamedNodeMap -from python.dom.NotImplemented.Node import Node -from python.dom.NotImplemented.NodeList import NodeList -from python.dom.NotImplemented.Notation import Notation -from python.dom.NotImplemented.ProcessingInstruction import ProcessingInstruction -from python.dom.NotImplemented.Text import Text diff --git a/python/dom/__init__.py b/python/dom/__init__.py deleted file mode 100644 index c396168..0000000 --- a/python/dom/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from __future__ import absolute_import diff --git a/python/dom/type_checking.py b/python/dom/type_checking.py deleted file mode 100644 index b2e24a5..0000000 --- a/python/dom/type_checking.py +++ /dev/null @@ -1,23 +0,0 @@ -from __future__ import absolute_import - -from typing import Union, TYPE_CHECKING - -if TYPE_CHECKING: - from python.dom.NotImplemented import * - - AnyNode = Union[ - Node, - # Ordered by NodeType - Element, - Attr, - Text, - CDATASection, - EntityReference, - Entity, - ProcessingInstruction, - Comment, - Document, - DocumentType, - DocumentFragment, - Notation, - ] diff --git a/python/parser/__init__.py b/python/parser/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/w3/__init__.py b/w3/__init__.py new file mode 100644 index 0000000..f5b457c --- /dev/null +++ b/w3/__init__.py @@ -0,0 +1,13 @@ +"""A Python HTML Parser +This is a personal project 
owned by Hepheir. + +https://github.com/Hepheir/Python-HTML-Parser/ +""" + +from w3 import dom +from w3 import parser + + +__version__ = '0.0.0' + +__author__ = 'hepheir@gmail.com' diff --git a/w3/dom.py b/w3/dom.py new file mode 100644 index 0000000..34da2ff --- /dev/null +++ b/w3/dom.py @@ -0,0 +1,9 @@ +"""API Module of DOM - Level 1""" + + +# Bring in subpackages. +from w3.python.core import DOMException +from w3.python.core import DOMImplementation +from w3.python.core import Node +from w3.python.core import NodeType +from w3.python.core import NodeList diff --git a/w3/parser.py b/w3/parser.py new file mode 100644 index 0000000..43b70bf --- /dev/null +++ b/w3/parser.py @@ -0,0 +1 @@ +"""A Parser module for building Document Object Model Structure parsed from text/html.""" diff --git a/w3/python/__init__.py b/w3/python/__init__.py new file mode 100644 index 0000000..b687bb7 --- /dev/null +++ b/w3/python/__init__.py @@ -0,0 +1,6 @@ +"""Imports core names of w3. + +Interfaces are implemented under "./core/" +""" + +from w3.python import core diff --git a/w3/python/core/__init__.py b/w3/python/core/__init__.py new file mode 100644 index 0000000..07f7bd1 --- /dev/null +++ b/w3/python/core/__init__.py @@ -0,0 +1,7 @@ +"""Module that contains classes of interfaces, etc.""" + +from w3.python.core.fundamental_interface import DOMException +from w3.python.core.fundamental_interface import DOMImplementation +from w3.python.core.fundamental_interface import Node +from w3.python.core.fundamental_interface import NodeType +from w3.python.core.fundamental_interface import NodeList diff --git a/w3/python/core/fundamental_interface/DOMException.py b/w3/python/core/fundamental_interface/DOMException.py new file mode 100644 index 0000000..cbc83c1 --- /dev/null +++ b/w3/python/core/fundamental_interface/DOMException.py @@ -0,0 +1,36 @@ +import enum + + +class ExceptionCode(enum.IntEnum): + """Definition group `ExceptionCode` + An integer indicating the type of error generated. 
+ """ + INDEX_SIZE_ERR = 1 + DOMSTRING_SIZE_ERR = 2 + HIERARCHY_REQUEST_ERR = 3 + WRONG_DOCUMENT_ERR = 4 + INVALID_CHARACTER_ERR = 5 + NO_DATA_ALLOWED_ERR = 6 + NO_MODIFICATION_ALLOWED_ERR = 7 + NOT_FOUND_ERR = 8 + NOT_SUPPORTED_ERR = 9 + INUSE_ATTRIBUTE_ERR = 10 + + +class DOMException(Exception): + """Exception DOMException + + DOM operations only raise exceptions in "exceptional" circumstances, i.e., when an operation is impossible to perform (either for logical reasons, because data is lost, or because the implementation has become unstable). In general, DOM methods return specific error values in ordinary processing situation, such as out-of-bound errors when using `NodeList`. + Implementations may raise other exceptions under other circumstances. For example, implementations may raise an implementation-dependent exception if a null argument is passed. + Some languages and object systems do not support the concept of exceptions. For such systems, error conditions may be indicated using native error reporting mechanisms. For some bindings, for example, methods may return error codes similar to those listed in the corresponding method descriptions. 
+ """ + INDEX_SIZE_ERR = ExceptionCode.INDEX_SIZE_ERR + DOMSTRING_SIZE_ERR = ExceptionCode.DOMSTRING_SIZE_ERR + HIERARCHY_REQUEST_ERR = ExceptionCode.HIERARCHY_REQUEST_ERR + WRONG_DOCUMENT_ERR = ExceptionCode.WRONG_DOCUMENT_ERR + INVALID_CHARACTER_ERR = ExceptionCode.INVALID_CHARACTER_ERR + NO_DATA_ALLOWED_ERR = ExceptionCode.NO_DATA_ALLOWED_ERR + NO_MODIFICATION_ALLOWED_ERR = ExceptionCode.NO_MODIFICATION_ALLOWED_ERR + NOT_FOUND_ERR = ExceptionCode.NOT_FOUND_ERR + NOT_SUPPORTED_ERR = ExceptionCode.NOT_SUPPORTED_ERR + INUSE_ATTRIBUTE_ERR = ExceptionCode.INUSE_ATTRIBUTE_ERR diff --git a/w3/python/core/fundamental_interface/DOMException_test.py b/w3/python/core/fundamental_interface/DOMException_test.py new file mode 100644 index 0000000..46f7f90 --- /dev/null +++ b/w3/python/core/fundamental_interface/DOMException_test.py @@ -0,0 +1,18 @@ +import unittest + +from w3.dom import DOMException + + +class Test_DOMException(unittest.TestCase): + def test_raise_INDEX_SIZE_ERR(self): + try: + raise DOMException(DOMException.INDEX_SIZE_ERR) + except DOMException as e: + code = e.args[0] + self.assertEqual(code, DOMException.INDEX_SIZE_ERR) + else: + self.fail() + + +if __name__ == '__main__': + unittest.main() diff --git a/w3/python/core/fundamental_interface/DOMImplementation.py b/w3/python/core/fundamental_interface/DOMImplementation.py new file mode 100644 index 0000000..b3778d1 --- /dev/null +++ b/w3/python/core/fundamental_interface/DOMImplementation.py @@ -0,0 +1,29 @@ +from __future__ import absolute_import + +from w3.python.core.type import DOMString + + +class DOMImplementation: + """Interface DOMImplementation + + The `DOMImplementation` interface provides a number of methods for performing operations that are independent of any particular instance of the document object model. + The DOM Level 1 does not specify a way of creating a document instance, and hence document creation is an operation specific to an implementation. 
Future Levels of the DOM specification are expected to provide methods for creating documents directly. + """ + + # TODO + def has_feature(self, + feature: DOMString, + version: DOMString) -> bool: + """ + Test if the DOM implementation implements a specific feature. + + Args: + feature: The package name of the feature to test. In Level 1, the legal values are "HTML" and "XML" (case-insensitive). + version: This is the version number of the package name to test. In Level 1, this is the string "1.0". If the version is not specified, supporting any version of the feature will cause the method to return true. + + Returns: + `True` if the feature is implemented in the specified version, `False` otherwise. + + This method raises no exceptions. + """ + raise NotImplementedError diff --git a/w3/python/core/fundamental_interface/Node.py b/w3/python/core/fundamental_interface/Node.py new file mode 100644 index 0000000..ae27bb3 --- /dev/null +++ b/w3/python/core/fundamental_interface/Node.py @@ -0,0 +1,272 @@ +from __future__ import annotations + +import enum +from typing import Dict, Iterable, Optional + +from w3.python.core.fundamental_interface.DOMException import DOMException +from w3.python.core.fundamental_interface.NodeList import NodeList +from w3.python.core.type import DOMString + + +class NodeType(enum.IntEnum): + """Definition group `NodeType` + + An integer indicating which type of node this is. + + Attributes: + ELEMENT_NODE: The node is a `Element`. + ATTRIBUTE_NODE: The node is an `Attr`. + TEXT_NODE: The node is a `Text` node. + CDATA_SECTION_NODE: The node is a `CDATASection`. + ENTITY_REFERENCE_NODE: The node is an `EntityReference`. + ENTITY_NODE: The node is an `Entity`. + PROCESSING_INSTRUCTION_NODE: The node is a `ProcessingInstruction`. + COMMENT_NODE: The node is a `Comment`. + DOCUMENT_NODE: The node is a `Document`. + DOCUMENT_TYPE_NODE: The node is a `DocumentType`. + DOCUMENT_FRAGMENT_NODE: The node is a `DocumentFragment`. 
+ NOTATION_NODE: The node is a `Notation`. + """ + ELEMENT_NODE = 1 + ATTRIBUTE_NODE = 2 + TEXT_NODE = 3 + CDATA_SECTION_NODE = 4 + ENTITY_REFERENCE_NODE = 5 + ENTITY_NODE = 6 + PROCESSING_INSTRUCTION_NODE = 7 + COMMENT_NODE = 8 + DOCUMENT_NODE = 9 + DOCUMENT_TYPE_NODE = 10 + DOCUMENT_FRAGMENT_NODE = 11 + NOTATION_NODE = 12 + + +class Node: + """Interface `Node` + + The `Node` interface is the primary datatype for the entire Document Object Model. It represents a single node in the document tree. While all objects implementing the `Node` interface expose methods for dealing with children, not all objects implementing the `Node` interface may have children. For example, `Text` nodes may not have children, and adding children to such nodes results in a `DOMException` being raised. + The attributes `node_name`, `node_value` and attributes are included as a mechanism to get at node information without casting down to the specific derived interface. In cases where there is no obvious mapping of these attributes for a specific `node_type` (e.g., `node_value` for an Element or `attributes` for a Comment), this returns `None`. Note that the specialized interfaces may contain additional and more convenient mechanisms to get and set the relevant information. + + Attributes: + node_name: The name of this node, depending on its type. + node_value: The value of this node, depending on its type. + node_type: A code representing the type of the underlying object. + parent_node: The parent of this node. + child_nodes: A `NodeList` that contains all children of this node. + first_child: The first child of this node. + last_child: The last child of this node. + previous_sibling: The node immediately preceding this node. + next_sibling: The node immediately following this node. + attributes: A `NamedNodeMap` containing the attributes of this node. + owner_document: The `Document` object associated with this node. 
+ """ + + def __init__(self, + node_type: NodeType, + node_name: DOMString, + node_value: Optional[DOMString] = None, + parent_node: Optional[_AnyNode] = None, + child_nodes: Optional[Iterable[_AnyNode]] = None, + attributes: Optional[Iterable[_AnyNode]] = None, + owner_document: Optional[_Document] = None, + read_only: bool = False) -> None: + if node_value is None: + node_value = '' + self._read_only = False # Allow to modify only while initiating. + self._set_node_type(node_type) + self._set_node_name(node_name) + self._set_node_value(node_value) + self._set_parent_node(parent_node) + self._init_child_nodes(child_nodes) + self._init_attributes(attributes) + self._set_owner_document(owner_document) + self._read_only = bool(read_only) + # Attributes + self._node_type: NodeType + self._node_name: DOMString + self._node_value: DOMString + self._parent_node: _AnyNode + self._child_nodes: NodeList + self._attributes: _NamedNodeMap + self._owner_document: _Document + self._read_only: bool + + def _check_modifiable(self) -> None: + """Checks if this node is modifiable. + + Raises: + DOMException: + - `NO_MODIFICATION_ALLOWED_ERR`: Raised when the node is readonly. + """ + if self._read_only: + raise DOMException(DOMException.NO_MODIFICATION_ALLOWED_ERR) + + @property + def node_name(self) -> DOMString: + """Read only; The name of this node, depending on its type.""" + return self._node_name + + def _set_node_name(self, name: DOMString) -> None: + """Indirect accessor to set the 'node_name' property.""" + self._node_name = DOMString(name) + + @property + def node_value(self) -> DOMString: + """The value of this node, depending on its type. + + Raises: + DOMException: + - `NO_MODIFICATION_ALLOWED_ERR`: Raised when the node is readonly. (on setting) + - `DOMSTRING_SIZE_ERR`: Raised when it would return more characters than fit in a `DOMString` variable on the implementation platform. 
+ (on retrieval) + """ + return self._node_value + + @node_value.setter + def node_value(self, value: DOMString) -> None: + self._set_node_value(value) + + def _set_node_value(self, value: DOMString) -> None: + """Indirect accessor to set the 'node_value' property. + + Raises: + DOMException: + - `NO_MODIFICATION_ALLOWED_ERR`: Raised when the node is readonly. + """ + self._check_modifiable() + self._node_value = DOMString(value) + + @property + def node_type(self) -> NodeType: + """Read only; A code representing the type of the underlying object, as defined in `NodeType`.""" + return self._node_type + + def _set_node_type(self, node_type: NodeType) -> None: + """Indirect accessor to set the 'node_type' property.""" + if node_type not in NodeType: + raise ValueError(f'{node_type} is not a valid code ' + 'for a node type.') + self._node_type = node_type + + @property + def parent_node(self) -> Optional[_AnyNode]: + """The parent of this node. + + All nodes, except `Document`, `DocumentFragment`, and `Attr` may have a parent. + However, if a node has just been created and not yet added to the tree, or if it has been removed from the tree, this is `None`. + """ + return self._parent_node + + def _set_parent_node(self, + parent_node: Optional[_AnyNode]) -> None: + """Indirect accessor to set the 'parent_node' property.""" + if not parent_node: + self._parent_node = None + else: + self._parent_node = parent_node + + @property + def child_nodes(self) -> NodeList: + """A `NodeList` that contains all children of this node. + + If there are no children, this is a `NodeList` containing no nodes. + The content of the returned `NodeList` is "live" in the sense that, for instance, changes to the children of the node object that it was created from are immediately reflected in the nodes returned by the `NodeList` accessors; it is not a static snapshot of the content of the node. + This is true for every `NodeList`, including the ones returned by the `getElementsByTagName` method. 
+ """ + return self._child_nodes + + def _init_child_nodes(self, + child_nodes: Optional[Iterable[_AnyNode]] = None) -> None: + """Accessor to set the 'child_nodes' property.""" + if child_nodes is None: + self._child_nodes = NodeList() + else: + self._child_nodes = NodeList(iter(child_nodes)) + + @property + def first_child(self) -> Optional[_AnyNode]: + """The first child of this node. + + If there is no such node, this returns `None`. + """ + if not self.child_nodes: + return None + return self.child_nodes.item(0) + + @property + def last_child(self) -> Optional[_AnyNode]: + """The last child of this node. + + If there is no such node, this returns `None`. + """ + if not self.child_nodes: + return None + return self.child_nodes.item(self.child_nodes.length-1) + + @property + def previous_sibling(self) -> Optional[_AnyNode]: + """The node immediately preceding this node. + + If there is no such node, this returns `None`. + """ + if self.parent_node is None: + return None + if self.parent_node.first_child is self: + return None + nth_child = self._nth_child_of_parent() + return self.parent_node.child_nodes.item(nth_child-1) + + @property + def next_sibling(self) -> Optional[_AnyNode]: + """The node immediately following this node. + + If there is no such node, this returns `None`. + """ + if self.parent_node is None: + return None + if self.parent_node.last_child is self: + return None + nth_child = self._nth_child_of_parent() + return self.parent_node.child_nodes.item(nth_child+1) + + def _nth_child_of_parent(self) -> Optional[int]: + """Accessor that indicates how many siblings are there preceding this node. + + If there is no such parent node, this returns `None`. 
+ """ + if self.parent_node is None: + return None + return self.parent_node.child_nodes.index(self) + + @property + def attributes(self) -> _NamedNodeMap: + """A `NamedNodeMap` containing the attributes of this node (if it is an `Element`) or `None` otherwise.""" + return self._attributes + + def _init_attributes(self, + attributes: Optional[Iterable[_AnyNode]] = None) -> None: + self._attributes: _NamedNodeMap = {} # TODO: Replace with real NamedNodeMap #19 + if attributes is None: + return + for attr in iter(attributes): + self._attributes.set_named_item(attr) + + @property + def owner_document(self) -> Optional[_Document]: + """The `Document` object associated with this node. + + This is also the `Document` object used to create new nodes. + When this node is a `Document` this is `None`. + """ + if self.node_type == NodeType.DOCUMENT_NODE: + return None + return self._owner_document + + def _set_owner_document(self, + owner_document: Optional[_Document] = None) -> None: + """Indirect accessor to set the 'owner_document' property.""" + self._owner_document = owner_document + + +_AnyNode = Node +_NamedNodeMap = Dict[str, _AnyNode] # TODO: Implement NamedNodeMap (#19) +_Document = Node # TODO: Implement Document (#20) diff --git a/w3/python/core/fundamental_interface/NodeList.py b/w3/python/core/fundamental_interface/NodeList.py new file mode 100644 index 0000000..df63691 --- /dev/null +++ b/w3/python/core/fundamental_interface/NodeList.py @@ -0,0 +1,38 @@ +from typing import Any, Optional + + +_AnyNode = Any + +class NodeList(list): + """Interface NodeList + + The `NodeList` interface provides the abstraction of an ordered collection of nodes, without defining or constraining how this collection is implemented. + The items in the `NodeList` are accessible via an integral index, starting from 0. + """ + + @property + def length(self) -> int: + """The number of nodes in the list. + + The range of valid child node indices is 0 to `length`-1 inclusive. 
+ """ + return len(self) + + + def item(self, index: int) -> Optional[_AnyNode]: + """Returns the `index`th item in the collection. + + If index is greater than or equal to the number of nodes in the list, this returns `None`. + + Args: + index: Index into the collection. + + Returns: + The node at the `index`th position in the `NodeList`, or `None` if that is not a valid index. + + This method raises no exceptions. + """ + if isinstance(index, int): + if 0 <= index < self.length: + return self[index] + return None diff --git a/w3/python/core/fundamental_interface/NodeList_test.py b/w3/python/core/fundamental_interface/NodeList_test.py new file mode 100644 index 0000000..b66e4c8 --- /dev/null +++ b/w3/python/core/fundamental_interface/NodeList_test.py @@ -0,0 +1,89 @@ +from typing import Iterator +import unittest + +from w3.dom import Node +from w3.dom import NodeType +from w3.dom import NodeList + + +def _make_nodes(n: int) -> Iterator[Node]: + """Accessor to create `n` random nodes immediately. + + Args: + n: number of nodes to generate. + + Yields: + A randomly generated node. + """ + for _ in range(n): + yield Node(NodeType.TEXT_NODE, '#text') + + +class Test_NodeList(unittest.TestCase): + def testInit(self): + NodeList() + # Initiate with a node + node = next(_make_nodes(1)) + NodeList([node]) + # Initiate with multiple nodes. + NodeList(_make_nodes(4)) + + def testIter(self): + nodes = [*_make_nodes(4)] + node_list = NodeList(nodes) + # Create an iterator for checking + node_iter = iter(nodes) + for list_elem in node_list: + self.assertEqual(list_elem, next(node_iter)) + + def testBool(self): + # Empty `NodeList` should equal False. + node_list = NodeList() + self.assertFalse(node_list) + # `NodeList` with items should equal True. 
+ node_list = NodeList(_make_nodes(2)) + self.assertTrue(node_list) + + def testGetItem(self): + nodes = [*_make_nodes(4)] + node_list = NodeList(nodes) + for i in range(4): + self.assertEqual(node_list[i], nodes[i]) + + def testSetItem(self): + node = Node(NodeType.TEXT_NODE, + '#text') + node_list = NodeList(_make_nodes(4)) + node_list[2] = node + self.assertEqual(node_list[2], node) + + def testItem_inSize(self): + nodes = [*_make_nodes(5)] + node_list = NodeList(nodes) + for i in range(5): + self.assertEqual(node_list.item(i), nodes[i]) + + def testItem_greaterOrEqualThanSize(self): + node_list = NodeList(_make_nodes(5)) + for i in range(5, 8): + self.assertIsNone(node_list.item(i)) + + def testItem_notValid(self): + node_list = NodeList(_make_nodes(5)) + for idx in [-1, -9999, 'hi', '1', None, Node, NodeList, b'0']: + self.assertIsNone(node_list.item(idx)) + + def testLength(self): + # Empty NodeList + node_list = NodeList() + self.assertEqual(node_list.length, 0) + # NodeList with a node. + node_list = NodeList(_make_nodes(1)) + self.assertEqual(node_list.length, 1) + # NodeList with some items. 
+ node_list = NodeList(_make_nodes(5)) + self.assertEqual(node_list.length, 5) + + +if __name__ == '__main__': + unittest.main() diff --git a/w3/python/core/fundamental_interface/Node_test.py b/w3/python/core/fundamental_interface/Node_test.py new file mode 100644 index 0000000..556039c --- /dev/null +++ b/w3/python/core/fundamental_interface/Node_test.py @@ -0,0 +1,77 @@ +import unittest + +from w3.dom import DOMException +from w3.dom import Node +from w3.dom import NodeType + + +class Test_Node(unittest.TestCase): + def test_init(self): + node = Node(node_type=NodeType.DOCUMENT_NODE, + node_name='#document', + read_only=False) + self.assertEqual(node.node_type, NodeType.DOCUMENT_NODE) + self.assertEqual(node.node_name, '#document') + self.assertEqual(node.node_value, '') + + def testNodeValue_get(self): + node = Node(node_type=NodeType.TEXT_NODE, + node_name='#text', + node_value='lorem ipsum') + self.assertEqual(node.node_value, 'lorem ipsum') + + def testNodeValue_set(self): + # set value from constructor + node = Node(node_type=NodeType.TEXT_NODE, + node_name='#text', + node_value='foo', + read_only=False) + self.assertEqual(node.node_value, 'foo') + # set using setter + node.node_value = 'bar' + self.assertEqual(node.node_value, 'bar') + + def testNodeValue_setReadOnly(self): + node = Node(node_type=NodeType.TEXT_NODE, + node_name='#text', + node_value='foo', + read_only=True) + try: + node.node_value = 'bar' + except DOMException as e: + code = e.args[0] + self.assertEqual(code, DOMException.NO_MODIFICATION_ALLOWED_ERR) + else: + self.fail() + # `node_value` should not be modified. 
+ self.assertEqual(node.node_value, 'foo') + + def testNodeType_get(self): + for node_type in NodeType: + node = Node(node_type=node_type, + node_name='') + self.assertEqual(node.node_type, node_type) + + def testNodeType_setCorrectTypes(self): + for node_type in NodeType: + node = Node(node_type=node_type, + node_name='') + self.assertEqual(node.node_type, node_type) + + def testNodeType_setWrongTypes(self): + for node_type in [-1, None, 'hi', Node, True, NodeType, 'DOCUMENT_NODE']: + with self.assertRaises(TypeError): + Node(node_type=node_type, + node_name='') + + def testParentNode_get(self): + parent_node = Node(node_type=NodeType.DOCUMENT_FRAGMENT_NODE, + node_name='#document-fragment') + node = Node(node_type=NodeType.ELEMENT_NODE, + node_name='tagName', + parent_node=parent_node) + self.assertEqual(node.parent_node, parent_node) + + +if __name__ == '__main__': + unittest.main() diff --git a/w3/python/core/fundamental_interface/__init__.py b/w3/python/core/fundamental_interface/__init__.py new file mode 100644 index 0000000..fd1e595 --- /dev/null +++ b/w3/python/core/fundamental_interface/__init__.py @@ -0,0 +1,10 @@ +"""Fundamental Interfaces + +The interfaces within this section are considered fundamental, and must be fully implemented by all conforming implementations of the DOM, including all HTML DOM implementations. +""" + +from w3.python.core.fundamental_interface.DOMException import DOMException +from w3.python.core.fundamental_interface.DOMImplementation import DOMImplementation +from w3.python.core.fundamental_interface.Node import Node +from w3.python.core.fundamental_interface.Node import NodeType +from w3.python.core.fundamental_interface.NodeList import NodeList diff --git a/w3/python/core/type.py b/w3/python/core/type.py new file mode 100644 index 0000000..6a576d5 --- /dev/null +++ b/w3/python/core/type.py @@ -0,0 +1 @@ +DOMString = str