diff --git a/components/DataLiberation/DataFormatConsumer/MarkupProcessorConsumer.php b/components/DataLiberation/DataFormatConsumer/MarkupProcessorConsumer.php index 486eef71..8073c8da 100644 --- a/components/DataLiberation/DataFormatConsumer/MarkupProcessorConsumer.php +++ b/components/DataLiberation/DataFormatConsumer/MarkupProcessorConsumer.php @@ -60,7 +60,7 @@ public function consume() { break; } $this->append_rich_text( htmlspecialchars( $this->markup_processor->get_modifiable_text() ) ); - if ( in_array( $this->markup_processor->get_tag(), array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ) ) ) { + if ( in_array( $this->get_tag_name(), array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ) ) ) { $this->on_title_candidate( $this->markup_processor->get_modifiable_text() ); } break; @@ -90,7 +90,11 @@ public function consume() { private function handle_tag() { $html = $this->markup_processor; - $tag = strtoupper( $html->get_tag() ); + if($html instanceof WP_HTML_Processor) { + $tag = strtoupper( $html->get_tag() ); + } else { + $tag = strtoupper( $html->get_tag_local_name() ); + } $tag_lowercase = strtolower( $tag ); $is_void_tag = ! $html->expects_closer() && ! $html->is_tag_closer(); @@ -100,14 +104,14 @@ private function handle_tag() { $this->on_title_candidate( $html->get_modifiable_text() ); break; case 'META': - $key = $html->get_attribute( 'name' ); - $value = $html->get_attribute( 'content' ); + $key = $this->get_attribute( 'name' ); + $value = $this->get_attribute( 'content' ); if ( ! 
array_key_exists( $key, $this->metadata ) ) { if ( $key ) { $this->metadata[ $key ] = array(); } } - switch ( $html->get_attribute( 'type' ) ) { + switch ( $this->get_attribute( 'type' ) ) { case 'integer': $value = (int) $value; break; @@ -124,8 +128,8 @@ private function handle_tag() { $template = new WP_HTML_Tag_Processor( '' ); $template->next_tag(); foreach ( array( 'alt', 'title', 'src' ) as $attr ) { - if ( $html->get_attribute( $attr ) ) { - $template->set_attribute( $attr, $html->get_attribute( $attr ) ); + if ( $this->get_attribute( $attr ) ) { + $template->set_attribute( $attr, $this->get_attribute( $attr ) ); } } /** @@ -211,8 +215,8 @@ private function handle_tag() { case 'A': $template = new WP_HTML_Tag_Processor( '' ); $template->next_tag(); - if ( $html->get_attribute( 'href' ) ) { - $template->set_attribute( 'href', $html->get_attribute( 'href' ) ); + if ( $this->get_attribute( 'href' ) ) { + $template->set_attribute( 'href', $this->get_attribute( 'href' ) ); } /** * @@ -297,6 +301,22 @@ private function handle_tag() { } } + private function get_tag_name() { + if($this->markup_processor instanceof WP_HTML_Processor) { + return $this->markup_processor->get_tag(); + } else { + return $this->markup_processor->get_tag_local_name(); + } + } + + private function get_attribute($key) { + if($this->markup_processor instanceof WP_HTML_Processor) { + return $this->markup_processor->get_attribute($key); + } else { + return $this->markup_processor->get_attribute('', $key); + } + } + private function on_title_candidate( $text ) { if ( ! 
array_key_exists( 'post_title', $this->metadata ) ) { $this->metadata['post_title'] = array( diff --git a/components/DataLiberation/EntityReader/EPubEntityReader.php b/components/DataLiberation/EntityReader/EPubEntityReader.php index e77f433f..9f93d96e 100644 --- a/components/DataLiberation/EntityReader/EPubEntityReader.php +++ b/components/DataLiberation/EntityReader/EPubEntityReader.php @@ -54,6 +54,7 @@ public function next_entity() { return false; } + $this->remaining_html_files = []; foreach ( $this->manifest['items'] as $item ) { if ( $item['media-type'] !== 'application/xhtml+xml' ) { continue; @@ -137,11 +138,12 @@ private function parse_manifest() { $xml = XMLProcessor::create_from_string( $this->zip->get_contents( 'META-INF/container.xml' ) ); - if ( false === $xml->next_tag( 'rootfile' ) ) { + + if ( false === $xml->next_tag( ['urn:oasis:names:tc:opendocument:xmlns:container', 'rootfile'] ) ) { return false; } - $full_path = $xml->get_attribute( 'full-path' ); + $full_path = $xml->get_attribute( '', 'full-path' ); if ( ! 
$full_path ) { return false; } @@ -161,16 +163,16 @@ private function parse_manifest() { ); while ( $xml->next_tag() ) { $parsed_entry = array(); - $keys = $xml->get_attribute_names_with_prefix( '' ); - foreach ( $keys as $key ) { - $parsed_entry[ $key ] = $xml->get_attribute( $key ); + $keys = $xml->get_attribute_names_with_prefix( '', '' ); + foreach ( $keys as list($ns, $key) ) { + $parsed_entry[ $key ] = $xml->get_attribute( $ns, $key ); } - if ( $xml->matches_breadcrumbs( array( 'metadata', '*' ) ) ) { + if ( $xml->matches_breadcrumbs( array( 'package', 'metadata', '*' ) ) ) { $parsed['metadata'][] = array( - 'tag' => $xml->get_tag(), + 'tag' => $xml->get_tag_local_name(), 'attributes' => $parsed_entry, ); - } elseif ( $xml->matches_breadcrumbs( array( 'manifest', 'item' ) ) ) { + } elseif ( $xml->matches_breadcrumbs( array( 'package', 'manifest', 'item' ) ) ) { $parsed_entry['type'] = 'item'; $parsed['items'][] = $parsed_entry; } diff --git a/components/DataLiberation/EntityReader/WXREntityReader.php b/components/DataLiberation/EntityReader/WXREntityReader.php index 6b545456..0d5d8622 100644 --- a/components/DataLiberation/EntityReader/WXREntityReader.php +++ b/components/DataLiberation/EntityReader/WXREntityReader.php @@ -171,15 +171,7 @@ class WXREntityReader implements EntityReader { * @since WP_VERSION * @var int */ - private $last_xml_byte_offset_outside_of_entity; - - /** - * The XML processor cursor of the last entity opener. - * - * @since WP_VERSION - * @var string|null - */ - private $last_xml_cursor_outside_of_entity; + private $entity_opener_byte_offset; /** * Whether the current entity has been emitted. 
@@ -250,11 +242,7 @@ class WXREntityReader implements EntityReader { * @since WP_VERSION * @var array */ - const KNOWN_SITE_OPTIONS = array( - 'wp:base_blog_url' => 'home', - 'wp:base_site_url' => 'siteurl', - 'title' => 'blogname', - ); + private $KNOWN_SITE_OPTIONS = []; /** * Mapping of WXR tags to their corresponding entity types and field mappings. @@ -262,106 +250,7 @@ class WXREntityReader implements EntityReader { * @since WP_VERSION * @var array */ - const KNOWN_ENITIES = array( - 'wp:comment' => array( - 'type' => 'comment', - 'fields' => array( - 'wp:comment_id' => 'comment_id', - 'wp:comment_author' => 'comment_author', - 'wp:comment_author_email' => 'comment_author_email', - 'wp:comment_author_url' => 'comment_author_url', - 'wp:comment_author_IP' => 'comment_author_IP', - 'wp:comment_date' => 'comment_date', - 'wp:comment_date_gmt' => 'comment_date_gmt', - 'wp:comment_content' => 'comment_content', - 'wp:comment_approved' => 'comment_approved', - 'wp:comment_type' => 'comment_type', - 'wp:comment_parent' => 'comment_parent', - 'wp:comment_user_id' => 'comment_user_id', - ), - ), - 'wp:commentmeta' => array( - 'type' => 'comment_meta', - 'fields' => array( - 'wp:meta_key' => 'meta_key', - 'wp:meta_value' => 'meta_value', - ), - ), - 'wp:author' => array( - 'type' => 'user', - 'fields' => array( - 'wp:author_id' => 'ID', - 'wp:author_login' => 'user_login', - 'wp:author_email' => 'user_email', - 'wp:author_display_name' => 'display_name', - 'wp:author_first_name' => 'first_name', - 'wp:author_last_name' => 'last_name', - ), - ), - 'item' => array( - 'type' => 'post', - 'fields' => array( - 'title' => 'post_title', - 'link' => 'link', - 'guid' => 'guid', - 'description' => 'post_excerpt', - 'pubDate' => 'post_published_at', - 'dc:creator' => 'post_author', - 'content:encoded' => 'post_content', - 'excerpt:encoded' => 'post_excerpt', - 'wp:post_id' => 'post_id', - 'wp:status' => 'post_status', - 'wp:post_date' => 'post_date', - 'wp:post_date_gmt' => 
'post_date_gmt', - 'wp:post_modified' => 'post_modified', - 'wp:post_modified_gmt' => 'post_modified_gmt', - 'wp:comment_status' => 'comment_status', - 'wp:ping_status' => 'ping_status', - 'wp:post_name' => 'post_name', - 'wp:post_parent' => 'post_parent', - 'wp:menu_order' => 'menu_order', - 'wp:post_type' => 'post_type', - 'wp:post_password' => 'post_password', - 'wp:is_sticky' => 'is_sticky', - 'wp:attachment_url' => 'attachment_url', - ), - ), - 'wp:postmeta' => array( - 'type' => 'post_meta', - 'fields' => array( - 'wp:meta_key' => 'meta_key', - 'wp:meta_value' => 'meta_value', - ), - ), - 'wp:term' => array( - 'type' => 'term', - 'fields' => array( - 'wp:term_id' => 'term_id', - 'wp:term_taxonomy' => 'taxonomy', - 'wp:term_slug' => 'slug', - 'wp:term_parent' => 'parent', - 'wp:term_name' => 'name', - ), - ), - 'wp:tag' => array( - 'type' => 'tag', - 'fields' => array( - 'wp:term_id' => 'term_id', - 'wp:tag_slug' => 'slug', - 'wp:tag_name' => 'name', - 'wp:tag_description' => 'description', - ), - ), - 'wp:category' => array( - 'type' => 'category', - 'fields' => array( - 'wp:category_nicename' => 'slug', - 'wp:category_parent' => 'parent', - 'wp:cat_name' => 'name', - 'wp:category_description' => 'description', - ), - ), - ); + private $KNOWN_ENITIES = []; public static function create( ?ByteReadStream $upstream = null, $cursor = null ) { $xml_cursor = null; @@ -412,6 +301,138 @@ public static function create( ?ByteReadStream $upstream = null, $cursor = null */ protected function __construct( XMLProcessor $xml ) { $this->xml = $xml; + + // Every XML element is a combination of a long-form namespace and a + // local element name, e.g. a `wp:post_id` syntax could actually refer + // to a (https://wordpress.org/export/1.0/, post_id) element. + // + // Namespaces are paramount for parsing XML and cannot be ignored. Elements + // must be matched based on both their namespace and local name. 
+ // + // Unfortunately, different WXR files defined the `wp` namespace in a different way. + // Folks use a mixture of HTTP vs HTTPS protocols and version numbers. We must + // account for all possible options to parse these documents correctly. + $wxr_namespaces = [ + 'http://wordpress.org/export/1.0/', + 'https://wordpress.org/export/1.0/', + 'http://wordpress.org/export/1.1/', + 'https://wordpress.org/export/1.1/', + 'http://wordpress.org/export/1.2/', + 'https://wordpress.org/export/1.2/', + ]; + $this->KNOWN_ENITIES = [ + 'item' => array( + 'type' => 'post', + 'fields' => array( + 'title' => 'post_title', + 'link' => 'link', + 'guid' => 'guid', + 'description' => 'post_excerpt', + 'pubDate' => 'post_published_at', + '{http://purl.org/dc/elements/1.1/}creator' => 'post_author', + '{http://purl.org/rss/1.0/modules/content/}encoded' => 'post_content', + '{http://wordpress.org/export/1.0/excerpt/}encoded' => 'post_excerpt', + '{http://wordpress.org/export/1.1/excerpt/}encoded' => 'post_excerpt', + '{http://wordpress.org/export/1.2/excerpt/}encoded' => 'post_excerpt', + ) + ) + ]; + foreach($wxr_namespaces as $wxr_namespace) { + $this->KNOWN_SITE_OPTIONS = array_merge($this->KNOWN_SITE_OPTIONS, array( + '{'.$wxr_namespace.'}base_blog_url' => 'home', + '{'.$wxr_namespace.'}base_site_url' => 'siteurl', + 'title' => 'blogname', + )); + $this->KNOWN_ENITIES['item']['fields'] = array_merge($this->KNOWN_ENITIES['item']['fields'], array( + '{'.$wxr_namespace.'}post_id' => 'post_id', + '{'.$wxr_namespace.'}status' => 'post_status', + '{'.$wxr_namespace.'}post_date' => 'post_date', + '{'.$wxr_namespace.'}post_date_gmt' => 'post_date_gmt', + '{'.$wxr_namespace.'}post_modified' => 'post_modified', + '{'.$wxr_namespace.'}post_modified_gmt' => 'post_modified_gmt', + '{'.$wxr_namespace.'}comment_status' => 'comment_status', + '{'.$wxr_namespace.'}ping_status' => 'ping_status', + '{'.$wxr_namespace.'}post_name' => 'post_name', + '{'.$wxr_namespace.'}post_parent' => 
'post_parent', + '{'.$wxr_namespace.'}menu_order' => 'menu_order', + '{'.$wxr_namespace.'}post_type' => 'post_type', + '{'.$wxr_namespace.'}post_password' => 'post_password', + '{'.$wxr_namespace.'}is_sticky' => 'is_sticky', + '{'.$wxr_namespace.'}attachment_url' => 'attachment_url', + )); + $this->KNOWN_ENITIES = array_merge($this->KNOWN_ENITIES, array( + '{'.$wxr_namespace.'}comment' => array( + 'type' => 'comment', + 'fields' => array( + '{'.$wxr_namespace.'}comment_id' => 'comment_id', + '{'.$wxr_namespace.'}comment_author' => 'comment_author', + '{'.$wxr_namespace.'}comment_author_email' => 'comment_author_email', + '{'.$wxr_namespace.'}comment_author_url' => 'comment_author_url', + '{'.$wxr_namespace.'}comment_author_IP' => 'comment_author_IP', + '{'.$wxr_namespace.'}comment_date' => 'comment_date', + '{'.$wxr_namespace.'}comment_date_gmt' => 'comment_date_gmt', + '{'.$wxr_namespace.'}comment_content' => 'comment_content', + '{'.$wxr_namespace.'}comment_approved' => 'comment_approved', + '{'.$wxr_namespace.'}comment_type' => 'comment_type', + '{'.$wxr_namespace.'}comment_parent' => 'comment_parent', + '{'.$wxr_namespace.'}comment_user_id' => 'comment_user_id', + ), + ), + '{'.$wxr_namespace.'}commentmeta' => array( + 'type' => 'comment_meta', + 'fields' => array( + '{'.$wxr_namespace.'}meta_key' => 'meta_key', + '{'.$wxr_namespace.'}meta_value' => 'meta_value', + ), + ), + '{'.$wxr_namespace.'}author' => array( + 'type' => 'user', + 'fields' => array( + '{'.$wxr_namespace.'}author_id' => 'ID', + '{'.$wxr_namespace.'}author_login' => 'user_login', + '{'.$wxr_namespace.'}author_email' => 'user_email', + '{'.$wxr_namespace.'}author_display_name' => 'display_name', + '{'.$wxr_namespace.'}author_first_name' => 'first_name', + '{'.$wxr_namespace.'}author_last_name' => 'last_name', + ), + ), + '{'.$wxr_namespace.'}postmeta' => array( + 'type' => 'post_meta', + 'fields' => array( + '{'.$wxr_namespace.'}meta_key' => 'meta_key', + '{'.$wxr_namespace.'}meta_value' => 
'meta_value', + ), + ), + '{'.$wxr_namespace.'}term' => array( + 'type' => 'term', + 'fields' => array( + '{'.$wxr_namespace.'}term_id' => 'term_id', + '{'.$wxr_namespace.'}term_taxonomy' => 'taxonomy', + '{'.$wxr_namespace.'}term_slug' => 'slug', + '{'.$wxr_namespace.'}term_parent' => 'parent', + '{'.$wxr_namespace.'}term_name' => 'name', + ), + ), + '{'.$wxr_namespace.'}tag' => array( + 'type' => 'tag', + 'fields' => array( + '{'.$wxr_namespace.'}term_id' => 'term_id', + '{'.$wxr_namespace.'}tag_slug' => 'slug', + '{'.$wxr_namespace.'}tag_name' => 'name', + '{'.$wxr_namespace.'}tag_description' => 'description', + ), + ), + '{'.$wxr_namespace.'}category' => array( + 'type' => 'category', + 'fields' => array( + '{'.$wxr_namespace.'}category_nicename' => 'slug', + '{'.$wxr_namespace.'}category_parent' => 'parent', + '{'.$wxr_namespace.'}cat_name' => 'name', + '{'.$wxr_namespace.'}category_description' => 'description', + ), + ), + )); + } } public function get_reentrancy_cursor() { @@ -423,13 +444,13 @@ public function get_reentrancy_cursor() { */ $xml_cursor = $this->xml->get_reentrancy_cursor(); $xml_cursor = json_decode( base64_decode( $xml_cursor ), true ); - $xml_cursor['upstream_bytes_forgotten'] = $this->last_xml_byte_offset_outside_of_entity; + $xml_cursor['upstream_bytes_forgotten'] = $this->entity_opener_byte_offset; $xml_cursor = base64_encode( json_encode( $xml_cursor ) ); return json_encode( array( 'xml' => $xml_cursor, - 'upstream' => $this->last_xml_byte_offset_outside_of_entity, + 'upstream' => $this->entity_opener_byte_offset, 'last_post_id' => $this->last_post_id, 'last_comment_id' => $this->last_comment_id, ) @@ -467,11 +488,11 @@ private function get_entity_type() { if ( null === $this->entity_tag ) { return false; } - if ( ! array_key_exists( $this->entity_tag, static::KNOWN_ENITIES ) ) { + if ( ! 
array_key_exists( $this->entity_tag, $this->KNOWN_ENITIES ) ) { return false; } - return static::KNOWN_ENITIES[ $this->entity_tag ]['type']; + return $this->KNOWN_ENITIES[ $this->entity_tag ]['type']; } /** @@ -629,8 +650,8 @@ private function read_next_entity() { // Don't process anything outside the hierarchy. if ( count( $breadcrumbs ) < 2 || - $breadcrumbs[0] !== 'rss' || - $breadcrumbs[1] !== 'channel' + $breadcrumbs[0] !== ['', 'rss'] || + $breadcrumbs[1] !== ['', 'channel'] ) { continue; } @@ -655,11 +676,11 @@ private function read_next_entity() { } if ( count( $breadcrumbs ) <= 2 && $this->xml->is_tag_opener() ) { - $this->last_xml_byte_offset_outside_of_entity = $this->xml->get_token_byte_offset_in_the_input_stream(); - $this->last_xml_cursor_outside_of_entity = $this->xml->get_reentrancy_cursor(); + $this->entity_opener_byte_offset = $this->xml->get_token_byte_offset_in_the_input_stream(); } - $tag = $this->xml->get_tag(); + $tag_with_namespace = $this->xml->get_tag_namespace_and_local_name(); + /** * Custom adjustment: the Accessibility WXR file uses a non-standard * wp:wp_author tag. @@ -668,8 +689,8 @@ private function read_next_entity() { * the regular WXR importer would ignore them? Perhaps a warning * and an upstream PR would be a better solution. */ - if ( $tag === 'wp:wp_author' ) { - $tag = 'wp:author'; + if ( $tag_with_namespace === '{http://wordpress.org/export/1.2/}wp_author' ) { + $tag_with_namespace = '{http://wordpress.org/export/1.2/}author'; } /** @@ -677,7 +698,7 @@ private function read_next_entity() { * finished, emit it, and start processing the new entity the next * time this function is called. */ - if ( array_key_exists( $tag, static::KNOWN_ENITIES ) ) { + if ( array_key_exists( $tag_with_namespace, $this->KNOWN_ENITIES ) ) { if ( $this->entity_type && ! $this->entity_finished ) { $this->emit_entity(); @@ -687,9 +708,8 @@ private function read_next_entity() { // Only tag openers indicate a new entity. 
Closers just mean // the previous entity is finished. if ( $this->xml->is_tag_opener() ) { - $this->set_entity_tag( $tag ); - $this->last_xml_byte_offset_outside_of_entity = $this->xml->get_token_byte_offset_in_the_input_stream(); - $this->last_xml_cursor_outside_of_entity = $this->xml->get_reentrancy_cursor(); + $this->set_entity_tag( $tag_with_namespace ); + $this->entity_opener_byte_offset = $this->xml->get_token_byte_offset_in_the_input_stream(); } continue; } @@ -732,20 +752,22 @@ private function read_next_entity() { */ if ( $this->xml->is_tag_opener() ) { $this->last_opener_attributes = array(); - $names = $this->xml->get_attribute_names_with_prefix( '' ); - foreach ( $names as $name ) { - $this->last_opener_attributes[ $name ] = $this->xml->get_attribute( $name ); + // Get non-namespaced attributes. + $names = $this->xml->get_attribute_names_with_prefix( '', '' ); + foreach ( $names as list($namespace, $name) ) { + $this->last_opener_attributes[ $name ] = $this->xml->get_attribute( $namespace, $name ); } $this->text_buffer = ''; $is_site_option_opener = ( count( $this->xml->get_breadcrumbs() ) === 3 && $this->xml->matches_breadcrumbs( array( 'rss', 'channel', '*' ) ) && - array_key_exists( $this->xml->get_tag(), static::KNOWN_SITE_OPTIONS ) + array_key_exists( $this->xml->get_tag_namespace_and_local_name(), $this->KNOWN_SITE_OPTIONS ) ); - if ( $is_site_option_opener ) { - $this->last_xml_byte_offset_outside_of_entity = $this->xml->get_token_byte_offset_in_the_input_stream(); + if ( $is_site_option_opener ) { + $this->entity_opener_byte_offset = $this->xml->get_token_byte_offset_in_the_input_stream(); } + continue; } @@ -759,7 +781,7 @@ private function read_next_entity() { if ( ! $this->entity_finished && - $this->xml->get_breadcrumbs() === array( 'rss', 'channel' ) + $this->xml->get_breadcrumbs() === array( array( '', 'rss' ), array( '', 'channel' ) ) ) { // Look for site options in children of the tag. 
if ( $this->parse_site_option() ) { @@ -790,7 +812,7 @@ private function read_next_entity() { */ if ( $this->entity_type === 'post' && - $tag === 'category' && + $this->xml->get_tag_local_name() === 'category' && array_key_exists( 'domain', $this->last_opener_attributes ) && array_key_exists( 'nicename', $this->last_opener_attributes ) ) { @@ -805,18 +827,18 @@ private function read_next_entity() { /** * Store the text content of known tags as the value of the corresponding - * entity attribute as defined by the KNOWN_ENITIES mapping. + * entity attribute as defined by the $KNOWN_ENITIES mapping. * - * Ignores tags unlisted in the KNOWN_ENITIES mapping. + * Ignores tags unlisted in the $KNOWN_ENITIES mapping. * * The WXR format is extensible so this reader could potentially * support registering custom handlers for unknown tags in the future. */ - if ( ! isset( static::KNOWN_ENITIES[ $this->entity_tag ]['fields'][ $tag ] ) ) { + if ( ! isset( $this->KNOWN_ENITIES[ $this->entity_tag ]['fields'][ $tag_with_namespace ] ) ) { continue; } - $key = static::KNOWN_ENITIES[ $this->entity_tag ]['fields'][ $tag ]; + $key = $this->KNOWN_ENITIES[ $this->entity_tag ]['fields'][ $tag_with_namespace ]; $this->entity_data[ $key ] = $this->text_buffer; $this->text_buffer = ''; } while ( $this->xml->next_token() ); @@ -848,13 +870,13 @@ private function read_next_entity() { * @return bool Whether a site_option entity was emitted. */ private function parse_site_option() { - if ( ! array_key_exists( $this->xml->get_tag(), static::KNOWN_SITE_OPTIONS ) ) { + if ( ! 
array_key_exists( $this->xml->get_tag_namespace_and_local_name(), $this->KNOWN_SITE_OPTIONS ) ) { return false; } $this->entity_type = 'site_option'; $this->entity_data = array( - 'option_name' => static::KNOWN_SITE_OPTIONS[ $this->xml->get_tag() ], + 'option_name' => $this->KNOWN_SITE_OPTIONS[ $this->xml->get_tag_namespace_and_local_name() ], 'option_value' => $this->text_buffer, ); $this->emit_entity(); @@ -924,10 +946,10 @@ private function emit_entity() { * @since WP_VERSION * */ - private function set_entity_tag( string $tag ) { - $this->entity_tag = $tag; - if ( array_key_exists( $tag, static::KNOWN_ENITIES ) ) { - $this->entity_type = static::KNOWN_ENITIES[ $tag ]['type']; + private function set_entity_tag( string $tag_with_namespace ) { + $this->entity_tag = $tag_with_namespace; + if ( array_key_exists( $tag_with_namespace, $this->KNOWN_ENITIES ) ) { + $this->entity_type = $this->KNOWN_ENITIES[ $tag_with_namespace ]['type']; } } diff --git a/components/DataLiberation/EntityWriter/WXRWriter.php b/components/DataLiberation/EntityWriter/WXRWriter.php index ac513bec..7cee15d6 100644 --- a/components/DataLiberation/EntityWriter/WXRWriter.php +++ b/components/DataLiberation/EntityWriter/WXRWriter.php @@ -123,7 +123,13 @@ private function append_if_not_empty( $tag_name, &$content ) { } private function create_xml_tag( $tag_name, $content ) { - $xml = XMLProcessor::create_from_string( "<$tag_name>text\n" ); + $xml = XMLProcessor::create_from_string( "<$tag_name>text\n", null, 'UTF-8', array( + 'excerpt' => "http://wordpress.org/export/1.2/excerpt/", + 'content' => "http://purl.org/rss/1.0/modules/content/", + 'wfw' => "http://wellformedweb.org/CommentAPI/", + 'dc' => "http://purl.org/dc/elements/1.1/", + 'wp' => "http://wordpress.org/export/1.2/" + ) ); $xml->next_token(); // Move to the opening tag $xml->next_token(); // Move to the text node $xml->set_modifiable_text( $content ); diff --git a/components/DataLiberation/Tests/WXRReaderTest.php 
b/components/DataLiberation/Tests/WXRReaderTest.php index 7c980f55..df3e982c 100644 --- a/components/DataLiberation/Tests/WXRReaderTest.php +++ b/components/DataLiberation/Tests/WXRReaderTest.php @@ -192,7 +192,10 @@ public function test_attachments() { $importer->append_bytes( << - + vneck-tee-2.jpg @@ -276,7 +279,10 @@ public function test_terms() { $importer->append_bytes( << - + @@ -312,7 +318,10 @@ public function test_category() { $importer->append_bytes( << - + uncategorized @@ -344,7 +353,10 @@ public function test_tag_string() { $importer->append_bytes( << - + 651 @@ -376,7 +388,10 @@ public function test_tag_string() { public function test_tag_streaming() { $wxr = << - + 651 @@ -425,7 +440,10 @@ public function test_parse_comment() { $wxr->append_bytes( << - + My post! @@ -509,7 +527,10 @@ public function test_retains_last_ids() { $wxr->append_bytes( << - + My post! diff --git a/components/DataLiberation/Tests/wxr/theme-unit-test-data.xml b/components/DataLiberation/Tests/wxr/theme-unit-test-data.xml index 5f2cdebd..665b410c 100644 --- a/components/DataLiberation/Tests/wxr/theme-unit-test-data.xml +++ b/components/DataLiberation/Tests/wxr/theme-unit-test-data.xml @@ -18,9 +18,11 @@ contained in this file into your site. 
--> - + xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns:wp="https://wordpress.org/export/1.2/"> Theme Unit Test Data https://wpthemetestdata.wordpress.com diff --git a/components/XML/Tests/XMLProcessorTest.php b/components/XML/Tests/XMLProcessorTest.php index 10965cfe..cf20183e 100644 --- a/components/XML/Tests/XMLProcessorTest.php +++ b/components/XML/Tests/XMLProcessorTest.php @@ -15,51 +15,47 @@ * @coversDefaultClass XMLProcessor */ class XMLProcessorTest extends TestCase { - const XML_SIMPLE = 'Text'; - const XML_WITH_CLASSES = 'Text'; - const XML_MALFORMED = 'Back to notifications'; + const XML_SIMPLE = 'Text'; + const XML_WITH_CLASSES = 'Text'; + const XML_MALFORMED = 'Back to notifications'; public function beforeEach() { $GLOBALS['_doing_it_wrong_messages'] = array(); } /** - * @ticket 61365 * - * @covers XMLProcessor::get_tag + * @covers XMLProcessor::get_tag_local_name */ public function test_get_tag_returns_null_before_finding_tags() { $processor = XMLProcessor::create_from_string( 'Test' ); - $this->assertNull( $processor->get_tag(), 'Calling get_tag() without selecting a tag did not return null' ); + $this->assertNull( $processor->get_tag_local_name(), 'Calling get_tag() without selecting a tag did not return null' ); } /** - * @ticket 61365 * - * @covers XMLProcessor::get_tag + * @covers XMLProcessor::get_tag_local_name */ public function test_get_tag_returns_null_when_not_in_open_tag() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); - $this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' ); - $this->assertNull( $processor->get_tag(), 'Accessing a non-existing tag did not return null' ); + $this->assertFalse( $processor->next_tag( array( '', 'p') ), 'Querying a non-existing tag did not return false' ); + $this->assertNull( $processor->get_tag_local_name(), 'Accessing a non-existing tag did not return null' ); } /** - * @ticket 
61365 * - * @covers XMLProcessor::get_tag + * @covers XMLProcessor::get_tag_local_name */ public function test_get_tag_returns_open_tag_name() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); - $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' ); - $this->assertSame( 'wp:content', $processor->get_tag(), 'Accessing an existing tag name did not return "div"' ); + $this->assertTrue( $processor->next_tag( 'content' ), 'Querying an existing tag did not return true' ); + $this->assertSame( 'content', $processor->get_tag_local_name(), 'Accessing an existing tag name did not return "div"' ); } /** - * @ticket 61365 * * @covers XMLProcessor::is_empty_element * @@ -105,73 +101,67 @@ public static function data_is_empty_element() { } /** - * @ticket 61365 * * @covers XMLProcessor::get_attribute */ public function test_get_attribute_returns_null_when_not_in_open_tag() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' ); - $this->assertNull( $processor->get_attribute( 'wp:post-type' ), + $this->assertNull( $processor->get_attribute( '', 'wp:post-type' ), 'Accessing an attribute of a non-existing tag did not return null' ); } /** - * @ticket 61365 * * @covers XMLProcessor::get_attribute */ public function test_get_attribute_returns_null_when_in_closing_tag() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); - $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' ); + $this->assertTrue( $processor->next_tag( array( 'w.org', 'content' ) ), 'Querying an existing tag did not return true' ); $this->assertTrue( $processor->next_token(), 'Querying an existing closing tag did not 
return true' ); $this->assertTrue( $processor->next_token(), 'Querying an existing closing tag did not return true' ); - $this->assertNull( $processor->get_attribute( 'wp:post-type' ), 'Accessing an attribute of a closing tag did not return null' ); + $this->assertNull( $processor->get_attribute( 'w.org', 'post-type' ), 'Accessing an attribute of a closing tag did not return null' ); } /** - * @ticket 61365 * * @covers XMLProcessor::get_attribute */ public function test_get_attribute_returns_null_when_attribute_missing() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); - $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' ); - $this->assertNull( $processor->get_attribute( 'test-id' ), 'Accessing a non-existing attribute did not return null' ); + $this->assertTrue( $processor->next_tag( array( 'w.org', 'content' ) ), 'Querying an existing tag did not return true' ); + $this->assertNull( $processor->get_attribute( '', 'test-id' ), 'Accessing a non-existing attribute did not return null' ); } /** - * @ticket 61365 * * @expectedIncorrectUsage XMLProcessor::base_class_next_token * @covers XMLProcessor::get_attribute */ public function test_attributes_are_rejected_in_tag_closers() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); - $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' ); + $this->assertTrue( $processor->next_tag( 'content' ), 'Querying an existing tag did not return true' ); $this->assertTrue( $processor->next_token(), 'Querying a text node did not return true.' ); $this->assertFalse( $processor->next_token(), 'Querying an existing but invalid closing tag did not return false.' 
); } /** - * @ticket 61365 * * @covers XMLProcessor::get_attribute */ public function test_get_attribute_returns_attribute_value() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); - $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' ); - $this->assertSame( 'test', $processor->get_attribute( 'wp:post-type' ), + $this->assertTrue( $processor->next_tag( array( 'breadcrumbs' => array( array( 'w.org', 'content' ) ) ) ), 'Querying an existing tag did not return true' ); + $this->assertSame( 'test', $processor->get_attribute( 'w.org', 'post-type' ), 'Accessing a wp:post-type="test" attribute value did not return "test"' ); } /** - * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::parse_next_attribute * * @covers XMLProcessor::get_attribute @@ -183,7 +173,6 @@ public function test_parsing_stops_on_malformed_attribute_value_no_value() { } /** - * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::parse_next_attribute * * @covers XMLProcessor::get_attribute @@ -195,33 +184,30 @@ public function test_parsing_stops_on_malformed_attribute_value_no_quotes() { } /** - * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::get_attribute * * @covers XMLProcessor::get_attribute */ public function test_malformed_attribute_value_containing_ampersand_is_treated_as_plaintext() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertTrue( $processor->next_tag(), 'Querying a tag did not return true' ); - $this->assertEquals( 'WordPress & WordPress', $processor->get_attribute( 'enabled' ) ); + $this->assertEquals( 'WordPress & WordPress', $processor->get_attribute( '', 'enabled' ) ); } /** - * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::get_attribute * * @covers XMLProcessor::get_attribute */ public function 
test_malformed_attribute_value_containing_entity_without_semicolon_is_treated_as_plaintext() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertTrue( $processor->next_tag(), 'Querying a tag did not return true' ); - $this->assertEquals( '”', $processor->get_attribute( 'enabled' ) ); + $this->assertEquals( '”', $processor->get_attribute( '', 'enabled' ) ); } /** - * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::parse_next_attribute * * @covers XMLProcessor::get_attribute @@ -233,7 +219,6 @@ public function test_parsing_stops_on_malformed_attribute_value_contains_lt_char } /** - * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::parse_next_attribute * * @covers XMLProcessor::get_attribute @@ -245,7 +230,6 @@ public function test_parsing_stops_on_malformed_tags_duplicate_attributes() { } /** - * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::parse_next_attribute * * @covers XMLProcessor::get_attribute @@ -257,12 +241,11 @@ public function test_parsing_stops_on_malformed_attribute_name_contains_slash() } /** - * @ticket 61365 * * @covers XMLProcessor::get_attribute */ public function test_get_modifiable_text_returns_a_decoded_value() { - $processor = XMLProcessor::create_from_string( '“😄”' ); + $processor = XMLProcessor::create_from_string( '“😄”' ); $processor->next_tag( 'root' ); $processor->next_token(); @@ -275,7 +258,6 @@ public function test_get_modifiable_text_returns_a_decoded_value() { } /** - * @ticket 61365 * * @covers XMLProcessor::get_attribute */ @@ -285,170 +267,206 @@ public function test_get_attribute_returns_a_decoded_value() { $this->assertTrue( $processor->next_tag( 'root' ), 'Querying a tag did not return true' ); $this->assertEquals( '“😄”', - $processor->get_attribute( 'encoded-data' ), + $processor->get_attribute( '', 'encoded-data' ), 'Reading an encoded attribute did not decode it.' 
); } /** - * @ticket 61365 * * @covers XMLProcessor::get_attribute * * @param string $attribute_name Name of data-enabled attribute with case variations. */ public function test_get_attribute_is_case_sensitive() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); $processor->next_tag(); $this->assertEquals( 'true', - $processor->get_attribute( 'DATA-enabled' ), + $processor->get_attribute( '', 'DATA-enabled' ), 'Accessing an attribute by a same-cased name did return not its value' ); $this->assertNull( - $processor->get_attribute( 'data-enabled' ), + $processor->get_attribute( '', 'data-enabled' ), 'Accessing an attribute by a differently-cased name did return its value' ); } /** - * @ticket 61365 * * @covers XMLProcessor::remove_attribute */ public function test_remove_attribute_is_case_sensitive() { $processor = XMLProcessor::create_from_string( 'Test' ); $processor->next_tag(); - $processor->remove_attribute( 'data-enabled' ); + $processor->remove_attribute( '', 'data-enabled' ); $this->assertSame( 'Test', $processor->get_updated_xml(), 'A case-sensitive remove_attribute call did remove the attribute' ); - $processor->remove_attribute( 'DATA-enabled' ); + $processor->remove_attribute( '', 'DATA-enabled' ); - $this->assertSame( 'Test', $processor->get_updated_xml(), + $this->assertSame( 'Test', $processor->get_updated_xml(), 'A case-sensitive remove_attribute call did not remove the attribute' ); } /** - * @ticket 61365 * * @covers XMLProcessor::set_attribute */ public function test_set_attribute_is_case_sensitive() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); $processor->next_tag(); - $processor->set_attribute( 'data-enabled', 'abc' ); + $processor->set_attribute( '', 'data-enabled', 'abc' ); - $this->assertSame( 'Test', $processor->get_updated_xml(), + $this->assertSame( 'Test', $processor->get_updated_xml(), 'A 
case-insensitive set_attribute call did not update the existing attribute' ); } /** - * @ticket 61365 * - * @covers XMLProcessor::get_attribute_names_with_prefix + * @covers XMLProcessor::get_attribute_qualified_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_null_before_finding_tags() { $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertNull( - $processor->get_attribute_names_with_prefix( 'data-' ), + $processor->get_attribute_names_with_prefix( '', 'data-' ), 'Accessing attributes by their prefix did not return null when no tag was selected' ); } /** - * @ticket 61365 * - * @covers XMLProcessor::get_attribute_names_with_prefix + * @covers XMLProcessor::get_attribute_qualified_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_null_when_not_in_open_tag() { - $processor = XMLProcessor::create_from_string( 'Test' ); - $processor->next_tag( 'p' ); - $this->assertNull( $processor->get_attribute_names_with_prefix( 'data-' ), + $processor = XMLProcessor::create_from_string( 'Test' ); + $processor->next_tag( 'w.org', 'content' ); + $processor->next_token(); + $this->assertNull( $processor->get_attribute_names_with_prefix( '', 'data-' ), 'Accessing attributes of a non-existing tag did not return null' ); } /** - * @ticket 61365 * - * @covers XMLProcessor::get_attribute_names_with_prefix + * @covers XMLProcessor::get_attribute_qualified_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_null_when_in_closing_tag() { - $processor = XMLProcessor::create_from_string( 'Test' ); - $processor->next_tag( 'wp:content' ); + $processor = XMLProcessor::create_from_string( 'Test' ); + $processor->next_tag( 'w.org', 'content' ); $processor->next_tag( array( 'tag_closers' => 'visit' ) ); - $this->assertNull( $processor->get_attribute_names_with_prefix( 'data-' ), + $this->assertNull( $processor->get_attribute_names_with_prefix( '', 'data-' ), 'Accessing attributes of a 
closing tag did not return null' ); } /** - * @ticket 61365 * - * @covers XMLProcessor::get_attribute_names_with_prefix + * @covers XMLProcessor::get_attribute_qualified_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_empty_array_when_no_attributes_present() { $processor = XMLProcessor::create_from_string( 'Test' ); $processor->next_tag( 'wp:content' ); - $this->assertSame( array(), $processor->get_attribute_names_with_prefix( 'data-' ), + $this->assertSame( array(), $processor->get_attribute_names_with_prefix( '', 'data-' ), 'Accessing the attributes on a tag without any did not return an empty array' ); } /** - * @ticket 61365 * - * @covers XMLProcessor::get_attribute_names_with_prefix + * @covers XMLProcessor::get_attribute_qualified_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_matching_attribute_names_in_original_case() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); $processor->next_tag(); $this->assertSame( - array( 'data-test-ID' ), - $processor->get_attribute_names_with_prefix( 'data-' ), + array( array( '', 'data-test-ID' ) ), + $processor->get_attribute_names_with_prefix( '', 'data-' ), 'Accessing attributes by their prefix did not return their lowercase names' ); } /** - * @ticket 61365 * - * @covers XMLProcessor::get_attribute_names_with_prefix + * @covers XMLProcessor::get_attribute_qualified_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_attribute_added_by_set_attribute() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); $processor->next_tag(); - $processor->set_attribute( 'data-test-id', '14' ); + $processor->set_attribute( '', 'data-test-id', '14' ); $this->assertSame( - 'Test', + 'Test', $processor->get_updated_xml(), "Updated XML doesn't include attribute added via set_attribute" ); $this->assertSame( - 
array( 'data-test-id', 'data-foo' ), - $processor->get_attribute_names_with_prefix( 'data-' ), + array( array( '', 'data-test-id' ), array( '', 'data-foo' ) ), + $processor->get_attribute_names_with_prefix( '', 'data-' ), "Accessing attribute names doesn't find attribute added via set_attribute" ); } + public function test_get_attribute_names_with_prefix_with_namespace_and_local_name_prefix() { + // XML with two attributes in the wp namespace and one in no namespace + $xml = ''; + $processor = XMLProcessor::create_from_string( $xml ); + $this->assertTrue( $processor->next_tag(), 'Querying a tag did not return true' ); + + // Should match only the wp:data-foo and wp:data-bar attributes + $result = $processor->get_attribute_names_with_prefix( 'http://wordpress.org/export/1.2/', 'data-' ); + $this->assertSame( + array( + array( 'http://wordpress.org/export/1.2/', 'data-foo' ), + array( 'http://wordpress.org/export/1.2/', 'data-bar' ), + ), + $result, + 'get_attribute_names_with_prefix did not return the expected attributes for namespace and local name prefix' + ); + + // Should match only the no-namespace data-foo attribute + $result_no_ns = $processor->get_attribute_names_with_prefix( null, 'data-' ); + $this->assertSame( + array( + array( '', 'data-foo' ), + ), + $result_no_ns, + 'get_attribute_names_with_prefix did not return the expected attributes for no namespace' + ); + + // Should return empty array for a namespace that does not exist + $result_none = $processor->get_attribute_names_with_prefix( 'http://notfound.org/', 'data-' ); + $this->assertSame( + array(), + $result_none, + 'get_attribute_names_with_prefix did not return empty array for non-existent namespace' + ); + + // Should return empty array for a prefix that does not match + $result_no_prefix = $processor->get_attribute_names_with_prefix( 'http://wordpress.org/export/1.2/', 'not-a-match-' ); + $this->assertSame( + array(), + $result_no_prefix, + 'get_attribute_names_with_prefix did not return empty 
array for non-matching prefix' + ); + } + /** - * @ticket 61365 * * @covers XMLProcessor::__toString */ public function test_to_string_returns_updated_xml() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); + $processor->next_tag(); $processor->next_tag(); - $processor->remove_attribute( 'id' ); + $processor->remove_attribute( '', 'id' ); $processor->next_tag(); - $processor->set_attribute( 'id', 'wp:content-id-1' ); + $processor->set_attribute( '', 'id', 'wp:content-id-1' ); $this->assertSame( $processor->get_updated_xml(), @@ -458,44 +476,42 @@ public function test_to_string_returns_updated_xml() { } /** - * @ticket 61365 * * @covers XMLProcessor::get_updated_xml */ public function test_get_updated_xml_applies_the_updates_so_far_and_keeps_the_processor_on_the_current_tag() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); $processor->next_tag(); - $processor->remove_attribute( 'id' ); + $processor->remove_attribute( '', 'id' ); $processor->next_tag(); - $processor->set_attribute( 'id', 'wp:content-id-1' ); + $processor->set_attribute( '', 'id', 'content-id-1' ); $this->assertSame( - 'Test', + 'Test', $processor->get_updated_xml(), 'Calling get_updated_xml after updating the attributes of the second tag returned different XML than expected' ); - $processor->set_attribute( 'id', 'wp:content-id-2' ); + $processor->set_attribute( '', 'id', 'content-id-2' ); $this->assertSame( - 'Test', + 'Test', $processor->get_updated_xml(), 'Calling get_updated_xml after updating the attributes of the second tag for the second time returned different XML than expected' ); $processor->next_tag(); - $processor->remove_attribute( 'id' ); + $processor->remove_attribute( '', 'id' ); $this->assertSame( - 'Test', + 'Test', $processor->get_updated_xml(), 'Calling get_updated_xml after removing the id attribute of the third tag returned different XML 
than expected' ); } /** - * @ticket 61365 * * @covers XMLProcessor::get_updated_xml */ @@ -513,21 +529,20 @@ public function test_get_updated_xml_without_updating_any_attributes_returns_the * Ensures that when seeking to an earlier spot in the document that * all previously-enqueued updates are applied as they ought to be. * - * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::parse_next_attribute */ public function test_get_updated_xml_applies_updates_to_content_after_seeking_to_before_parsed_bytes() { - $processor = XMLProcessor::create_from_string( '' ); + $processor = XMLProcessor::create_from_string( '' ); $processor->next_tag(); - $processor->set_attribute( 'wonky', 'true' ); + $processor->set_attribute( '', 'wonky', 'true' ); $processor->next_tag(); $processor->set_bookmark( 'here' ); $processor->next_tag( array( 'tag_closers' => 'visit' ) ); $processor->seek( 'here' ); - $this->assertSame( '', $processor->get_updated_xml() ); + $this->assertSame( '', $processor->get_updated_xml() ); } public function test_declare_element_as_pcdata() { @@ -545,7 +560,7 @@ public function test_declare_element_as_pcdata() { But! It is all treated as text. '; $processor = XMLProcessor::create_from_string( - "$text" + "$text" ); $processor->declare_element_as_pcdata( 'my-pcdata' ); $processor->next_tag( 'my-pcdata' ); @@ -560,7 +575,6 @@ public function test_declare_element_as_pcdata() { /** * Ensures that bookmarks start and length correctly describe a given token in XML. 
* - * @ticket 61365 * * @dataProvider data_xml_nth_token_substring * @@ -571,7 +585,7 @@ public function test_declare_element_as_pcdata() { public function test_token_bookmark_span( string $xml, int $match_nth_token, string $expected_match ) { $processor = new class( $xml ) extends XMLProcessor { public function __construct( $xml ) { - parent::__construct( $xml, self::CONSTRUCTOR_UNLOCK_CODE ); + parent::__construct( $xml, [], self::CONSTRUCTOR_UNLOCK_CODE ); } /** @@ -622,34 +636,34 @@ public function get_raw_token() { public static function data_xml_nth_token_substring() { return array( // Tags. - 'DIV start tag' => array( '', 1, '' ), + 'DIV start tag' => array( '', 1, '' ), 'DIV start tag with attributes' => array( - '', + '', 1, - '', + '', ), - 'Nested DIV' => array( '', 2, '' ), - 'Sibling DIV' => array( '', 3, '' ), - 'DIV before text' => array( ' text', 1, '' ), - 'DIV after comment' => array( '', 3, '' ), - 'DIV before comment' => array( ' ', 1, '' ), - 'Start "self-closing" tag' => array( '', 1, '' ), + 'Nested DIV' => array( '', 2, '' ), + 'Sibling DIV' => array( '', 3, '' ), + 'DIV before text' => array( ' text', 1, '' ), + 'DIV after comment' => array( '', 3, '' ), + 'DIV before comment' => array( ' ', 1, '' ), + 'Start "self-closing" tag' => array( '', 1, '' ), 'Void tag' => array( '', 1, '' ), 'Void tag w/self-closing flag' => array( '', 1, '' ), - 'Void tag inside DIV' => array( '', 2, '' ), + 'Void tag inside DIV' => array( '', 2, '' ), // Text. 'Text' => array( 'Just text', 1, 'Just text' ), - 'Text in DIV' => array( 'Text', 2, 'Text' ), - 'Text before DIV' => array( 'Text', 1, 'Text' ), + 'Text in DIV' => array( 'Text', 2, 'Text' ), + 'Text before DIV' => array( 'Text', 1, 'Text' ), 'Text after comment' => array( 'Text', 2, 'Text' ), 'Text before comment' => array( 'Text ', 1, 'Text' ), // Comments. 
'Comment' => array( '', 1, '' ), - 'Comment in DIV' => array( '', 2, '' ), - 'Comment before DIV' => array( '', 1, '' ), - 'Comment after DIV' => array( '', 3, '' ), + 'Comment in DIV' => array( '', 2, '' ), + 'Comment before DIV' => array( '', 1, '' ), + 'Comment after DIV' => array( '', 3, '' ), 'Comment after comment' => array( '', 2, '' ), 'Comment before comment' => array( ' ', 1, '' ), 'Empty comment' => array( '', 1, '' ), @@ -657,7 +671,6 @@ public static function data_xml_nth_token_substring() { } /** - * @ticket 61365 * * @covers XMLProcessor::next_tag */ @@ -668,7 +681,6 @@ public function test_next_tag_with_no_arguments_should_find_the_next_existing_ta } /** - * @ticket 61365 * * @covers XMLProcessor::next_tag */ @@ -678,14 +690,77 @@ public function test_next_tag_should_return_false_for_a_non_existing_tag() { $this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' ); } + + /** + * Data provider for test_next_tag_ns_and_array_equivalence. + * + * Provides XML snippets and tag queries (namespace, local name). + * + * @return array[] + */ + public function data_next_tag_ns_and_array_equivalence() { + return array( + 'no namespace, simple tag' => array( + 'OneTwo', + '', + 'item', + ), + 'with namespace, prefix' => array( + 'Test', + 'http://wordpress.org/export/1.2/', + 'content', + ), + 'with namespace, multiple tags' => array( + '12', + 'urn:foo', + 'baz', + ), + 'no namespace, nested' => array( + '
Inner
', + '', + 'item', + ), + 'with namespace, nested' => array( + '
Inner
', + 'urn:ns', + 'item', + ), + ); + } + + /** + * @dataProvider data_next_tag_ns_and_array_equivalence + * @covers XMLProcessor::next_tag + */ + public function test_next_tag_ns_two_arguments( $xml, $namespace, $local_name ) { + $processor1 = XMLProcessor::create_from_string( $xml ); + $result1 = $processor1->next_tag( $namespace, $local_name ); + $this->assertTrue( $result1, 'next_tag($ns, $tag_name) did not find the tag' ); + $this->assertSame( $local_name, $processor1->get_tag_local_name(), 'next_tag($ns, $tag_name) did not land on correct tag' ); + $this->assertSame( $namespace, $processor1->get_tag_namespace(), 'next_tag($ns, $tag_name) did not land on correct namespace' ); + } + + /** + * @dataProvider data_next_tag_ns_and_array_equivalence + * @covers XMLProcessor::next_tag + */ + public function test_next_tag_array_query( $xml, $namespace, $local_name ) { + // Test using next_tag([$ns, $tag_name]) + $processor2 = XMLProcessor::create_from_string( $xml ); + $result2 = $processor2->next_tag( array( $namespace, $local_name ) ); + $this->assertTrue( $result2, 'next_tag([$ns, $tag_name]) did not find the tag' ); + $this->assertSame( $local_name, $processor2->get_tag_local_name(), 'next_tag([$ns, $tag_name]) did not land on correct tag' ); + $this->assertSame( $namespace, $processor2->get_tag_namespace(), 'next_tag([$ns, $tag_name]) did not land on correct namespace' ); + + } + /** - * @ticket 61365 * * @covers XMLProcessor::get_modifiable_text */ public function test_normalizes_carriage_returns_in_text_nodes() { $processor = XMLProcessor::create_from_string( - "We are\rnormalizing\r\n\nthe\n\r\r\r\ncarriage returns" + "We are\rnormalizing\r\n\nthe\n\r\r\r\ncarriage returns" ); $processor->next_tag(); $processor->next_token(); @@ -697,13 +772,12 @@ public function test_normalizes_carriage_returns_in_text_nodes() { } /** - * @ticket 61365 * * @covers XMLProcessor::get_modifiable_text */ public function test_normalizes_carriage_returns_in_cdata() { $processor = 
XMLProcessor::create_from_string( - "" + "" ); $processor->next_tag(); $processor->next_token(); @@ -715,17 +789,15 @@ public function test_normalizes_carriage_returns_in_cdata() { } /** - * @ticket 61365 - * @ticket 61365 * * @covers XMLProcessor::next_tag * @covers XMLProcessor::is_tag_closer */ public function test_next_tag_should_not_stop_on_closers() { - $processor = XMLProcessor::create_from_string( '' ); + $processor = XMLProcessor::create_from_string( '' ); - $this->assertTrue( $processor->next_tag( array( 'breadcrumbs' => array( 'wp:content' ) ) ), 'Did not find desired tag opener' ); - $this->assertFalse( $processor->next_tag( array( 'breadcrumbs' => array( 'wp:content' ) ) ), + $this->assertTrue( $processor->next_tag( array( 'breadcrumbs' => array( array( 'w.org', 'content' ) ) ) ), 'Did not find desired tag opener' ); + $this->assertFalse( $processor->next_tag( array( 'breadcrumbs' => array( array( 'w.org', 'content' ) ) ) ), 'Visited an unwanted tag, a tag closer' ); } @@ -733,16 +805,15 @@ public function test_next_tag_should_not_stop_on_closers() { * Verifies that updates to a document before calls to `get_updated_xml()` don't * lead to the Tag Processor jumping to the wrong tag after the updates. * - * @ticket 61365 * * @covers XMLProcessor::get_updated_xml */ public function test_internal_pointer_returns_to_original_spot_after_inserting_content_before_cursor() { - $tags = XMLProcessor::create_from_string( 'outside
inside
' ); + $tags = XMLProcessor::create_from_string( 'outside
inside
' ); $tags->next_tag(); $tags->next_tag(); - $tags->set_attribute( 'wp:post-type', 'foo' ); + $tags->set_attribute( '', 'wp:post-type', 'foo' ); $tags->next_tag( 'section' ); // Return to this spot after moving ahead. @@ -751,14 +822,13 @@ public function test_internal_pointer_returns_to_original_spot_after_inserting_c // Move ahead. $tags->next_tag( 'photo' ); $tags->seek( 'here' ); - $this->assertSame( 'outside
inside
', + $this->assertSame( 'outside
inside
', $tags->get_updated_xml() ); - $this->assertSame( 'section', $tags->get_tag() ); + $this->assertSame( 'section', $tags->get_tag_local_name() ); $this->assertFalse( $tags->is_tag_closer() ); } /** - * @ticket 61365 * * @covers XMLProcessor::set_attribute */ @@ -768,7 +838,7 @@ public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_mar $this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' ); $this->assertFalse( $processor->next_tag( 'wp:content' ), 'Querying a non-existing tag did not return false' ); - $processor->set_attribute( 'id', 'primary' ); + $processor->set_attribute( '', 'id', 'primary' ); $this->assertSame( self::XML_SIMPLE, @@ -778,7 +848,6 @@ public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_mar } /** - * @ticket 61365 * * @covers XMLProcessor::set_attribute * @covers XMLProcessor::remove_attribute @@ -786,18 +855,18 @@ public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_mar * @covers XMLProcessor::remove_class */ public function test_attribute_ops_on_tag_closer_do_not_change_the_markup() { - $processor = XMLProcessor::create_from_string( '' ); + $processor = XMLProcessor::create_from_string( '' ); $processor->next_token(); $this->assertFalse( $processor->is_tag_closer(), 'Skipped tag opener' ); $processor->next_token(); $this->assertTrue( $processor->is_tag_closer(), 'Skipped tag closer' ); - $this->assertFalse( $processor->set_attribute( 'id', 'test' ), + $this->assertFalse( $processor->set_attribute( '', 'id', 'test' ), "Allowed setting an attribute on a tag closer when it shouldn't have" ); - $this->assertFalse( $processor->remove_attribute( 'invalid-id' ), + $this->assertFalse( $processor->remove_attribute( '', 'invalid-id' ), "Allowed removing an attribute on a tag closer when it shouldn't have" ); $this->assertSame( - '', + '', $processor->get_updated_xml(), 'Calling get_updated_xml after updating a non-existing tag returned an 
XML that was different from the original XML' ); @@ -805,66 +874,63 @@ public function test_attribute_ops_on_tag_closer_do_not_change_the_markup() { /** - * @ticket 61365 * * @covers XMLProcessor::set_attribute */ public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attribute_to_the_markup() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); $processor->next_tag(); - $processor->set_attribute( 'test-attribute', 'test-value' ); + $processor->set_attribute( '', 'test-attribute', 'test-value' ); $this->assertSame( - 'Text', + 'Text', $processor->get_updated_xml(), 'Updated XML does not include attribute added via set_attribute()' ); $this->assertSame( 'test-value', - $processor->get_attribute( 'test-attribute' ), + $processor->get_attribute( '', 'test-attribute' ), 'get_attribute() (called after get_updated_xml()) did not return attribute added via set_attribute()' ); } /** - * @ticket 61365 * * @covers XMLProcessor::get_attribute */ public function test_get_attribute_returns_updated_values_before_they_are_applied() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); $processor->next_tag(); - $processor->set_attribute( 'test-attribute', 'test-value' ); + $processor->set_attribute( '', 'test-attribute', 'test-value' ); $this->assertSame( 'test-value', - $processor->get_attribute( 'test-attribute' ), + $processor->get_attribute( '', 'test-attribute' ), 'get_attribute() (called before get_updated_xml()) did not return attribute added via set_attribute()' ); $this->assertSame( - 'Text', + 'Text', $processor->get_updated_xml(), 'Updated XML does not include attribute added via set_attribute()' ); } /** - * @ticket 61365 * * @covers XMLProcessor::get_attribute */ public function test_get_attribute_returns_updated_values_before_they_are_applied_with_different_name_casing() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); $processor->next_tag(); - $processor->set_attribute( 'test-ATTribute', 
'test-value' ); + $processor->set_attribute( '', 'test-ATTribute', 'test-value' ); $this->assertSame( 'test-value', - $processor->get_attribute( 'test-ATTribute' ), + $processor->get_attribute( '', 'test-ATTribute' ), 'get_attribute() (called before get_updated_xml()) did not return attribute added via set_attribute()' ); $this->assertSame( - 'Text', + 'Text', $processor->get_updated_xml(), 'Updated XML does not include attribute added via set_attribute()' ); @@ -872,39 +938,37 @@ public function test_get_attribute_returns_updated_values_before_they_are_applie /** - * @ticket 61365 * * @covers XMLProcessor::get_attribute */ public function test_get_attribute_reflects_removed_attribute_before_it_is_applied() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); $processor->next_tag(); - $processor->remove_attribute( 'id' ); + $processor->remove_attribute( '', 'id' ); $this->assertNull( - $processor->get_attribute( 'id' ), + $processor->get_attribute( '', 'id' ), 'get_attribute() (called before get_updated_xml()) returned attribute that was removed by remove_attribute()' ); $this->assertSame( - 'Text', + 'Text', $processor->get_updated_xml(), 'Updated XML includes attribute that was removed by remove_attribute()' ); } /** - * @ticket 61365 * * @covers XMLProcessor::get_attribute */ public function test_get_attribute_reflects_adding_and_then_removing_an_attribute_before_those_updates_are_applied() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); $processor->next_tag(); - $processor->set_attribute( 'test-attribute', 'test-value' ); - $processor->remove_attribute( 'test-attribute' ); + $processor->set_attribute( '', 'test-attribute', 'test-value' ); + $processor->remove_attribute( '', 'test-attribute' ); $this->assertNull( - $processor->get_attribute( 'test-attribute' ), + $processor->get_attribute( '', 'test-attribute' ), 'get_attribute() (called before get_updated_xml()) returned attribute that was added via set_attribute() and then 
removed by remove_attribute()' ); $this->assertSame( @@ -915,38 +979,36 @@ public function test_get_attribute_reflects_adding_and_then_removing_an_attribut } /** - * @ticket 61365 * * @covers XMLProcessor::get_attribute */ public function test_get_attribute_reflects_setting_and_then_removing_an_existing_attribute_before_those_updates_are_applied() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); $processor->next_tag(); - $processor->set_attribute( 'id', 'test-value' ); - $processor->remove_attribute( 'id' ); + $processor->set_attribute( '', 'id', 'test-value' ); + $processor->remove_attribute( '', 'id' ); $this->assertNull( - $processor->get_attribute( 'id' ), + $processor->get_attribute( '', 'id' ), 'get_attribute() (called before get_updated_xml()) returned attribute that was overwritten by set_attribute() and then removed by remove_attribute()' ); $this->assertSame( - 'Text', + 'Text', $processor->get_updated_xml(), 'Updated XML includes attribute that was overwritten by set_attribute() and then removed by remove_attribute()' ); } /** - * @ticket 61365 * * @covers XMLProcessor::set_attribute */ public function test_set_attribute_with_an_existing_attribute_name_updates_its_value_in_the_markup() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); $processor->next_tag(); - $processor->set_attribute( 'id', 'new-id' ); + $processor->set_attribute( '', 'id', 'new-id' ); $this->assertSame( - 'Text', + 'Text', $processor->get_updated_xml(), 'Existing attribute was not updated' ); @@ -956,22 +1018,20 @@ public function test_set_attribute_with_an_existing_attribute_name_updates_its_v * Ensures that when setting an attribute multiple times that only * one update flushes out into the updated XML. 
* - * @ticket 61365 * * @covers XMLProcessor::set_attribute */ public function test_set_attribute_with_case_variants_updates_only_the_original_first_copy() { - $processor = XMLProcessor::create_from_string( '' ); + $processor = XMLProcessor::create_from_string( '' ); $processor->next_tag(); - $processor->set_attribute( 'data-enabled', 'canary1' ); - $processor->set_attribute( 'data-enabled', 'canary2' ); - $processor->set_attribute( 'data-enabled', 'canary3' ); + $processor->set_attribute( '', 'data-enabled', 'canary1' ); + $processor->set_attribute( '', 'data-enabled', 'canary2' ); + $processor->set_attribute( '', 'data-enabled', 'canary3' ); - $this->assertSame( '', strtolower( $processor->get_updated_xml() ) ); + $this->assertSame( '', strtolower( $processor->get_updated_xml() ) ); } /** - * @ticket 61365 * * @covers XMLProcessor::next_tag * @covers XMLProcessor::set_attribute @@ -979,42 +1039,40 @@ public function test_set_attribute_with_case_variants_updates_only_the_original_ public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the_markup() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); while ( $processor->next_tag() ) { - $processor->set_attribute( 'data-foo', 'bar' ); + $processor->set_attribute( '', 'data-foo', 'bar' ); } $this->assertSame( - 'Text', + 'Text', $processor->get_updated_xml(), 'Not all tags were updated when looping with next_tag() and set_attribute()' ); } /** - * @ticket 61365 * * @covers XMLProcessor::remove_attribute */ public function test_remove_attribute_with_an_existing_attribute_name_removes_it_from_the_markup() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); $processor->next_tag(); - $processor->remove_attribute( 'id' ); + $processor->remove_attribute( '', 'id' ); $this->assertSame( - 'Text', + 'Text', $processor->get_updated_xml(), 'Attribute was not removed' ); } /** - * @ticket 61365 * * @covers XMLProcessor::remove_attribute */ public function 
test_remove_attribute_with_a_non_existing_attribute_name_does_not_change_the_markup() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); $processor->next_tag(); - $processor->remove_attribute( 'no-such-attribute' ); + $processor->remove_attribute( '', 'no-such-attribute' ); $this->assertSame( self::XML_SIMPLE, @@ -1024,37 +1082,35 @@ public function test_remove_attribute_with_a_non_existing_attribute_name_does_no } /** - * @ticket 61365 * * @covers XMLProcessor::next_tag */ public function test_correctly_parses_xml_attributes_wrapped_in_single_quotation_marks() { $processor = XMLProcessor::create_from_string( - 'Text' + 'Text' ); $processor->next_tag( array( - 'breadcrumbs' => array( 'wp:content' ), + 'breadcrumbs' => array( array( 'w.org', 'content' ) ), 'id' => 'first', ) ); - $processor->remove_attribute( 'id' ); + $processor->remove_attribute( '', 'id' ); $processor->next_tag( array( - 'breadcrumbs' => array( 'wp:text' ), + 'breadcrumbs' => array( array( 'w.org', 'text' ) ), 'id' => 'second', ) ); - $processor->set_attribute( 'id', 'single-quote' ); + $processor->set_attribute( '', 'id', 'single-quote' ); $this->assertSame( - 'Text', + 'Text', $processor->get_updated_xml(), 'Did not remove single-quoted attribute' ); } /** - * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::parse_next_attribute * @expectedIncorrectUsage XMLProcessor::set_attribute * @@ -1066,13 +1122,12 @@ public function test_setting_an_attribute_to_false_is_rejected() { ); $processor->next_tag( 'input' ); $this->assertFalse( - $processor->set_attribute( 'checked', false ), + $processor->set_attribute( '', 'checked', false ), 'Accepted a boolean attribute name.' ); } /** - * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::set_attribute * * @covers XMLProcessor::set_attribute @@ -1081,7 +1136,7 @@ public function test_setting_a_missing_attribute_to_false_does_not_change_the_ma $xml_input = '
'; $processor = XMLProcessor::create_from_string( $xml_input ); $processor->next_tag( 'input' ); - $processor->set_attribute( 'checked', false ); + $processor->set_attribute( '', 'checked', false ); $this->assertSame( $xml_input, $processor->get_updated_xml(), @@ -1092,7 +1147,6 @@ public function test_setting_a_missing_attribute_to_false_does_not_change_the_ma /** * Ensures that unclosed and invalid comments trigger warnings or errors. * - * @ticket 61365 * * @covers XMLProcessor::next_tag * @covers XMLProcessor::paused_at_incomplete_token @@ -1106,7 +1160,7 @@ public function test_documents_may_end_with_unclosed_comment( $xml_ending_before $this->assertFalse( $processor->next_tag(), - "Should not have found any tag, but found {$processor->get_tag()}." + "Should not have found any tag, but found {$processor->get_tag_local_name()}." ); $this->assertTrue( @@ -1130,7 +1184,6 @@ public static function data_xml_with_unclosed_comments() { /** * Ensures that partial syntax triggers warnings or errors. * - * @ticket 61365 * * @covers XMLProcessor::next_tag * @covers XMLProcessor::paused_at_incomplete_token @@ -1144,7 +1197,7 @@ public function test_partial_syntax_triggers_parse_error_when_streaming_is_not_u $this->assertFalse( $processor->next_tag(), - "Should not have found any tag, but found {$processor->get_tag()}." + "Should not have found any tag, but found {$processor->get_tag_local_name()}." ); $this->assertFalse( @@ -1174,7 +1227,6 @@ public static function data_partial_syntax() { /** * Ensures that the processor doesn't attempt to match an incomplete token. * - * @ticket 61365 * * @covers XMLProcessor::next_tag * @covers XMLProcessor::paused_at_incomplete_token @@ -1189,7 +1241,7 @@ public function test_next_tag_returns_false_for_incomplete_syntax_elements( $inc $processor->next_tag(); $this->assertFalse( $processor->next_tag(), - "Shouldn't have found any tags but found {$processor->get_tag()}." 
+ "Shouldn't have found any tags but found {$processor->get_tag_local_name()}." ); $this->assertTrue( @@ -1205,26 +1257,25 @@ public function test_next_tag_returns_false_for_incomplete_syntax_elements( $inc */ public static function data_incomplete_syntax_elements() { return array( - 'Incomplete tag name' => array( ' array( ' array( ' array( '