diff --git a/modules/aioseop_sitemap.php b/modules/aioseop_sitemap.php index e18e0794a..86e31bcea 100644 --- a/modules/aioseop_sitemap.php +++ b/modules/aioseop_sitemap.php @@ -706,9 +706,11 @@ function filter_display_options( $options ) { $options[ $this->prefix . 'max_posts' ] = 50000; } $url = aioseop_home_url( '/' . $this->get_filename() . '.xml' ); + $url_rss = aioseop_home_url( '/' . $this->get_filename() . '.rss' ); - $options[ $this->prefix . 'link' ] = sprintf( __( 'Click here to %s.', 'all-in-one-seo-pack' ), '' . __( 'view your sitemap', 'all-in-one-seo-pack' ) . '' ); + $options[ $this->prefix . 'link' ] = sprintf( __( 'Click here to %s.', 'all-in-one-seo-pack' ), '' . __( 'view your XML sitemap', 'all-in-one-seo-pack' ) . '' ); $options[ $this->prefix . 'link' ] .= __( ' Your sitemap has been created with content and images.', 'all-in-one-seo-pack' ); + $options[ $this->prefix . 'link' ] .= '

' . sprintf( __( 'Click here to %1$sview your RSS sitemap%2$s.', 'all-in-one-seo-pack' ), '', '' ) . '

'; if ( '0' !== get_option( 'blog_public' ) ) { $options[ $this->prefix . 'link' ] .= ' ' . __( 'Changes are automatically submitted to search engines.', 'all-in-one-seo-pack' ); } @@ -1159,9 +1161,12 @@ function setup_rewrites() { function get_rewrite_rules() { $sitemap_rules_normal = $sitemap_rules_gzipped = array(); $sitemap_rules_normal = array( + $this->get_filename() . '.xml' => "index.php?{$this->prefix}path=root", $this->get_filename() . '_(.+)_(\d+).xml' => 'index.php?' . $this->prefix . 'path=$matches[1]&' . $this->prefix . 'page=$matches[2]', $this->get_filename() . '_(.+).xml' => 'index.php?' . $this->prefix . 'path=$matches[1]', + $this->get_filename() . '.rss' => 'index.php?' . $this->prefix . 'path=rss', + $this->get_filename() . 'latest.rss' => 'index.php?' . $this->prefix . 'path=rss_latest', ); if ( $this->options[ "{$this->prefix}gzipped" ] ) { $sitemap_rules_gzipped = array( @@ -1200,10 +1205,12 @@ function flush_rules_hook() { $sitemap_rules = $this->get_rewrite_rules( $wp_rewrite ); if ( ! empty( $sitemap_rules ) ) { $rules = get_option( 'rewrite_rules' ); - $rule = key( $sitemap_rules ); - if ( ! isset( $rules[ $rule ] ) || ( $rules[ $rule ] !== $sitemap_rules[ $rule ] ) ) { - $wp_rewrite->flush_rules(); - set_transient( "{$this->prefix}rules_flushed", true, 43200 ); + $new_rules = array_keys( $sitemap_rules ); + foreach ( $new_rules as $rule ) { + if ( ! 
isset( $rules[ $rule ] ) || ( $rules[ $rule ] !== $sitemap_rules[ $rule ] ) ) { + $wp_rewrite->flush_rules(); + set_transient( "{$this->prefix}rules_flushed", true, 43200 ); + } } } } @@ -1325,8 +1332,11 @@ function make_dynamic_xsl() { */ function get_sitemap_data( $sitemap_type, $page = 0 ) { $sitemap_data = array(); - if ( $this->options[ "{$this->prefix}indexes" ] ) { - $posttypes = $this->options[ "{$this->prefix}posttypes" ]; + + if ( 0 === strpos( $sitemap_type, 'rss' ) ) { + $sitemap_data = $this->get_simple_sitemap(); + } elseif ( $this->options[ "{$this->prefix}indexes" ] ) { + $posttypes = $this->options["{$this->prefix}posttypes"]; if ( empty( $posttypes ) ) { $posttypes = array(); } @@ -1467,8 +1477,11 @@ function do_sitemaps( $message = '' ) { $comment = sprintf( "file '%s' statically", $this->get_filename() ); $sitemap = $this->do_simple_sitemap( $comment ); + $this->write_sitemaps( $this->get_filename(), $sitemap ); - $this->log_stats( 'root', $this->options["{$this->prefix}gzipped"], false ); + $rss = $this->do_simple_sitemap_rss( $comment ); + $this->write_sitemaps( $this->get_filename(), $rss, '.rss' ); + $this->log_stats( 'root', $this->options[ "{$this->prefix}gzipped" ], false ); } } else { delete_transient( "{$this->prefix}rules_flushed" ); @@ -1502,10 +1515,10 @@ function add_xml_mime_type( $mime ) { * @param $filename * @param $contents */ - function write_sitemaps( $filename, $contents ) { - $this->write_sitemap( $filename . '.xml', $contents ); - if ( $this->options[ "{$this->prefix}gzipped" ] ) { - $this->write_sitemap( $filename . '.xml.gz', $contents, true ); + function write_sitemaps( $filename, $contents, $extn = '.xml' ) { + $this->write_sitemap( $filename . $extn, $contents ); + if ( $this->options["{$this->prefix}gzipped"] ) { + $this->write_sitemap( $filename . $extn . 
'.gz', $contents, true ); } } @@ -1769,10 +1782,17 @@ function get_sitemap_index_filenames() { */ function do_build_sitemap( $sitemap_type, $page = 0, $filename = '', $comment = '' ) { if ( empty( $filename ) ) { - if ( 'root' === $sitemap_type ) { - $filename = $this->get_filename(); - } else { - $filename = $this->get_filename() . '_' . $sitemap_type; + switch ( $sitemap_type ) { + case 'root': + // fall-through. + case 'rss': + // fall-through. + case 'rss_latest': + $filename = $this->get_filename(); + break; + default: + $filename = $this->get_filename() . '_' . $sitemap_type; + break; } } if ( empty( $comment ) ) { @@ -1782,7 +1802,7 @@ function do_build_sitemap( $sitemap_type, $page = 0, $filename = '', $comment = if ( ( 'root' === $sitemap_type ) && ! empty( $this->options[ "{$this->prefix}indexes" ] ) ) { return $this->build_sitemap_index( $sitemap_data, sprintf( $comment, $filename ) ); } else { - return $this->build_sitemap( $sitemap_data, sprintf( $comment, $filename ) ); + return $this->build_sitemap( $sitemap_data, $sitemap_type, sprintf( $comment, $filename ) ); } } @@ -2001,7 +2021,21 @@ function do_simple_sitemap( $comment = '' ) { $sitemap_data = $this->get_simple_sitemap(); $sitemap_data = apply_filters( $this->prefix . 'data', $sitemap_data, 'root', 0, $this->options ); - return $this->build_sitemap( $sitemap_data, $comment ); + return $this->build_sitemap( $sitemap_data, '', $comment ); + } + + /** + * Build a single stand-alone RSS sitemap without indexes. + * + * @param string $comment + * + * @return string + */ + function do_simple_sitemap_rss( $comment = '' ) { + $sitemap_data = $this->get_simple_sitemap(); + $sitemap_data = apply_filters( $this->prefix . 
'data', $sitemap_data, 'rss', 0, $this->options ); + + return $this->build_sitemap( $sitemap_data, 'rss', $comment ); } /** @@ -2020,15 +2054,83 @@ function get_sitemap_xsl() { return esc_url( apply_filters( 'aioseop_sitemap_xsl_url', aioseop_home_url( '/sitemap.xsl' ) ) ); } + /** + * Output the RSS for a sitemap, full or latest. + * + * @param $urls + * @param string $sitemap_type The type of RSS sitemap viz. rss or rss_latest. + * @param string $comment + */ + private function output_rss( $urls, $sitemap_type, $comment ) { + echo '' . "\r\n\r\n"; + echo '\r\n"; + + echo ''; + if ( is_multisite() ) { + echo '' . esc_html( get_blog_option( get_current_blog_id(), 'blogname' ) ) . ' + ' . esc_url( get_blog_option( get_current_blog_id(), 'siteurl' ) ) . ' + ' . esc_html( get_blog_option( get_current_blog_id(), 'blogdescription' ) ) . ''; + } else { + echo '' . esc_html( get_option( 'blogname' ) ) . ' + ' . esc_url( get_option( 'siteurl' ) ) . ' + ' . esc_html( get_option( 'blogdescription' ) ) . ''; + } + + // remove urls that do not have the rss element. + $urls = array_filter( $urls, array( $this, 'include_in_rss' ) ); + + if ( false !== strpos( $sitemap_type, 'latest' ) ) { + // let's sort the array in descending order of date. + uasort( $urls, array( $this, 'sort_modifed_date_descending' ) ); + $urls = array_slice( $urls, 0, apply_filters( $this->prefix . 'rss_latest_limit', 20 ) ); + } + + foreach ( $urls as $url ) { + echo + ' + ' . esc_url( $url['loc'] ) . ' + ' . esc_html( $url['rss']['title'] ) . ' + ' . esc_url( $url['loc'] ) . ' + + ' . esc_html( $url['rss']['pubDate'] ) . ' + '; + } + echo ''; + } + + /** + * Remove elements not containing the rss element. + */ + public function include_in_rss( $array ) { + return isset( $array['rss'] ); + } + + /** + * Sort on the basis of modified date. + */ + public function sort_modifed_date_descending( $array1, $array2 ) { + if ( ! isset( $array1['rss'] ) || ! 
isset( $array2['rss'] ) ) { + return 0; + } + return $array1['rss']['timestamp'] < $array2['rss']['timestamp']; + } + /** * Output the XML for a sitemap. * * @param $urls + * @param string $sitemap_type The type of sitemap viz. root, rss, rss_latest etc.. For static sitemaps, this would be empty. * @param string $comment * * @return null */ - function output_sitemap( $urls, $comment = '' ) { + private function output_sitemap( $urls, $sitemap_type, $comment = '' ) { + if ( 0 === strpos( $sitemap_type, 'rss' ) ) { + // starts with rss. + $this->output_rss( $urls, $sitemap_type, $comment ); + return; + } + $max_items = 50000; if ( ! is_array( $urls ) ) { return null; @@ -2073,6 +2175,9 @@ function output_sitemap( $urls, $comment = '' ) { foreach ( $urls as $url ) { echo "\t\r\n"; if ( is_array( $url ) ) { + if ( isset( $url['rss'] ) ) { + unset( $url['rss'] ); + } foreach ( $url as $k => $v ) { if ( ! empty( $v ) ) { if ( 'loc' === $k ) { @@ -2179,13 +2284,14 @@ function build_sitemap_index( $urls, $comment = '' ) { * Return an XML sitemap as a string. * * @param $urls + * @param string $sitemap_type The type of sitemap viz. root, rss, rss_latest etc.. For static sitemaps, this would be empty. * @param string $comment * * @return string */ - function build_sitemap( $urls, $comment = '' ) { + function build_sitemap( $urls, $sitemap_type, $comment = '' ) { ob_start(); - $this->output_sitemap( $urls, $comment ); + $this->output_sitemap( $urls, $sitemap_type, $comment ); return ob_get_clean(); } @@ -2217,6 +2323,11 @@ function get_term_priority_data( $terms ) { } $pr_info['image:image'] = $this->get_images_from_term( $term ); + $pr_info['rss'] = array( + 'title' => $term->name, + 'description' => $term->description, + 'pubDate' => $this->get_date_for_term( $term ), + ); $prio[] = $pr_info; } } @@ -2224,6 +2335,37 @@ function get_term_priority_data( $terms ) { return $prio; } + /** + * Return the date of the latest post in the given taxonomy term. 
+ * + * @param WP_Term $term The taxonomy term. + * + * @return string + */ + private function get_date_for_term( $term ) { + $date = ''; + $query = new WP_Query( array( + 'orderby' => 'post_date', + 'order' => 'DESC', + 'numberposts' => 1, + 'post_type' => 'any', + 'post_status' => 'publish', + 'tax_query' => array( + array( + 'taxonomy' => $term->taxonomy, + 'terms' => $term->term_id, + ), + ), + ) ); + + if ( $query->have_posts() ) { + $timestamp = mysql2date( 'U', $query->post->post_modified_gmt ); + $date = date( 'r', $timestamp ); + } + + return $date; + } + /** * Return a list of permalinks for an array of terms. * @@ -2538,11 +2680,12 @@ function get_date_archive_prio_from_posts( $posts ) { } if ( ! empty( $archives ) ) { - return $this->get_prio_from_posts( - $archives, $this->get_default_priority( 'archive', true ), $this->get_default_frequency( 'archive', true ), array( + return $this->get_prio_from_posts( $archives, $this->get_default_priority( 'archive', true ), $this->get_default_frequency( 'archive', true ), + array( $this, 'get_date_archive_link_from_post', - ) + ), + 'archive' ); } @@ -2644,11 +2787,12 @@ function get_author_prio_from_posts( $posts ) { } } - return $this->get_prio_from_posts( - $authors, $this->get_default_priority( 'author', true ), $this->get_default_frequency( 'author', true ), array( + return $this->get_prio_from_posts( $authors, $this->get_default_priority( 'author', true ), $this->get_default_frequency( 'author', true ), + array( $this, 'get_author_link_from_post', - ) + ), + 'author' ); } @@ -2707,10 +2851,11 @@ function get_comment_count_stats( $posts ) { * @param bool $prio_override * @param bool $freq_override * @param string $linkfunc + * @param string $type Type of entity being fetched viz. author, post etc. 
* * @return array */ - private function get_prio_from_posts( $posts, $prio_override = false, $freq_override = false, $linkfunc = 'get_permalink' ) { + function get_prio_from_posts( $posts, $prio_override = false, $freq_override = false, $linkfunc = 'get_permalink', $type = 'post' ) { $prio = array(); $args = array( 'prio_override' => $prio_override, @@ -2723,10 +2868,11 @@ private function get_prio_from_posts( $posts, $prio_override = false, $freq_over $stats = $this->get_comment_count_stats( $posts ); } if ( is_array( $posts ) ) { - foreach ( $posts as $post ) { + foreach ( $posts as $key => $post ) { // Determine if we check the post for images. $is_single = true; $post->filter = 'sample'; + $timestamp = null; if ( 'get_permalink' === $linkfunc ) { $url = $this->get_permalink( $post ); } else { @@ -2744,6 +2890,7 @@ private function get_prio_from_posts( $posts, $prio_override = false, $freq_over $date = $post->post_date_gmt; } if ( '0000-00-00 00:00:00' !== $date ) { + $timestamp = $date; $date = date( 'Y-m-d\TH:i:s\Z', mysql2date( 'U', $date ) ); } else { $date = 0; @@ -2783,6 +2930,31 @@ private function get_prio_from_posts( $posts, $prio_override = false, $freq_over if ( is_float( $pr_info['priority'] ) ) { $pr_info['priority'] = sprintf( '%0.1F', $pr_info['priority'] ); } + + // add the rss specific data. + if ( $timestamp ) { + $title = null; + switch ( $type ) { + case 'author': + $title = get_the_author_meta( 'display_name', $key ); + break; + default: + $title = get_the_title( $post ); + break; + } + + // RSS expects the GMT date. + $timestamp = mysql2date( 'U', $post->post_modified_gmt ); + $pr_info['rss'] = array( + 'title' => $title, + 'description' => $this->get_the_excerpt( $post ), + 'pubDate' => date( 'r', $timestamp ), + 'timestamp' => $timestamp, + 'post_type' => $post->post_type, + ); + } + + $pr_info['image:image'] = $is_single ? $this->get_images_from_post( $post ) : null; $pr_info = apply_filters( $this->prefix . 
'prio_item_filter', $pr_info, $post, $args ); if ( ! empty( $pr_info ) ) { @@ -2794,6 +2966,30 @@ private function get_prio_from_posts( $posts, $prio_override = false, $freq_over return $prio; } + /** + * Return the excerpt of the given post. + * + * @param WP_Post $post The post object. + * + * @return string + */ + private function get_the_excerpt( $post ) { + global $wp_version; + if ( has_excerpt( $post->ID ) ) { + if ( version_compare( $wp_version, '4.5.0', '>=' ) ) { + return get_the_excerpt( $post ); + } + + $text = strip_shortcodes( $post->post_content ); + $text = apply_filters( 'the_content', $text ); + $text = str_replace( ']]>', ']]>', $text ); + $excerpt_length = apply_filters( 'excerpt_length', 55 ); + $excerpt_more = apply_filters( 'excerpt_more', '[…]' ); + return wp_trim_words( $text, $excerpt_length, $excerpt_more ); + } + return ''; + } + /** * Return the images attached to the term. * @@ -3453,5 +3649,4 @@ function get_all_post_type_data( $args ) { return $posts; } } -} - +} \ No newline at end of file diff --git a/tests/modules/sitemap/test-sitemap.php b/tests/modules/sitemap/test-sitemap.php index f24d550c1..de23968e9 100644 --- a/tests/modules/sitemap/test-sitemap.php +++ b/tests/modules/sitemap/test-sitemap.php @@ -140,6 +140,43 @@ public function test_exclude_images() { ) ); } + + /** + * Test the generated RSS file for the sitemap. + * + * @ticket 561 XML Sitemap module - Add support for RSS/Atom updates. + */ + public function test_rss() { + $posts = $this->setup_posts( 2 ); + + $custom_options = array(); + $custom_options['aiosp_sitemap_indexes'] = ''; + $custom_options['aiosp_sitemap_images'] = 'on'; + $custom_options['aiosp_sitemap_gzipped'] = ''; + $custom_options['aiosp_sitemap_posttypes'] = array( 'post' ); + + $this->_setup_options( 'sitemap', $custom_options ); + + $this->validate_sitemap( + array( + $posts['without'][0] => true, + $posts['without'][1] => true, + ) + ); + + $rss = ABSPATH . 
'/sitemap.rss'; + $this->assertFileExists( $rss ); + + libxml_use_internal_errors(true); + $dom = new DOMDocument(); + $dom->load( $rss ); + $content = file_get_contents( $rss ); + + $this->assertTrue( $dom->schemaValidate( AIOSEOP_UNIT_TESTING_DIR . '/resources/xsd/rss.xsd' ) ); + $this->assertContains( $posts['without'][0], $content ); + $this->assertContains( $posts['without'][1], $content ); + } + /** * Don't include content from trashed pages. @@ -851,4 +888,5 @@ public function invalidExternalPagesProvider() { ), ); } -} \ No newline at end of file +} + diff --git a/tests/resources/xsd/rss.xsd b/tests/resources/xsd/rss.xsd new file mode 100644 index 000000000..d7ddaee60 --- /dev/null +++ b/tests/resources/xsd/rss.xsd @@ -0,0 +1,500 @@ + + + + + XML Schema for RSS v2.0 feed files. + Project home: http://www.codeplex.com/rss2schema/ + Based on the RSS 2.0 specification document at http://cyber.law.harvard.edu/rss/rss.html + Author: Jorgen Thelin + Revision: 16 + Date: 01-Nov-2008 + Feedback to: http://www.codeplex.com/rss2schema/WorkItem/List.aspx + + + + + + + + + + + + + + An item may represent a "story" -- much like a story in a newspaper or magazine; if so its description is a synopsis of the story, and the link points to the full story. An item may also be complete in itself, if so, the description contains the text (entity-encoded HTML is allowed), and the link and title may be omitted. + + + + + + The title of the item. + + + + + The item synopsis. + + + + + The URL of the item. + + + + + Email address of the author of the item. + + + + + Includes the item in one or more categories. + + + + + URL of a page for comments relating to the item. + + + + + Describes a media object that is attached to the item. + + + + + guid or permalink URL for this entry + + + + + Indicates when the item was published. + + + + + The RSS channel that the item came from. + + + + + Extensibility element. + + + + + + + + + + + + The name of the channel. 
It's how people refer to your service. If you have an HTML website that contains the same information as your RSS file, the title of your channel should be the same as the title of your website. + + + + + The URL to the HTML website corresponding to the channel. + + + + + Phrase or sentence describing the channel. + + + + + The language the channel is written in. This allows aggregators to group all Italian language sites, for example, on a single page. A list of allowable values for this element, as provided by Netscape, is here. You may also use values defined by the W3C. + + + + + Copyright notice for content in the channel. + + + + + Email address for person responsible for editorial content. + + + + + Email address for person responsible for technical issues relating to channel. + + + + + The publication date for the content in the channel. All date-times in RSS conform to the Date and Time Specification of RFC 822, with the exception that the year may be expressed with two characters or four characters (four preferred). + + + + + The last time the content of the channel changed. + + + + + Specify one or more categories that the channel belongs to. + + + + + A string indicating the program used to generate the channel. + + + + + A URL that points to the documentation for the format used in the RSS file. It's probably a pointer to this page. It's for people who might stumble across an RSS file on a Web server 25 years from now and wonder what it is. + + + + + Allows processes to register with a cloud to be notified of updates to the channel, implementing a lightweight publish-subscribe protocol for RSS feeds. + + + + + ttl stands for time to live. It's a number of minutes that indicates how long a channel can be cached before refreshing from the source. + + + + + Specifies a GIF, JPEG or PNG image that can be displayed with the channel. + + + + + The PICS rating for the channel. + + + + + Specifies a text input box that can be displayed with the channel. 
+ + + + + A hint for aggregators telling them which hours they can skip. + + + + + A hint for aggregators telling them which days they can skip. + + + + + Extensibility element. + + + + + + + + + Extensibility element. + + + + + + + + A time in GMT when aggregators should not request the channel data. The hour beginning at midnight is hour zero. + + + + + + + + + + + + + + A day when aggregators should not request the channel data. + + + + + + + + + + + + + + + + A time in GMT, when aggregators should not request the channel data. The hour beginning at midnight is hour zero. + + + + + + + + + + + + + + + + The URL of the image file. + + + + + Describes the image, it's used in the ALT attribute of the HTML <img> tag when the channel is rendered in HTML. + + + + + The URL of the site, when the channel is rendered, the image is a link to the site. (Note, in practice the image <title> and <link> should have the same value as the channel's <title> and <link>. + + + + + The width of the image in pixels. + + + + + The height of the image in pixels. + + + + + Text that is included in the TITLE attribute of the link formed around the image in the HTML rendering. + + + + + + + The height of the image in pixels. + + + + + + + + The width of the image in pixels. + + + + + + + + Specifies a web service that supports the rssCloud interface which can be implemented in HTTP-POST, XML-RPC or SOAP 1.1. Its purpose is to allow processes to register with a cloud to be notified of updates to the channel, implementing a lightweight publish-subscribe protocol for RSS feeds. + + + + + + + + + + + + + + + + + The purpose of this element is something of a mystery! You can use it to specify a search engine box. Or to allow a reader to provide feedback. Most aggregators ignore it. + + + + + The label of the Submit button in the text input area. + + + + + Explains the text input area. + + + + + The name of the text object in the text input area. 
+ + + + + The URL of the CGI script that processes text input requests. + + + + + + + Using the regexp definition of E-Mail Address by Lucadean from the .NET RegExp Pattern Repository at http://www.3leaf.com/default/NetRegExpRepository.aspx + + + + + + + + A date-time displayed in RFC-822 format. + Using the regexp definition of rfc-822 date by Sam Ruby at http://www.intertwingly.net/blog/1360.html + + + + + + + + + + + + + + + + + + URL where the enclosure is located + + + + + Size in bytes + + + + + MIME media-type of the enclosure + + + + + + + + + + + + + + + + + +