Skip to content
Permalink
Browse files

#43590: Use robots meta tag to better discourage search engines.

This changes the "discourage search engines" option to output a `noindex, nofollow` robots meta tag. `Disallow: /` is removed from the `robots.txt` to allow search engines to discover they are requested not to index the site.

Disallowing search engines from accessing a site in the `robots.txt` file can result in search engines listing a site with a fragment (a listing without content).

Props donmhico, jonoaldersonwp.
Fixes #43590.



git-svn-id: https://develop.svn.wordpress.org/trunk@45928 602fd350-edb4-49c9-b593-d223f7449a82
  • Loading branch information...
peterwilsoncc committed Sep 2, 2019
1 parent d0db5be commit 122cb2864bd9699375782aec1f6c62783bd0c1bd
Showing with 30 additions and 10 deletions.
  1. +7 −8 src/wp-includes/functions.php
  2. +8 −2 src/wp-includes/general-template.php
  3. +15 −0 tests/phpunit/tests/general/template.php
@@ -1582,6 +1582,8 @@ function do_feed_atom( $for_comments ) {
* Displays the default robots.txt file content.
*
* @since 2.1.0
* @since 5.3.0 Remove the "Disallow: /" output if search engine visibility is
* discouraged in favor of robots meta HTML tag in wp_no_robots().
*/
function do_robots() {
header( 'Content-Type: text/plain; charset=utf-8' );
@@ -1595,14 +1597,11 @@ function do_robots() {
$output = "User-agent: *\n";
$public = get_option( 'blog_public' );
if ( '0' == $public ) {
$output .= "Disallow: /\n";
} else {
$site_url = parse_url( site_url() );
$path = ( ! empty( $site_url['path'] ) ) ? $site_url['path'] : '';
$output .= "Disallow: $path/wp-admin/\n";
$output .= "Allow: $path/wp-admin/admin-ajax.php\n";
}
$site_url = parse_url( site_url() );
$path = ( ! empty( $site_url['path'] ) ) ? $site_url['path'] : '';
$output .= "Disallow: $path/wp-admin/\n";
$output .= "Allow: $path/wp-admin/admin-ajax.php\n";
/**
* Filters the robots.txt output.
@@ -2986,12 +2986,18 @@ function noindex() {
* Display a noindex meta tag.
*
* Outputs a noindex meta tag that tells web robots not to index the page content.
* Typical usage is as a wp_head callback. add_action( 'wp_head', 'wp_no_robots' );
* Typical usage is as a {@see 'wp_head'} callback. add_action( 'wp_head', 'wp_no_robots' );
*
* @since 3.3.0
* @since 5.3.0 Echo "noindex,nofollow" if search engine visibility is discouraged.
*/
function wp_no_robots() {
	// Exactly one robots meta tag is printed per call. An unconditional echo
	// ahead of this check would emit a second, contradictory tag.
	if ( get_option( 'blog_public' ) ) {
		// Truthy 'blog_public' (e.g. '1'): keep the page out of the index
		// but still let robots follow its links.
		echo "<meta name='robots' content='noindex,follow' />\n";
		return;
	}

	// Falsy 'blog_public' (e.g. '0', search engines discouraged): ask robots
	// neither to index the page nor to follow its links.
	echo "<meta name='robots' content='noindex,nofollow' />\n";
}
/**
@@ -612,4 +612,19 @@ function test_get_custom_logo_preserves_switched_state() {
$this->assertSame( $expected, $result );
}
/**
 * Checks the robots meta tag echoed by wp_no_robots() for each 'blog_public' setting.
 *
 * @ticket 43590
 */
function test_wp_no_robots() {
	// Each case pairs a 'blog_public' option value with the exact markup expected.
	$cases = array(
		// Private site (search engines discouraged).
		array( '0', "<meta name='robots' content='noindex,nofollow' />\n" ),
		// Public site.
		array( '1', "<meta name='robots' content='noindex,follow' />\n" ),
	);

	foreach ( $cases as $case ) {
		list( $blog_public, $expected_tag ) = $case;

		// Set the option first, then capture what the function echoes.
		update_option( 'blog_public', $blog_public );
		$this->assertSame( $expected_tag, get_echo( 'wp_no_robots' ) );
	}
}
}

0 comments on commit 122cb28

Please sign in to comment.
You can’t perform that action at this time.