Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add AMP_Script_Sanitizer to replace noscript elements with their contents #1226

Merged
merged 3 commits into from
Jun 26, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions includes/amp-helper-functions.php
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,7 @@ function amp_get_content_sanitizers( $post = null ) {
'carousel_required' => ! current_theme_supports( 'amp' ), // For back-compat.
),
'AMP_Block_Sanitizer' => array(), // Note: Block sanitizer must come after embed / media sanitizers since it's logic is using the already sanitized content.
'AMP_Script_Sanitizer' => array(),
'AMP_Style_Sanitizer' => array(),
'AMP_Tag_And_Attribute_Sanitizer' => array(), // Note: This whitelist sanitizer must come at the end to clean up any remaining issues the other sanitizers didn't catch.
),
Expand Down
1 change: 1 addition & 0 deletions includes/class-amp-autoloader.php
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ class AMP_Autoloader {
'AMP_O2_Player_Sanitizer' => 'includes/sanitizers/class-amp-o2-player-sanitizer',
'AMP_Playbuzz_Sanitizer' => 'includes/sanitizers/class-amp-playbuzz-sanitizer',
'AMP_Style_Sanitizer' => 'includes/sanitizers/class-amp-style-sanitizer',
'AMP_Script_Sanitizer' => 'includes/sanitizers/class-amp-script-sanitizer',
'AMP_Embed_Sanitizer' => 'includes/sanitizers/class-amp-embed-sanitizer',
'AMP_Tag_And_Attribute_Sanitizer' => 'includes/sanitizers/class-amp-tag-and-attribute-sanitizer',
'AMP_Video_Sanitizer' => 'includes/sanitizers/class-amp-video-sanitizer',
Expand Down
49 changes: 49 additions & 0 deletions includes/sanitizers/class-amp-script-sanitizer.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
<?php
/**
* Class AMP_Script_Sanitizer
*
* @since 1.0
* @package AMP
*/

/**
* Class AMP_Script_Sanitizer
*
* @since 1.0
*/
class AMP_Script_Sanitizer extends AMP_Base_Sanitizer {

/**
* Sanitize noscript elements.
*
* Eventually this should also handle script elements, if there is a known AMP equivalent.
* If nothing is done with script elements, the whitelist sanitizer will deal with them ultimately.
*
* @todo Eventually this try to automatically convert script tags to AMP when they are recognized. See <https://github.com/Automattic/amp-wp/issues/1032>.
* @todo When a script has an adjacent noscript, consider removing the script here to prevent validation error later. See <https://github.com/Automattic/amp-wp/issues/1213>.
*
* @since 1.0
*/
public function sanitize() {
$noscripts = $this->dom->getElementsByTagName( 'noscript' );

for ( $i = $noscripts->length - 1; $i >= 0; $i-- ) {
$noscript = $noscripts->item( $i );

// Skip AMP boilerplate.
if ( $noscript->firstChild instanceof DOMElement && $noscript->firstChild->hasAttribute( 'amp-boilerplate' ) ) {
continue;
}

$fragment = $this->dom->createDocumentFragment();
$fragment->appendChild( $this->dom->createComment( 'noscript' ) );
while ( $noscript->firstChild ) {
$fragment->appendChild( $noscript->firstChild );
}
$fragment->appendChild( $this->dom->createComment( '/noscript' ) );
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's nice how this outputs the comment <!--noscript-->. This should help with debugging if needed, like on WordPress.org support topics.

$noscript->parentNode->replaceChild( $fragment, $noscript );

$this->did_convert_elements = true;
}
}
}
19 changes: 14 additions & 5 deletions includes/utils/class-amp-dom-utils.php
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,22 @@ public static function get_dom( $document ) {
* When appearing in the head element, a noscript can cause the head to close prematurely
* and the noscript gets moved to the body and anything after it which was in the head.
* See <https://stackoverflow.com/questions/39013102/why-does-noscript-move-into-body-tag-instead-of-head-tag>.
* This is limited to only running in the head element because this is where the problem lies,
* and it is important for the AMP_Script_Sanitizer to be able to access the noscript elements
* in the body otherwise.
*/
$document = preg_replace_callback(
'#<noscript[^>]*>.*?</noscript>#si',
function( $matches ) {
$placeholder = sprintf( '<!--noscript:%s-->', (string) wp_rand() );
AMP_DOM_Utils::$noscript_placeholder_comments[ $placeholder ] = $matches[0];
return $placeholder;
'#^.+?(?=<body)#is',
function( $head_matches ) {
return preg_replace_callback(
'#<noscript[^>]*>.*?</noscript>#si',
function( $noscript_matches ) {
$placeholder = sprintf( '<!--noscript:%s-->', (string) wp_rand() );
AMP_DOM_Utils::$noscript_placeholder_comments[ $placeholder ] = $noscript_matches[0];
return $placeholder;
},
$head_matches[0]
);
},
$document
);
Expand Down
101 changes: 101 additions & 0 deletions tests/test-amp-script-sanitizer.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
<?php
/**
* Test AMP_Script_Sanitizer.
*
* @package AMP
*/

/**
* Test AMP_Script_Sanitizer.
*
* @covers AMP_Script_Sanitizer
*/
class AMP_Script_Sanitizer_Test extends WP_UnitTestCase {

/**
* Data for testing noscript handling.
*
* @return array
*/
public function get_noscript_data() {
return array(
'document_write' => array(
'Has script? <script>document.write("Yep!")</script><noscript>Nope!</noscript>',
'Has script? <!--noscript-->Nope!<!--/noscript-->',
),
'nested_elements' => array(
'<noscript>before <em><strong>middle</strong> end</em></noscript>',
'<!--noscript-->before <em><strong>middle</strong> end</em><!--/noscript-->',
),
);
}

/**
* Test that noscript elements get replaced with their children.
*
* @dataProvider get_noscript_data
* @param string $source Source.
* @param string $expected Expected.
* @covers AMP_Script_Sanitizer::sanitize()
*/
public function test_noscript_promotion( $source, $expected = null ) {
$dom = AMP_DOM_Utils::get_dom_from_content( $source );
$this->assertSame( 1, $dom->getElementsByTagName( 'noscript' )->length );
$sanitizer = new AMP_Script_Sanitizer( $dom );
$sanitizer->sanitize();
$whitelist_sanitizer = new AMP_Tag_And_Attribute_Sanitizer( $dom );
$whitelist_sanitizer->sanitize();
$content = AMP_DOM_Utils::get_content_from_dom( $dom );
$this->assertEquals( $expected, $content );
}

/**
* Test style[amp-boilerplate] preservation.
*/
public function test_boilerplate_preservation() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice thorough testing of the boilerplate not being removed.

ob_start();
?>
<!doctype html>
<html amp>
<head>
<meta charset="utf-8">
<link rel="canonical" href="self.html" />
<meta name="viewport" content="width=device-width,minimum-scale=1">
<style amp-boilerplate>body{-webkit-animation:-amp-start 8s steps(1,end) 0s 1 normal both;-moz-animation:-amp-start 8s steps(1,end) 0s 1 normal both;-ms-animation:-amp-start 8s steps(1,end) 0s 1 normal both;animation:-amp-start 8s steps(1,end) 0s 1 normal both}@-webkit-keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}@-moz-keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}@-ms-keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}@-o-keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}@keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}</style><noscript><style amp-boilerplate>body{-webkit-animation:none;-moz-animation:none;-ms-animation:none;animation:none}</style></noscript>
<script async src="https://cdn.ampproject.org/v0.js"></script><?php // phpcs:ignore WordPress.WP.EnqueuedResources.NonEnqueuedScript ?>

<!-- Google Tag Manager -->
<script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':
new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],
j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=
'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);
})(window,document,'script','dataLayer','GTM-XXXX');</script>
<!-- End Google Tag Manager -->
</head>
<body>
<!-- Google Tag Manager (noscript) -->
<noscript><iframe src="//www.googletagmanager.com/ns.html?id=GTM-XXXX"
height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
<!-- End Google Tag Manager (noscript) -->

Hello, AMP world.
Has script? <script>document.write("Yep!")</script><noscript>Nope!</noscript>
</body>
</html>
<?php
$html = ob_get_clean();
$args = array(
'use_document_element' => true,
);

$dom = AMP_DOM_Utils::get_dom( $html );
AMP_Content_Sanitizer::sanitize_document( $dom, amp_get_content_sanitizers(), $args );

$content = AMP_DOM_Utils::get_content_from_dom_node( $dom, $dom->documentElement );

$this->assertContains( '<!-- Google Tag Manager --><!-- End Google Tag Manager -->', $content );
$this->assertContains( '<noscript><style amp-boilerplate>body{-webkit-animation:none;-moz-animation:none;-ms-animation:none;animation:none}</style></noscript>', $content );
$this->assertContains( 'Has script? <!--noscript-->Nope!<!--/noscript-->', $content );
$this->assertContains( '<!--noscript--><amp-iframe src="https://www.googletagmanager.com/ns.html?id=GTM-XXXX" height="400" sandbox="allow-scripts allow-same-origin" layout="fixed-height" class="amp-wp-b3bfe1b"><div placeholder="" class="amp-wp-iframe-placeholder"></div></amp-iframe><!--/noscript-->', $content );
}
}
1 change: 0 additions & 1 deletion tests/test-amp-style-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -732,7 +732,6 @@ public function test_class_amp_bind_preservation() {
<style>.sidebar2{ visibility:hidden }</style>
<style>.sidebar2.visible { display:block }</style>
<style>.nothing { visibility:hidden; }</style>
</style>
</head>
<body>
<amp-state id="mySidebar">
Expand Down
1 change: 1 addition & 0 deletions tests/test-class-amp-theme-support.php
Original file line number Diff line number Diff line change
Expand Up @@ -1140,6 +1140,7 @@ public function test_prepare_response() {
$this->assertContains( '<meta charset="' . get_bloginfo( 'charset' ) . '">', $sanitized_html );
$this->assertContains( '<meta name="viewport" content="width=device-width,minimum-scale=1">', $sanitized_html );
$this->assertContains( '<style amp-boilerplate>', $sanitized_html );
$this->assertContains( '<noscript><style amp-boilerplate>', $sanitized_html );
$this->assertRegExp( '#<style amp-custom>.*?body{background:black}.*?</style>#s', $sanitized_html );
$this->assertContains( '<script type="text/javascript" src="https://cdn.ampproject.org/v0.js" async></script>', $sanitized_html ); // phpcs:ignore WordPress.WP.EnqueuedResources.NonEnqueuedScript
$this->assertContains( '<script type="text/javascript" src="https://cdn.ampproject.org/v0/amp-list-latest.js" async custom-element="amp-list"></script>', $sanitized_html ); // phpcs:ignore WordPress.WP.EnqueuedResources.NonEnqueuedScript
Expand Down