Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve URL handling in Optimization Detective #1043

Merged
merged 5 commits into from Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
17 changes: 17 additions & 0 deletions plugins/optimization-detective/class-od-url-metric.php
Expand Up @@ -24,6 +24,7 @@
* boundingClientRect: RectData,
* }
* @phpstan-type Data array{
* url: string,
* timestamp: int,
* viewport: RectData,
* elements: ElementData[]
Expand Down Expand Up @@ -83,6 +84,13 @@ public static function get_json_schema(): array {
'title' => 'od-url-metric',
'type' => 'object',
'properties' => array(
'url' => array(
'type' => 'string',
'description' => __( 'The URL for which the metric was obtained.', 'optimization-detective' ),
'required' => true,
'format' => 'uri',
'pattern' => '^https?://',
),
'viewport' => array(
'description' => __( 'Viewport dimensions', 'optimization-detective' ),
'type' => 'object',
Expand Down Expand Up @@ -145,6 +153,15 @@ public static function get_json_schema(): array {
);
}

/**
* Gets URL.
*
* @return string URL.
*/
public function get_url(): string {
return $this->data['url'];
}

/**
* Gets viewport data.
*
Expand Down
4 changes: 3 additions & 1 deletion plugins/optimization-detective/detection.php
Expand Up @@ -38,14 +38,16 @@ function od_get_detection_script( string $slug, OD_URL_Metrics_Group_Collection
$web_vitals_lib_data = require __DIR__ . '/detection/web-vitals.asset.php';
$web_vitals_lib_src = add_query_arg( 'ver', $web_vitals_lib_data['version'], plugin_dir_url( __FILE__ ) . '/detection/web-vitals.js' );

$current_url = od_get_current_url();
$detect_args = array(
'serveTime' => microtime( true ) * 1000, // In milliseconds for comparison with `Date.now()` in JavaScript.
'detectionTimeWindow' => $detection_time_window,
'isDebug' => WP_DEBUG,
'restApiEndpoint' => rest_url( OD_REST_API_NAMESPACE . OD_URL_METRICS_ROUTE ),
'restApiNonce' => wp_create_nonce( 'wp_rest' ),
'currentUrl' => $current_url,
'urlMetricsSlug' => $slug,
'urlMetricsNonce' => od_get_url_metrics_storage_nonce( $slug ),
'urlMetricsNonce' => od_get_url_metrics_storage_nonce( $slug, $current_url ),
'urlMetricsGroupStatuses' => array_map(
static function ( OD_URL_Metrics_Group $group ): array {
return array(
Expand Down
4 changes: 3 additions & 1 deletion plugins/optimization-detective/detection/detect.js
Expand Up @@ -141,6 +141,7 @@ function getCurrentTime() {
* @param {boolean} args.isDebug Whether to show debug messages.
* @param {string} args.restApiEndpoint URL for where to send the detection data.
* @param {string} args.restApiNonce Nonce for writing to the REST API.
* @param {string} args.currentUrl Current URL.
* @param {string} args.urlMetricsSlug Slug for URL metrics.
* @param {string} args.urlMetricsNonce Nonce for URL metrics storage.
* @param {URLMetricsGroupStatus[]} args.urlMetricsGroupStatuses URL metrics group statuses.
Expand All @@ -153,6 +154,7 @@ export default async function detect( {
isDebug,
restApiEndpoint,
restApiNonce,
currentUrl,
urlMetricsSlug,
urlMetricsNonce,
urlMetricsGroupStatuses,
Expand Down Expand Up @@ -314,7 +316,7 @@ export default async function detect( {

/** @type {URLMetrics} */
const urlMetrics = {
url: win.location.href,
url: currentUrl,
slug: urlMetricsSlug,
nonce: urlMetricsNonce,
viewport: {
Expand Down
58 changes: 51 additions & 7 deletions plugins/optimization-detective/storage/data.php
Expand Up @@ -85,9 +85,49 @@ function od_get_normalized_query_vars(): array {
return $normalized_query_vars;
}

/**
* Get the URL for the current request.
*
* This is essentially the REQUEST_URI prefixed by the scheme and host for the home URL.
* This is needed in particular due to subdirectory installs.
*
* @since n.e.x.t
* @access private
*
* @return string Current URL.
*/
function od_get_current_url(): string {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

$parsed_url = wp_parse_url( home_url() );
if ( ! is_array( $parsed_url ) ) {
$parsed_url = array();
}

if ( empty( $parsed_url['scheme'] ) ) {
$parsed_url['scheme'] = is_ssl() ? 'https' : 'http';
}
if ( ! isset( $parsed_url['host'] ) ) {
// phpcs:ignore WordPress.Security.ValidatedSanitizedInput.InputNotSanitized
$parsed_url['host'] = isset( $_SERVER['HTTP_HOST'] ) ? wp_unslash( $_SERVER['HTTP_HOST'] ) : 'localhost';
}

$current_url = $parsed_url['scheme'] . '://' . $parsed_url['host'];
if ( isset( $parsed_url['port'] ) ) {
$current_url .= ':' . $parsed_url['port'];
}
$current_url .= '/';

if ( isset( $_SERVER['REQUEST_URI'] ) ) {
// phpcs:ignore WordPress.Security.ValidatedSanitizedInput.InputNotSanitized
$current_url .= ltrim( wp_unslash( $_SERVER['REQUEST_URI'] ), '/' );
}
Comment on lines +119 to +122
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thinking about my favorite edge-case of sites within a subdirectory, this works as expected, right? I guess since $_SERVER['REQUEST_URI'] would include the full path, including the part that's potentially part of home_url(), this should be fine. Just want to make sure since the function doesn't otherwise consider a path that may be part of home_url() already.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, REQUEST_URI should include the full path including the subdirectory root path. Otherwise, this function here only uses home_url() to obtain the default scheme, host, and port. The path is not used.

return esc_url_raw( $current_url );
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does this use esc_url_raw()?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, humm, since potentially malicious data is present in REQUEST_URI. Perhaps this is overkill, but it has worked well in the AMP plugin.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would think this is already handled by $wpdb->prepare() when storing the value. But in any case, doesn't hurt I guess.

}

/**
* Gets slug for URL metrics.
*
* A slug is the hash of the normalized query vars.
*
* @since 0.1.0
* @access private
*
Expand All @@ -110,12 +150,14 @@ function od_get_url_metrics_slug( array $query_vars ): string {
*
* @see wp_create_nonce()
* @see od_verify_url_metrics_storage_nonce()
* @see od_get_url_metrics_slug()
*
* @param string $slug URL metrics slug.
* @param string $slug Slug (hash of normalized query vars).
* @param string $url URL.
* @return string Nonce.
*/
function od_get_url_metrics_storage_nonce( string $slug ): string {
return wp_create_nonce( "store_url_metrics:$slug" );
function od_get_url_metrics_storage_nonce( string $slug, string $url ): string {
return wp_create_nonce( "store_url_metrics:$slug:$url" );
}

/**
Expand All @@ -126,13 +168,15 @@ function od_get_url_metrics_storage_nonce( string $slug ): string {
*
* @see wp_verify_nonce()
* @see od_get_url_metrics_storage_nonce()
* @see od_get_url_metrics_slug()
*
* @param string $nonce URL metrics storage nonce.
* @param string $slug URL metrics slug.
* @param string $nonce Nonce.
* @param string $slug Slug (hash of normalized query vars).
* @param String $url URL.
* @return bool Whether the nonce is valid.
*/
function od_verify_url_metrics_storage_nonce( string $nonce, string $slug ): bool {
return (bool) wp_verify_nonce( $nonce, "store_url_metrics:$slug" );
function od_verify_url_metrics_storage_nonce( string $nonce, string $slug, string $url ): bool {
return (bool) wp_verify_nonce( $nonce, "store_url_metrics:$slug:$url" );
}

/**
Expand Down
23 changes: 12 additions & 11 deletions plugins/optimization-detective/storage/post-type.php
Expand Up @@ -141,25 +141,25 @@ static function ( $url_metric_data ) use ( $trigger_error ) {
}

/**
* Stores URL metric by merging it with the other URL metrics for a given URL.
* Stores URL metric by merging it with the other URL metrics which share the same normalized query vars.
*
* @since 0.1.0
* @access private
*
* @param string $url URL for the URL metrics. This is used purely as metadata.
* @param string $slug URL metrics slug (computed from query vars).
* @param string $slug Slug (hash of normalized query vars).
* @param OD_URL_Metric $new_url_metric New URL metric.
* @return int|WP_Error Post ID or WP_Error otherwise.
*/
function od_store_url_metric( string $url, string $slug, OD_URL_Metric $new_url_metric ) {

// TODO: What about storing a version identifier?
function od_store_url_metric( string $slug, OD_URL_Metric $new_url_metric ) {
$post_data = array(
'post_title' => $url, // TODO: Should we keep this? It can help with debugging.
// The URL is supplied as the post title in order to aid with debugging. Note that an od-url-metrics post stores
// multiple URL Metric instances, each of which also contains the URL for which the metric was captured. The URL
// appearing in the post title is therefore the most recent URL seen for the URL Metrics which have the same
// normalized query vars among them.
'post_title' => $new_url_metric->get_url(),
);

$post = od_get_url_metrics_post( $slug );

if ( $post instanceof WP_Post ) {
$post_data['ID'] = $post->ID;
$post_data['post_name'] = $post->post_name;
Expand Down Expand Up @@ -190,7 +190,7 @@ static function ( OD_URL_Metric $url_metric ): array {
},
$group_collection->get_flattened_url_metrics()
),
JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES // TODO: No need for pretty-printing.
JSON_UNESCAPED_SLASHES // No need for escaped slashes since not printed to frontend.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Completely unrelated question: Why is this relevant? Do frontend slashes have to be escaped? I'm asking since the Speculation Rules plugin JSON output contains backslashes which Domenic suggested to remove in my Speculation Rules post draft code example. I'm confused about whether those should or shouldn't be there. 🤔

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question. The slashes should always be present when the JSON is inline in HTML, as this prevents malicious strings from breaking out of the containing script. For example, if someone somehow got </script> into the data being serialized to JSON, this would prematurely terminate the script element, whereas if slashes are escaped, then <\/script> would not.

In the context of post_content here where the JSON is never being printed to HTML, there is no need for this escaping.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Er, so that should probably be restored to Speculation Rules 😊

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, thanks for explaining. I believe Speculation Rules already does this correctly then, feel free to check. I was referring to the Make Core post draft I've been working on.

);

$has_kses = false !== has_filter( 'content_save_pre', 'wp_filter_post_kses' );
Expand All @@ -201,10 +201,11 @@ static function ( OD_URL_Metric $url_metric ): array {

$post_data['post_type'] = OD_URL_METRICS_POST_TYPE;
$post_data['post_status'] = 'publish';
$slashed_post_data = wp_slash( $post_data );
if ( isset( $post_data['ID'] ) ) {
$result = wp_update_post( wp_slash( $post_data ), true );
$result = wp_update_post( $slashed_post_data, true );
} else {
$result = wp_insert_post( wp_slash( $post_data ), true );
$result = wp_insert_post( $slashed_post_data, true );
}

if ( $has_kses ) {
Expand Down
20 changes: 3 additions & 17 deletions plugins/optimization-detective/storage/rest-api.php
Expand Up @@ -38,34 +38,21 @@
function od_register_endpoint() {

$args = array(
'url' => array(
'type' => 'string',
'description' => __( 'The URL for which the metric was obtained.', 'optimization-detective' ),
'required' => true,
'format' => 'uri',
'validate_callback' => static function ( $url ) {
if ( ! wp_validate_redirect( $url ) ) {
return new WP_Error( 'non_origin_url', __( 'URL for another site provided.', 'optimization-detective' ) );
}
// TODO: This is not validated as corresponding to the slug in any way. True it is not used for anything but metadata.
return true;
},
),
'slug' => array(
'type' => 'string',
'description' => __( 'An MD5 hash of the query args.', 'optimization-detective' ),
'required' => true,
'pattern' => '^[0-9a-f]{32}$',
// This is validated via the nonce validate_callback, as it is provided as input to create the nonce by the server
// which then is verified to match in the REST API request.
// This is further validated via the validate_callback for the nonce argument, as it is provided as input
// with the 'url' argument to create the nonce by the server. which then is verified to match in the REST API request.
),
'nonce' => array(
'type' => 'string',
'description' => __( 'Nonce originally computed by server required to authorize the request.', 'optimization-detective' ),
'required' => true,
'pattern' => '^[0-9a-f]+$',
'validate_callback' => static function ( $nonce, WP_REST_Request $request ) {
if ( ! od_verify_url_metrics_storage_nonce( $nonce, $request->get_param( 'slug' ) ) ) {
if ( ! od_verify_url_metrics_storage_nonce( $nonce, $request->get_param( 'slug' ), $request->get_param( 'url' ) ) ) {
return new WP_Error( 'invalid_nonce', __( 'URL metrics nonce verification failure.', 'optimization-detective' ) );
}
return true;
Expand Down Expand Up @@ -165,7 +152,6 @@ function od_handle_rest_request( WP_REST_Request $request ) {
}

$result = od_store_url_metric(
$request->get_param( 'url' ),
$request->get_param( 'slug' ),
$url_metric
);
Expand Down
Expand Up @@ -22,13 +22,15 @@ public function data_provider(): array {
return array(
'valid_minimal' => array(
'data' => array(
'url' => home_url( '/' ),
'viewport' => $viewport,
'timestamp' => microtime( true ),
'elements' => array(),
),
),
'valid_with_element' => array(
'data' => array(
'url' => home_url( '/' ),
'viewport' => $viewport,
'timestamp' => microtime( true ),
'elements' => array(
Expand All @@ -43,13 +45,15 @@ public function data_provider(): array {
),
'missing_viewport' => array(
'data' => array(
'url' => home_url( '/' ),
'timestamp' => microtime( true ),
'elements' => array(),
),
'error' => 'viewport is a required property of OD_URL_Metric.',
),
'missing_viewport_width' => array(
'data' => array(
'url' => home_url( '/' ),
'viewport' => array( 'height' => 640 ),
'timestamp' => microtime( true ),
'elements' => array(),
Expand All @@ -58,6 +62,7 @@ public function data_provider(): array {
),
'bad_viewport' => array(
'data' => array(
'url' => home_url( '/' ),
'viewport' => array(
'height' => 'tall',
'width' => 'wide',
Expand All @@ -69,20 +74,31 @@ public function data_provider(): array {
),
'missing_timestamp' => array(
'data' => array(
'url' => home_url( '/' ),
'viewport' => $viewport,
'elements' => array(),
),
'error' => 'timestamp is a required property of OD_URL_Metric.',
),
'missing_elements' => array(
'data' => array(
'url' => home_url( '/' ),
'viewport' => $viewport,
'timestamp' => microtime( true ),
),
'error' => 'elements is a required property of OD_URL_Metric.',
),
'missing_url' => array(
'data' => array(
'viewport' => $viewport,
'timestamp' => microtime( true ),
'elements' => array(),
),
'error' => 'url is a required property of OD_URL_Metric.',
),
'bad_elements' => array(
'data' => array(
'url' => home_url( '/' ),
'viewport' => $viewport,
'timestamp' => microtime( true ),
'elements' => array(
Expand Down
Expand Up @@ -561,6 +561,7 @@ public function test_get_flattened_url_metrics() {
*/
private function get_validated_url_metric( int $viewport_width = 480 ): OD_URL_Metric {
$data = array(
'url' => home_url( '/' ),
'viewport' => array(
'width' => $viewport_width,
'height' => 640,
Expand Down
Expand Up @@ -71,6 +71,7 @@ public function data_provider_test_construction(): array {
'url_metrics' => array(
new OD_URL_Metric(
array(
'url' => home_url( '/' ),
'viewport' => array(
'width' => 1,
'height' => 2,
Expand Down Expand Up @@ -192,6 +193,7 @@ public function test_add_url_metric( int $viewport_width, string $exception ) {
$group->add_url_metric(
new OD_URL_Metric(
array(
'url' => home_url( '/' ),
'viewport' => array(
'width' => $viewport_width,
'height' => 1000,
Expand Down