Skip to content

Commit

Permalink
Stabilize subresource response headers.
Browse files Browse the repository at this point in the history
When content-type is not text/html, strip common sources of entropy in the
response headers (e.g. trace IDs), so that the header-integrity is stable. This
enables prefetching of subresources via substitution:
https://github.com/WICG/webpackage/blob/main/explainers/signed-exchange-subresource-substitution.md
  • Loading branch information
twifkak committed Aug 26, 2021
1 parent ed0b339 commit d70689c
Show file tree
Hide file tree
Showing 5 changed files with 1,230 additions and 7 deletions.
2 changes: 2 additions & 0 deletions sxg_rs/Cargo.toml
Expand Up @@ -21,9 +21,11 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[features]
default = ["strip_id_headers"]
js_fetcher = []
js_signer = []
rust_signer = []
strip_id_headers = []

[dependencies]
async-trait = "0.1.50"
Expand Down
62 changes: 56 additions & 6 deletions sxg_rs/src/headers.rs
Expand Up @@ -12,6 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::http_parser::{
parse_accept_header,
parse_cache_control_header,
parse_content_type_header,
media_type::MediaType,
};
use std::collections::{BTreeSet, HashMap, HashSet};
use once_cell::sync::Lazy;
use crate::http::HeaderFields;
Expand Down Expand Up @@ -88,7 +94,7 @@ impl Headers {
}
pub fn validate_as_sxg_payload(&self) -> Result<(), String> {
for (k, v) in self.0.iter() {
if STATEFUL_HEADERS.contains(k.as_str()) {
if DONT_SIGN_RESPONSE_HEADERS.contains(k.as_str()) {
return Err(format!(r#"A stateful header "{}" is found."#, k));
}
if CACHE_CONTROL_HEADERS.contains(k.as_str()) {
Expand Down Expand Up @@ -122,8 +128,15 @@ impl Headers {
use crate::cbor::DataItem;
let connection = self.connection_headers();
let mut fields: Vec<(&str, &str)> = vec![];
let html = self.0.get("content-type").map_or(false, |t|
matches!(parse_content_type_header(t),
Ok(MediaType {primary_type, sub_type, ..})
if primary_type.eq_ignore_ascii_case("text") && sub_type.eq_ignore_ascii_case("html")));
for (k, v) in self.0.iter() {
if UNCACHED_HEADERS.contains(k.as_str()) || STATEFUL_HEADERS.contains(k.as_str()) || connection.contains(k) {
if STRIP_RESPONSE_HEADERS.contains(k.as_str()) || DONT_SIGN_RESPONSE_HEADERS.contains(k.as_str()) || connection.contains(k) {
continue;
}
if !html && (STRIP_SUBRESOURCE_RESPONSE_HEADERS.contains(k.as_str()) || crate::id_headers::ID_HEADERS.contains(k.as_str())) {
continue;
}
fields.push((k, v));
Expand Down Expand Up @@ -156,7 +169,7 @@ impl Headers {
pub fn signature_duration(&self) -> Result<Duration, String> {
// Default to 7 days unless a cache-control directive lowers it.
if let Some(value) = self.0.get("cache-control") {
if let Ok(duration) = crate::http_parser::parse_cache_control_header(value) {
if let Ok(duration) = parse_cache_control_header(value) {
// https://github.com/google/webpackager/blob/main/docs/cache_requirements.md
const MIN_DURATION: Duration = Duration::from_secs(120);
return if duration >= MIN_DURATION {
Expand All @@ -171,7 +184,8 @@ impl Headers {
}
}

static UNCACHED_HEADERS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
// These headers are always stripped before signing.
static STRIP_RESPONSE_HEADERS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
vec![
// https://wicg.github.io/webpackage/draft-yasskin-httpbis-origin-signed-exchanges-impl.html#name-uncached-header-fields
"connection",
Expand All @@ -190,10 +204,37 @@ static UNCACHED_HEADERS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
// https://github.com/google/webpackager/blob/master/docs/cache_requirements.md
"variant-key-04",
"variants-04",

].into_iter().collect()
});

// Response headers that are dropped before signing whenever the response is
// not text/html. They don't affect the semantics of the response inside an
// SXG, but their values vary frequently, and that variation would prevent the
// SXG from being used as a subresource because of the header-integrity
// requirement described in:
// https://github.com/WICG/webpackage/blob/main/explainers/signed-exchange-subresource-substitution.md
static STRIP_SUBRESOURCE_RESPONSE_HEADERS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
    // All of these are standard headers. Signing them has no effect on the
    // browser's caching behavior, since the SXG is only stored in the
    // referring document's prefetch cache, per
    // https://wicg.github.io/webpackage/loading.html#document-prefetched-signed-exchanges-for-navigation.
    // In theory the Date header could influence SXG loading, according to
    // https://wicg.github.io/webpackage/loading.html#mp-http-network-or-cache-fetch,
    // but I don't see evidence of that in
    // https://source.chromium.org/chromium/chromium/src/+/main:content/browser/web_package/.
    const NAMES: [&str; 7] = [
        "age",
        "date",
        "expires",
        "last-modified",
        "server-timing",
        "via",
        "warning",
    ];
    NAMES.iter().copied().collect()
});

static STATEFUL_HEADERS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
// These headers prevent signing, unless stripped by the strip_response_headers param.
static DONT_SIGN_RESPONSE_HEADERS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
vec![
// https://wicg.github.io/webpackage/draft-yasskin-http-origin-signed-responses.html#stateful-headers
"authentication-control",
Expand Down Expand Up @@ -229,7 +270,7 @@ static CACHE_CONTROL_HEADERS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
// and either accept_filter != PrefersSxg or its `q` value is 1.
fn validate_accept_header(accept: &str, accept_filter: AcceptFilter) -> Result<(), String> {
let accept = accept.trim();
let accept = crate::http_parser::parse_accept_header(accept)?;
let accept = parse_accept_header(accept)?;
if accept.len() == 0 {
return Err(format!("Accept header is empty"));
}
Expand Down Expand Up @@ -383,4 +424,13 @@ mod tests {
assert_eq!(headers(vec![("cache-control", "doesn't even parse")]).signature_duration().unwrap(), SEVEN_DAYS);
assert_eq!(headers(vec![("cache-control", "max=, max-age=3600")]).signature_duration().unwrap(), SEVEN_DAYS);
}

// === get_signed_headers_bytes ===
#[test]
fn strip_id_headers() {
    // Non-HTML response: the expected signed bytes contain no x-request-id entry.
    let image = headers(vec![
        ("content-type", "image/jpeg"),
        ("x-request-id", "abcdef123"),
    ]);
    assert_eq!(
        image.get_signed_headers_bytes(200, &[]),
        b"\xA4FdigestMmi-sha256-03=G:statusC200Lcontent-typeJimage/jpegPcontent-encodingLmi-sha256-03"
    );

    // text/html response: the expected signed bytes still carry x-request-id.
    let document = headers(vec![
        ("content-type", "text/html;charset=utf-8"),
        ("x-request-id", "abcdef123"),
    ]);
    assert_eq!(
        document.get_signed_headers_bytes(200, &[]),
        b"\xA5FdigestMmi-sha256-03=G:statusC200Lcontent-typeWtext/html;charset=utf-8Lx-request-idIabcdef123Pcontent-encodingLmi-sha256-03"
    );
}
}
9 changes: 8 additions & 1 deletion sxg_rs/src/http_parser/mod.rs
Expand Up @@ -15,11 +15,12 @@
mod accept;
mod base;
mod cache_control;
mod media_type;
pub mod media_type;

use nom::{
IResult,
character::complete::char as char1,
combinator::complete,
eof,
separated_list0,
separated_pair,
Expand Down Expand Up @@ -49,6 +50,12 @@ pub fn parse_accept_header(input: &str) -> Result<Vec<accept::Accept>, String> {
parse_vec(input, accept::accept)
}

pub fn parse_content_type_header(input: &str) -> Result<media_type::MediaType, String> {
complete(media_type::media_type)(input)
.map(|(_, output)| output)
.map_err(format_nom_err)
}

// Returns the freshness lifetime for a shared cache.
pub fn parse_cache_control_header(input: &str) -> Result<Duration, String> {
let directives = parse_vec(input, cache_control::directive)?;
Expand Down

0 comments on commit d70689c

Please sign in to comment.