Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Process Link header for cache requirements. #43

Merged
merged 4 commits on Sep 3, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
150 changes: 138 additions & 12 deletions sxg_rs/src/headers.rs
Expand Up @@ -16,6 +16,8 @@ use crate::http_parser::{
parse_accept_header,
parse_cache_control_header,
parse_content_type_header,
parse_link_header,
link::Link,
media_type::MediaType,
};
use std::collections::{BTreeSet, HashMap, HashSet};
Expand All @@ -24,6 +26,7 @@ use crate::http::HeaderFields;
use serde::Deserialize;
use std::cmp::min;
use std::time::Duration;
use url::Url;

#[derive(Debug)]
pub struct Headers(HashMap<String, String>);
Expand Down Expand Up @@ -124,34 +127,90 @@ impl Headers {
}
Ok(())
}
pub fn get_signed_headers_bytes(&self, status_code: u16, mice_digest: &[u8]) -> Vec<u8> {
use crate::cbor::DataItem;
// Filters the link header to comply with
// https://github.com/google/webpackager/blob/main/docs/cache_requirements.md.
fn process_link_header(value: &str, fallback_url: &Url) -> String {
static ALLOWED_PARAM: Lazy<HashSet<&'static str>> = Lazy::new(|| {
twifkak marked this conversation as resolved.
Show resolved Hide resolved
vec!["as", "header-integrity", "media", "rel", "imagesrcset", "imagesizes", "crossorigin"].into_iter().collect()});
static ALLOWED_REL: Lazy<HashSet<&'static str>> = Lazy::new(|| {
vec!["preload", "allowed-alt-sxg"].into_iter().collect()});
static ALLOWED_CROSSORIGIN: Lazy<HashSet<&'static str>> = Lazy::new(|| {
vec!["", "anonymous"].into_iter().collect()});
match parse_link_header(value) {
Ok(links) => {
let mut count = 0;
links.into_iter().filter_map(|link| {
let uri: String = fallback_url.join(&link.uri).ok()?.into();
let params_valid = link.params.iter().all(|(k, v)|
ALLOWED_PARAM.contains(k) &&
match *k {
"rel" => matches!(v, Some(v) if ALLOWED_REL.contains(v.as_str())),
"crossorigin" => matches!(v, Some(v) if ALLOWED_CROSSORIGIN.contains(v.as_str())),
_ => true,
}
);
if params_valid {
if link.params.iter().any(|(k,v)| *k == "rel" && matches!(v, Some(v) if v == "preload")) {
if count >= 20 {
return None
}
count += 1;
}
Some(Link{uri: &uri, ..link}.serialize())
} else {
None
}
}).collect::<Vec<String>>().join(",")
},
Err(_) => "".into(),
}
}
// Returns the signed headers via the serializer callback instead of return
// value, because it contains a mix of &str and String. This makes it easy
// to test the intermediate Vec<(&str, &str)> without sacrificing
// performance by copying it into a Vec<(String, String)>.
fn get_signed_headers<O, F>(&self, fallback_url: &Url, status_code: u16, mice_digest: &[u8], serializer: F) -> O
        where F: Fn(Vec<(&str, &str)>) -> O {
    let connection = self.connection_headers();
    let mut fields: Vec<(&str, &str)> = vec![];
    // ID headers are only kept on HTML responses; detect HTML from content-type.
    let html = self.0.get("content-type").map_or(false, |t|
        matches!(parse_content_type_header(t),
            Ok(MediaType {primary_type, sub_type, ..})
            if primary_type.eq_ignore_ascii_case("text") && sub_type.eq_ignore_ascii_case("html")));
    // The link header is rewritten (not copied verbatim) so that it complies
    // with the cache requirements; an empty result omits the header entirely.
    let link = self.0.get("link").map_or("".into(), |value| Self::process_link_header(value, fallback_url));
    if !link.is_empty() {
        fields.push(("link", &link));
    }
    for (k, v) in self.0.iter() {
        if STRIP_RESPONSE_HEADERS.contains(k.as_str()) || DONT_SIGN_RESPONSE_HEADERS.contains(k.as_str()) || connection.contains(k) {
            continue;
        }
        if !html && (STRIP_SUBRESOURCE_RESPONSE_HEADERS.contains(k.as_str()) || crate::id_headers::ID_HEADERS.contains(k.as_str())) {
            continue;
        }
        if k == "link" {
            // Handled above.
            continue;
        }
        fields.push((k, v));
    }
    let status_code = status_code.to_string();
    let digest = format!("mi-sha256-03={}", ::base64::encode(&mice_digest));
    fields.push((":status", &status_code));
    fields.push(("content-encoding", "mi-sha256-03"));
    fields.push(("digest", &digest));
    serializer(fields)
}
/// Serializes the signed headers into their CBOR map encoding, with each
/// header name and value emitted as a CBOR byte string.
pub fn get_signed_headers_bytes(&self, fallback_url: &Url, status_code: u16, mice_digest: &[u8]) -> Vec<u8> {
    use crate::cbor::DataItem;
    self.get_signed_headers(fallback_url, status_code, mice_digest, |fields| {
        let entries = fields
            .iter()
            .map(|(name, value)| (DataItem::ByteString(name.as_bytes()), DataItem::ByteString(value.as_bytes())))
            .collect();
        DataItem::Map(entries).serialize()
    })
}
// Connection-specific headers per
// https://datatracker.ietf.org/doc/html/rfc7230#section-6.1.
Expand Down Expand Up @@ -425,12 +484,79 @@ mod tests {
assert_eq!(headers(vec![("cache-control", "max=, max-age=3600")]).signature_duration().unwrap(), SEVEN_DAYS);
}

// === process_link_header ===
#[test]
fn process_link_header() {
    use std::iter::repeat;
    let url = Url::parse("https://foo.com").unwrap();
    // Whitespace around ';' and '=' is normalized; unnecessary quoting is dropped.
    assert_eq!(Headers::process_link_header(r#"<https://foo.com/> ; rel = "preload""#, &url),
        "<https://foo.com/>;rel=preload");
    {
        // Only the first 20 rel=preload links are kept.
        let link = "<https://foo.com/>;rel=preload";
        assert_eq!(Headers::process_link_header(&repeat(link).take(21).collect::<Vec<&str>>().join(","), &url),
            repeat(link).take(20).collect::<Vec<&str>>().join(","));
    }
    {
        // rel=allowed-alt-sxg links don't count against the preload cap, so
        // the trailing allowed-alt-sxg link survives even when preloads are capped.
        let link = r#"<https://foo.com/>;rel=preload,<https://foo.com/>;rel=allowed-alt-sxg;header-integrity="sha256-OcpYAC5zFQtAXUURzXkMDDxMbxuEeWVjdRCDcLcBhBY=""#;
        assert_eq!(Headers::process_link_header(&repeat(link).take(21).collect::<Vec<&str>>().join(","), &url),
            repeat(link).take(20).collect::<Vec<&str>>().join(",") + r#",<https://foo.com/>;rel=allowed-alt-sxg;header-integrity="sha256-OcpYAC5zFQtAXUURzXkMDDxMbxuEeWVjdRCDcLcBhBY=""#);
    }
    // Relative URIs are resolved against the fallback URL.
    assert_eq!(Headers::process_link_header("</foo>;rel=preload", &url),
        "<https://foo.com/foo>;rel=preload");
    assert_eq!(Headers::process_link_header("<../quux>;rel=preload", &url.join("/bar/baz/").unwrap()),
        "<https://foo.com/bar/quux>;rel=preload");
    // Disallowed rel values and unknown params cause the link to be dropped.
    assert_eq!(Headers::process_link_header("<https://foo.com/>;rel=prefetch", &url),
        "");
    assert_eq!(Headers::process_link_header("<https://foo.com/>;other", &url),
        "");
    // Dropping is per-link: the valid preload survives its invalid sibling.
    assert_eq!(Headers::process_link_header("<https://foo.com/>;rel=preload,<https://foo.com/>;rel=prefetch", &url),
        "<https://foo.com/>;rel=preload");
    // Image preload params (as, imagesizes, imagesrcset) are allowed through.
    assert_eq!(Headers::process_link_header(r#"<img.jpg>;rel=preload;as=image;imagesizes=800px;imagesrcset="img.jpg 800w""#, &url),
        r#"<https://foo.com/img.jpg>;rel=preload;as=image;imagesizes=800px;imagesrcset="img.jpg 800w""#);
}

// === get_signed_headers ===
#[test]
fn strip_id_headers() {
    let url = Url::parse("https://foo.com").unwrap();
    // Non-HTML (subresource) responses: ID headers like x-request-id are stripped.
    assert_eq!(headers(vec![("content-type", "image/jpeg"), ("x-request-id", "abcdef123")]).get_signed_headers::<HashMap<String, String>, _>(&url, 200, &[], header_fields),
        header_fields::<HashMap<String, String>>(vec![
            ("content-type", "image/jpeg"),
            // x-request-id is missing
            (":status", "200"),
            ("content-encoding", "mi-sha256-03"),
            ("digest", "mi-sha256-03=")]));
    // HTML responses keep ID headers.
    assert_eq!(headers(vec![("content-type", "text/html;charset=utf-8"), ("x-request-id", "abcdef123")]).get_signed_headers::<HashMap<String, String>, _>(&url, 200, &[], header_fields),
        header_fields::<HashMap<String, String>>(vec![
            ("content-type", "text/html;charset=utf-8"),
            ("x-request-id", "abcdef123"),
            (":status", "200"),
            ("content-encoding", "mi-sha256-03"),
            ("digest", "mi-sha256-03=")]));
}
#[test]
fn includes_link_if_valid() {
    let url = Url::parse("https://foo.com").unwrap();
    // A link with an allowed rel value is included in the signed headers.
    assert_eq!(headers(vec![("content-type", "text/html"), ("link", "<https://foo.com/>;rel=preload")]).get_signed_headers::<HashMap<String, String>, _>(&url, 200, &[], header_fields),
        header_fields::<HashMap<String, String>>(vec![
            ("content-type", "text/html"),
            ("link", "<https://foo.com/>;rel=preload"),
            (":status", "200"),
            ("content-encoding", "mi-sha256-03"),
            ("digest", "mi-sha256-03=")]));
    // A link with a disallowed rel value is omitted entirely.
    assert_eq!(headers(vec![("content-type", "text/html"), ("link", r#"</foo>;rel=prefetch"#)]).get_signed_headers::<HashMap<String, String>, _>(&url, 200, &[], header_fields),
        header_fields::<HashMap<String, String>>(vec![
            ("content-type", "text/html"),
            (":status", "200"),
            ("content-encoding", "mi-sha256-03"),
            ("digest", "mi-sha256-03=")]));
}

// === get_signed_headers_bytes ===
#[test]
fn get_signed_headers_bytes() {
    let url = Url::parse("https://foo.com").unwrap();
    assert_eq!(headers(vec![("content-type", "image/jpeg")]).get_signed_headers_bytes(&url, 200, &[]),
        b"\xA4FdigestMmi-sha256-03=G:statusC200Lcontent-typeJimage/jpegPcontent-encodingLmi-sha256-03");
    // With no link header present, fallback_url has no effect on the output,
    // so the expected bytes match the pre-link-processing encoding.
    assert_eq!(headers(vec![("content-type", "text/html;charset=utf-8"), ("x-request-id", "abcdef123")]).get_signed_headers_bytes(&url, 200, &[]),
        b"\xA5FdigestMmi-sha256-03=G:statusC200Lcontent-typeWtext/html;charset=utf-8Lx-request-idIabcdef123Pcontent-encodingLmi-sha256-03");
}
}
4 changes: 2 additions & 2 deletions sxg_rs/src/http_parser/base.rs
Expand Up @@ -35,7 +35,7 @@ pub fn token(input: &str) -> IResult<&str, &str> {
take_while1(is_tchar)(input)
}

fn is_tchar(c: char) -> bool {
pub fn is_tchar(c: char) -> bool {
match c {
'!' | '#' | '$' | '%' | '&' | '\'' | '*' => true,
'+' | '-' | '.' | '^' | '_' | '`' | '|' | '~' => true,
Expand Down Expand Up @@ -83,7 +83,7 @@ fn is_qdtext(c: char) -> bool {
}
}

fn is_quoted_pair_payload(c: char) -> bool {
pub fn is_quoted_pair_payload(c: char) -> bool {
match c {
'\t' | ' ' => true,
'\x21'..='\x7E' => true,
Expand Down
140 changes: 140 additions & 0 deletions sxg_rs/src/http_parser/link.rs
@@ -0,0 +1,140 @@
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use nom::{
IResult,
bytes::complete::take_while,
character::complete::char,
combinator::{map, opt},
multi::many0,
sequence::{delimited, pair, preceded, terminated, tuple},
};
use super::base::{
is_quoted_pair_payload,
is_tchar,
ows,
parameter_value,
token,
};

/// Represents an individual link directive i.e. an instance of `link-value`
/// from https://datatracker.ietf.org/doc/html/rfc8288#section-3.
/// Parameters with alternate character encodings (via RFC8187) are not
/// supported.
#[derive(Clone, Debug, PartialEq)]
pub struct Link<'a> {
    /// The URI-Reference that appeared between `<` and `>`, as written
    /// (no resolution against a base URL happens at parse time).
    pub uri: &'a str,
    /// Params in order of appearance; `None` means a valueless param
    /// (e.g. the `bar` in `</foo>;bar`).
    pub params: Vec<(&'a str, Option<String>)>,
}

fn quote(value: &str) -> Option<String> {
if value.chars().all(|c| is_tchar(c)) {
Some(value.into())
} else if value.chars().all(|c| is_quoted_pair_payload(c)) {
Some("\"".to_string() + &value.chars().map(|c: char| {
let mut quoted_pair = if c == '\\' || c == '"' {
"\\"
} else {
""
}.to_string();
quoted_pair.push(c);
quoted_pair
twifkak marked this conversation as resolved.
Show resolved Hide resolved
}).collect::<String>() + "\"")
} else {
None
}
}

impl<'a> Link<'a> {
    // Serializes into a `link-value` string suitable for a Link header.
    // Valueless params serialize as `;name`; valued params as `;name=value`
    // (quoted when necessary); params whose value cannot be represented
    // (see `quote`) are dropped.
    pub fn serialize(&self) -> String {
        let mut out = String::new();
        out.push('<');
        out.push_str(self.uri);
        out.push('>');
        for (k, v) in &self.params {
            match v {
                None => {
                    out.push(';');
                    out.push_str(k);
                }
                Some(v) => {
                    if let Some(quoted) = quote(v) {
                        out.push(';');
                        out.push_str(k);
                        out.push('=');
                        out.push_str(&quoted);
                    }
                }
            }
        }
        out
    }
}

// Scans the URI-Reference portion of a link-value.
// We don't need to fully parse the URI ref using nom. It would be
// sufficient to scan up until the closing delimiter '>' and then pass the result to the
// URL class for parsing and validation. For defense in depth, we only allow
// the characters specified in
// https://datatracker.ietf.org/doc/html/rfc3986#appendix-A.
fn uri_ref(input: &str) -> IResult<&str, &str> {
    take_while(|c: char|
        match c {
            // unreserved
            'A'..='Z' | 'a'..='z' | '0'..='9' | '-' | '.' | '_' | '~' => true,
            // gen-delims: RFC 3986 lists ":" "/" "?" "#" "[" "]" "@".
            // (The previous allowlist contained '|', which is not a URI
            // character per appendix A.)
            ':' | '/' | '?' | '#' | '[' | ']' | '@' => true,
            // sub-delims
            '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '=' => true,
            // pct-encoded
            '%' => true,
            _ => false,
        }
    )(input)
}

// Parses a single `link-param`: a token name, optionally followed by `=` and
// a value (token or quoted-string), with optional whitespace around `=`.
fn link_param<'a>(input: &'a str) -> IResult<&str, (&'a str, Option<String>)> {
    pair(terminated(token, ows),
        opt(preceded(pair(char('='), ows), parameter_value)))(input)
}

// Parses one `link-value`: a `<`-delimited URI-Reference followed by any
// number of `;`-separated link-params.
pub fn link(input: &str) -> IResult<&str, Link> {
    map(pair(delimited(char('<'), uri_ref, char('>')),
        many0(preceded(tuple((ows, char(';'), ows)), link_param))), |(uri, params)|
        Link{uri, params}
    )(input)
}

#[cfg(test)]
mod tests {
    use super::*;
    // Parsing: empty URIs, URI-internal delimiters, bare and valued params,
    // and quoted-string escape handling.
    #[test]
    fn parse() {
        assert_eq!(link("<>").unwrap(), ("", Link{uri: "", params: vec![]}));
        // ',' and ';' are allowed inside the <>-delimited URI.
        assert_eq!(link("</foo,bar;baz>").unwrap(), ("", Link{uri: "/foo,bar;baz", params: vec![]}));
        assert_eq!(link("</foo>;bar;baz=quux").unwrap(),
            ("", Link{uri: "/foo",
                params: vec![("bar", None), ("baz", Some("quux".into()))]}));
        // Quoted-pairs are unescaped during parsing.
        assert_eq!(link(r#"</foo>;bar="baz \\\"quux""#).unwrap(),
            ("", Link{uri: "/foo",
                params: vec![("bar", Some(r#"baz \"quux"#.into()))]}));
        // An unterminated quoted-string is reported as incomplete input.
        assert!(matches!(link(r#"</foo>;bar="baz \""#).unwrap_err(), nom::Err::Incomplete(_)));
    }
    // Serialization: token vs quoted-string values, escaping, and dropping
    // params whose value cannot be represented.
    #[test]
    fn serialize() {
        assert_eq!(Link{uri: "/foo", params: vec![("bar", None)]}.serialize(),
            "</foo>;bar");
        assert_eq!(Link{uri: "/foo", params: vec![("bar", Some("baz".into()))]}.serialize(),
            "</foo>;bar=baz");
        assert_eq!(Link{uri: "/foo", params: vec![("bar", Some("baz quux".into()))]}.serialize(),
            r#"</foo>;bar="baz quux""#);
        assert_eq!(Link{uri: "/foo", params: vec![("bar", Some(r#"baz\"quux"#.into()))]}.serialize(),
            r#"</foo>;bar="baz\\\"quux""#);
        // \x7f can't appear in a quoted-string, so the param is dropped.
        assert_eq!(Link{uri: "/foo", params: vec![("bar", Some("\x7f".into()))]}.serialize(),
            "</foo>");
    }
}
13 changes: 9 additions & 4 deletions sxg_rs/src/http_parser/mod.rs
Expand Up @@ -16,6 +16,7 @@ mod accept;
mod base;
mod cache_control;
pub mod media_type;
pub mod link;

use nom::{
IResult,
Expand Down Expand Up @@ -50,14 +51,18 @@ pub fn parse_accept_header(input: &str) -> Result<Vec<accept::Accept>, String> {
parse_vec(input, accept::accept)
}

// Returns the freshness lifetime for a shared cache.
// Errors if the header fails to parse or specifies no explicit lifetime.
pub fn parse_cache_control_header(input: &str) -> Result<Duration, String> {
    let directives = parse_vec(input, cache_control::directive)?;
    cache_control::freshness_lifetime(directives).ok_or("Freshness lifetime is implicit".into())
}

// Parses a content-type header into its media type; the entire input must be
// consumed for the parse to succeed.
pub fn parse_content_type_header(input: &str) -> Result<media_type::MediaType, String> {
    complete(media_type::media_type)(input)
        .map(|(_, output)| output)
        .map_err(format_nom_err)
}

// Returns the freshness lifetime for a shared cache.
pub fn parse_cache_control_header(input: &str) -> Result<Duration, String> {
let directives = parse_vec(input, cache_control::directive)?;
cache_control::freshness_lifetime(directives).ok_or("Freshness lifetime is implicit".into())
pub fn parse_link_header(input: &str) -> Result<Vec<link::Link>, String> {
parse_vec(input, link::link)
}
2 changes: 1 addition & 1 deletion sxg_rs/src/lib.rs
Expand Up @@ -89,7 +89,7 @@ impl SxgWorker {
// 16384 is the max mice record size allowed by SXG spec.
// https://wicg.github.io/webpackage/draft-yasskin-http-origin-signed-responses.html#section-3.5-7.9.1
let (mice_digest, payload_body) = crate::mice::calculate(payload_body, 16384);
let signed_headers = payload_headers.get_signed_headers_bytes(status_code, &mice_digest);
let signed_headers = payload_headers.get_signed_headers_bytes(&fallback_base, status_code, &mice_digest);
let cert_url = cert_base.join(&format!("{}{}", &self.config.cert_url_dirname, &self.cert_basename()))
.map_err(|_| "Failed to parse cert_url_dirname")?;
let validity_url = fallback_base.join(&format!("{}{}", &self.config.validity_url_dirname, "validity"))
Expand Down