Refactor resource_info to support v2.3 resource names
closes #269
allada committed Sep 19, 2023
1 parent 082a85c commit d3d0b64
Showing 5 changed files with 741 additions and 78 deletions.
4 changes: 2 additions & 2 deletions cas/grpc_service/bytestream_server.rs
@@ -226,7 +226,7 @@ impl ByteStreamServer {

let read_limit =
usize::try_from(read_request.read_limit).err_tip(|| "read_limit is not convertible to usize")?;
let resource_info = ResourceInfo::new(&read_request.resource_name)?;
let resource_info = ResourceInfo::new(&read_request.resource_name, false)?;
let instance_name = resource_info.instance_name;
let store = self
.stores
@@ -433,7 +433,7 @@ impl ByteStreamServer {
&self,
query_request: &QueryWriteStatusRequest,
) -> Result<Response<QueryWriteStatusResponse>, Error> {
let mut resource_info = ResourceInfo::new(&query_request.resource_name)?;
let mut resource_info = ResourceInfo::new(&query_request.resource_name, true)?;

let store_clone = self
.stores
19 changes: 3 additions & 16 deletions cas/grpc_service/tests/bytestream_server_test.rs
@@ -493,13 +493,7 @@ pub mod read_tests {
store.update_oneshot(digest, VALUE1.into()).await?;

let read_request = ReadRequest {
resource_name: format!(
"{}/uploads/{}/blobs/{}/{}",
INSTANCE_NAME,
"4dcec57e-1389-4ab5-b188-4a59f22ceb4b", // Randomly generated.
HASH1,
VALUE1.len()
),
resource_name: format!("{}/blobs/{}/{}", INSTANCE_NAME, HASH1, VALUE1.len()),
read_offset: 0,
read_limit: VALUE1.len() as i64,
};
@@ -539,13 +533,7 @@
store.update_oneshot(digest, raw_data.clone().into()).await?;

let read_request = ReadRequest {
resource_name: format!(
"{}/uploads/{}/blobs/{}/{}",
INSTANCE_NAME,
"4dcec57e-1389-4ab5-b188-4a59f22ceb4b", // Randomly generated.
HASH1,
raw_data.len()
),
resource_name: format!("{}/blobs/{}/{}", INSTANCE_NAME, HASH1, raw_data.len()),
read_offset: 0,
read_limit: raw_data.len() as i64,
};
@@ -572,9 +560,8 @@ pub mod read_tests {
let bs_server = make_bytestream_server(store_manager.as_ref()).err_tip(|| "Couldn't make store")?;
let read_request = ReadRequest {
resource_name: format!(
"{}/uploads/{}/blobs/{}/{}",
"{}/blobs/{}/{}",
INSTANCE_NAME,
"4dcec57e-1389-4ab5-b188-4a59f22ceb4b", // Randomly generated.
HASH1,
55, // Dummy value
),
220 changes: 181 additions & 39 deletions util/resource_info.rs
@@ -12,61 +12,203 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use error::{error_if, Error, ResultExt};
use error::{error_if, make_input_err, Error, ResultExt};

const ERROR_MSG: &str = concat!(
"Expected resource_name to be of pattern ",
"'{?instance_name/}(?uploads/{uuid}/)blobs/{?/digest_function}{/hash}/{size}{?/optional_metadata}' or ",
"'{?instance_name/}(?uploads/{uuid}/)compressed-blobs{?/compressor}{?/digest_function}{/hash}/{size}{?/optional_metadata}'",
);
const COMPRESSORS: [&str; 4] = ["identity", "zstd", "deflate", "brotli"];
const DIGEST_FUNCTIONS: [&str; 9] = [
"sha256",
"sha1",
"md5",
"vso",
"sha384",
"sha512",
"murmur3",
"sha256tree",
"blake3",
];

// Named constant to make the code easier to read when adding the slash size.
const SLASH_SIZE: usize = 1;

// Rules are as follows:
//
// "instance_name" may contain slashes and may contain or equal "uploads", "compressed-blobs" and "blobs".
// if is_upload is false:
// {instance_name}/ compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
// {instance_name}/ compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
// {instance_name}/ blobs/ {digest_function/}{hash}/{size}{/optional_metadata}
// {instance_name}/ blobs/ {digest_function/}{hash}/{size}
// compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
// compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
// blobs/ {digest_function/}{hash}/{size}{/optional_metadata}
// blobs/ {digest_function/}{hash}/{size}
// {instance_name}/ compressed-blobs/{compressor}/ {hash}/{size}{/optional_metadata}
// {instance_name}/ compressed-blobs/{compressor}/ {hash}/{size}
// {instance_name}/ blobs/ {hash}/{size}{/optional_metadata}
// {instance_name}/ blobs/ {hash}/{size}
// compressed-blobs/{compressor}/ {hash}/{size}{/optional_metadata}
// compressed-blobs/{compressor}/ {hash}/{size}
//
// blobs/ {hash}/{size}{/optional_metadata}
// blobs/ {hash}/{size}
//
// if is_upload is true:
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
// {instance_name}/uploads/{uuid}/blobs/ {digest_function/}{hash}/{size}{/optional_metadata}
// {instance_name}/uploads/{uuid}/blobs/ {digest_function/}{hash}/{size}
// uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}{/optional_metadata}
// uploads/{uuid}/compressed-blobs/{compressor}/{digest_function/}{hash}/{size}
// uploads/{uuid}/blobs/ {digest_function/}{hash}/{size}{/optional_metadata}
// uploads/{uuid}/blobs/ {digest_function/}{hash}/{size}
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/ {hash}/{size}{/optional_metadata}
// {instance_name}/uploads/{uuid}/compressed-blobs/{compressor}/ {hash}/{size}
// {instance_name}/uploads/{uuid}/blobs/ {hash}/{size}{/optional_metadata}
// {instance_name}/uploads/{uuid}/blobs/ {hash}/{size}
// uploads/{uuid}/compressed-blobs/{compressor}/ {hash}/{size}{/optional_metadata}
// uploads/{uuid}/compressed-blobs/{compressor}/ {hash}/{size}
// uploads/{uuid}/blobs/ {hash}/{size}{/optional_metadata}
// uploads/{uuid}/blobs/ {hash}/{size}
//
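//
// Illustrative sketch (editor's example, not part of this commit) of how one of
// the upload patterns above would parse; the uuid and hash are dummy values
// shortened for readability, and the field names match the struct below.
//
//     let info = ResourceInfo::new(
//         "main/uploads/3ce7c20a-0000-4000-8000-000000000000/compressed-blobs/zstd/blake3/deadbeef/142",
//         /* is_upload = */ true,
//     ).unwrap();
//     assert_eq!(info.instance_name, "main");
//     assert_eq!(info.uuid, Some("3ce7c20a-0000-4000-8000-000000000000"));
//     assert_eq!(info.compressor, Some("zstd"));
//     assert_eq!(info.digest_function, Some("blake3"));
//     assert_eq!(info.hash, "deadbeef");
//     assert_eq!(info.expected_size, 142);
//     assert_eq!(info.optional_metadata, None);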

// Utility struct for converting Bazel's URI-like resource path into its parts.
#[derive(Debug, Default)]
pub struct ResourceInfo<'a> {
pub instance_name: &'a str,
pub uuid: Option<&'a str>,
pub compressor: Option<&'a str>,
pub digest_function: Option<&'a str>,
pub hash: &'a str,
pub expected_size: usize,
pub optional_metadata: Option<&'a str>,
}

impl<'a> ResourceInfo<'a> {
pub fn new(resource_name: &'a str) -> Result<ResourceInfo<'a>, Error> {
let mut parts = resource_name.splitn(6, '/').peekable();
const ERROR_MSG: &str = concat!(
"Expected resource_name to be of pattern ",
"'{?instance_name/}uploads/{uuid}/blobs/{hash}/{size}' or ",
"'{?instance_name/}blobs/{hash}/{size}'",
pub fn new(resource_name: &'a str, is_upload: bool) -> Result<ResourceInfo<'a>, Error> {
// At most 7 slash-separated parts (scanning from the right) are needed to reach the "(compressed-)blobs" section.
let mut rparts = resource_name.rsplitn(7, '/');
let mut output = ResourceInfo::default();
let mut end_bytes_processed = 0;
let end_state = recursive_parse(&mut rparts, &mut output, State::Unknown, &mut end_bytes_processed)?;
error_if!(
end_state != State::OptionalMetadata,
"Expected the final state to be OptionalMetadata. Got: {end_state:?}"
);

let mut instance_name = "";
let maybe_instance_name = parts.next().err_tip(|| ERROR_MSG)?;
let peek_next = parts.peek().err_tip(|| ERROR_MSG)?;
let blobs_or_uploads = if *peek_next == "uploads" || *peek_next == "blobs" {
instance_name = maybe_instance_name;
// We do have an instance_name, so set blobs_or_uploads to the peek_next and consume it.
parts.next().err_tip(|| ERROR_MSG)?
// Slice off the processed parts of `resource_name`.
let beginning_part = if end_bytes_processed == resource_name.len() {
""
} else {
// We don't have an instance_name, so use our first item as blobs_or_uploads.
maybe_instance_name
&resource_name[..resource_name.len() - end_bytes_processed - SLASH_SIZE]
};
let mut uuid = None;
if blobs_or_uploads == "uploads" {
uuid = Some(parts.next().err_tip(|| ERROR_MSG)?);
let blobs = parts.next().err_tip(|| ERROR_MSG)?;
error_if!(
blobs != "blobs",
"Expected resource_name to have 'blobs' here. Got: {}",
blobs_or_uploads
);
if !is_upload {
output.instance_name = beginning_part;
return Ok(output);
}

let hash = parts.next().err_tip(|| ERROR_MSG)?;
let raw_digest_size = parts.next().err_tip(|| ERROR_MSG)?;
let expected_size = raw_digest_size.parse::<usize>().err_tip(|| {
format!(
"Digest size_bytes was not convertible to usize. Got: {}",
raw_digest_size
)
})?;
// If it's an upload, the remaining prefix at this point will be:
// `{?instance_name}/uploads/{uuid}`.
// Remember, `instance_name` can contain slashes and/or special names
// like "blobs" or "uploads" (see the sketch after this impl block).
let mut parts = beginning_part.rsplitn(3, '/');
output.uuid = Some(parts.next().err_tip(|| ERROR_MSG)?);
{
// Sanity check that our next item is "uploads".
let uploads = parts.next().err_tip(|| ERROR_MSG)?;
error_if!(uploads != "uploads", "Expected part to be 'uploads'. Got: {uploads}");
}

Ok(ResourceInfo {
instance_name,
uuid,
hash,
expected_size,
})
// `instance_name` is optional.
if let Some(instance_name) = parts.next() {
output.instance_name = instance_name;
}
Ok(output)
}
}
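
// Illustrative sketch (editor's example, not part of this commit): with
// `is_upload = true` the `{?instance_name}/uploads/{uuid}` prefix is peeled off
// from the right, so an instance_name that itself contains slashes still parses.
// Dummy uuid/hash values are used for brevity.
//
//     let info = ResourceInfo::new(
//         "org/team/uploads/3ce7c20a-0000-4000-8000-000000000000/blobs/deadbeef/4",
//         /* is_upload = */ true,
//     ).unwrap();
//     assert_eq!(info.instance_name, "org/team");
//     assert_eq!(info.uuid, Some("3ce7c20a-0000-4000-8000-000000000000"));
//     assert_eq!(info.digest_function, None);
//     assert_eq!(info.hash, "deadbeef");
//     assert_eq!(info.expected_size, 4);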

#[derive(Debug, PartialEq)]
enum State {
Unknown,
Compressor,
DigestFunction,
Hash,
Size,
OptionalMetadata,
}

// Iterate backwards looking for "(compressed-)blobs"; once found, move forward
// populating the output struct. This recursive function uses the call stack to
// temporarily hold the reference to the previous item, avoiding the need for
// a heap allocation. (A short walk-through follows the function body.)
fn recursive_parse<'a>(
rparts: &mut impl Iterator<Item = &'a str>,
output: &mut ResourceInfo<'a>,
mut state: State,
bytes_processed: &mut usize,
) -> Result<State, Error> {
let part = rparts.next().err_tip(|| ERROR_MSG)?;
if state == State::Unknown {
if part == "blobs" {
*bytes_processed = part.len() + SLASH_SIZE;
return Ok(State::DigestFunction);
}
if part == "compressed-blobs" {
*bytes_processed = part.len() + SLASH_SIZE;
return Ok(State::Compressor);
}
state = recursive_parse(rparts, output, state, bytes_processed)?;
}

loop {
match state {
State::Unknown => {
return Err(make_input_err!(
"Unknown state should never be reached in ResourceInfo::new"
))
}
State::Compressor => {
state = State::DigestFunction;
if COMPRESSORS.contains(&part) {
output.compressor = Some(part);
*bytes_processed += part.len() + SLASH_SIZE;
return Ok(state);
}
continue;
}
State::DigestFunction => {
state = State::Hash;
if DIGEST_FUNCTIONS.contains(&part) {
output.digest_function = Some(part);
*bytes_processed += part.len() + SLASH_SIZE;
return Ok(state);
}
continue;
}
State::Hash => {
output.hash = part;
*bytes_processed += part.len() + SLASH_SIZE;
// TODO(allada) If digest_function is not set, infer it from the hash length.
return Ok(State::Size);
}
State::Size => {
output.expected_size = part
.parse::<usize>()
.map_err(|_| make_input_err!("Digest size_bytes was not convertible to usize. Got: {}", part))?;
*bytes_processed += part.len(); // Special case: {size} does not count a slash (optional metadata, if present, accounts for the slash that precedes it).
return Ok(State::OptionalMetadata);
}
State::OptionalMetadata => {
output.optional_metadata = Some(part);
*bytes_processed += part.len() + SLASH_SIZE;
// If we get here, we are done parsing backwards and have successfully parsed
// everything beyond the "(compressed-)blobs" section.
return Ok(State::OptionalMetadata);
}
}
}
}
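
// Illustrative walk-through (editor's example, not part of this commit) of the
// backward scan above for "blobs/deadbeef/4/some-metadata": the reverse iterator
// yields "some-metadata", "4", "deadbeef", then "blobs"; matching "blobs" returns
// State::DigestFunction, and the unwinding stack then records hash = "deadbeef",
// expected_size = 4 and optional_metadata = Some("some-metadata") (there is no
// digest-function segment, so that state falls through to Hash).
//
//     let info = ResourceInfo::new("blobs/deadbeef/4/some-metadata", false).unwrap();
//     assert_eq!(info.instance_name, "");
//     assert_eq!(info.hash, "deadbeef");
//     assert_eq!(info.expected_size, 4);
//     assert_eq!(info.optional_metadata, Some("some-metadata"));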
