Removes needless overoptimization of strings for DigestInfo
We make so many copies of DigestInfo, and it is not trivially copyable
because of an optimization that adds around 40 bytes to each struct in
order to cache the string representation of the digest when possible.
This optimization appears to be pointless and likely ends up costing
more than it saves.
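
For reference, this is the shape of the change, simplified from the
util/common.rs diff below:

```rust
// Before: each DigestInfo carries a lazily-filled cache of the hex
// string, adding ~40 bytes and making the type non-trivially-copyable.
pub struct DigestInfo {
    pub size_bytes: i64,
    pub packed_hash: [u8; 32],
    str_hash: LazyTransform<Option<String>, String>,
}

// After: plain old data; the hex string is recomputed on demand by `.str()`.
#[derive(Clone, Copy)]
pub struct DigestInfo {
    pub packed_hash: [u8; 32],
    pub size_bytes: i64,
}
```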

In local testing, I found that the overhead of the `lazy-init` wrapper
was slightly higher than a single invocation of `.str()`. Since we
usually call `.str()` only once per DigestInfo, it is not a good trade
to assume the hex string will be needed more than once per struct.
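
A minimal sketch of that comparison (a hypothetical harness, not the
actual measurement; it assumes the `lazy-init` and `hex` crates as used
in this repository):

```rust
use std::time::Instant;

use lazy_init::LazyTransform;

fn main() {
    let packed_hash = [0x42u8; 32];
    const N: usize = 1_000_000;

    // Construct the cache and call it once, mirroring the common case
    // of a single `.str()` per DigestInfo.
    let start = Instant::now();
    for _ in 0..N {
        let cache: LazyTransform<Option<String>, String> = LazyTransform::new(None);
        let s = cache.get_or_create(|v| v.unwrap_or_else(|| hex::encode(packed_hash)));
        std::hint::black_box(s);
    }
    println!("LazyTransform + one lookup: {:?}", start.elapsed());

    // Encode the hex string directly, with no cache at all.
    let start = Instant::now();
    for _ in 0..N {
        std::hint::black_box(hex::encode(packed_hash));
    }
    println!("plain hex::encode:          {:?}", start.elapsed());
}
```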

There is also a significant chance this will unlock secondary
optimizations: without the cached string, the struct is trivially
copyable, which lets the optimizer inline even more aggressively since
it knows it never needs to touch the heap.
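
Because `Copy` in Rust means exactly "duplicated by memcpy", the new
property can be checked at compile time. A sketch using a stand-in
struct (the real DigestInfo lives in util/common.rs):

```rust
// Only compiles if `T` is `Copy`, i.e. a trivially copyable value.
fn assert_copy<T: Copy>() {}

// Stand-in with the same fields as the new DigestInfo.
#[derive(Clone, Copy)]
#[repr(C)]
struct DigestInfo {
    packed_hash: [u8; 32],
    size_bytes: i64,
}

fn main() {
    assert_copy::<DigestInfo>();

    // Array-repeat and by-value passing now come for free.
    let digest = DigestInfo { packed_hash: [0; 32], size_bytes: 0 };
    let copies = [digest; 4];
    println!("{}", copies[3].size_bytes);
}
```

With the old `str_hash: LazyTransform<Option<String>, String>` field,
`assert_copy::<DigestInfo>()` would fail to compile, since `String`
owns heap memory.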
allada committed Jul 12, 2023
1 parent 264849b commit 4062d1d
Showing 7 changed files with 18 additions and 124 deletions.
36 changes: 1 addition & 35 deletions Cargo.Bazel.lock
@@ -1,5 +1,5 @@
{
"checksum": "ec1aaea79db6c8962a1718248f7b28b82eb70a7dee255529ea671dfed22e58a2",
"checksum": "4f921e8c24fd74362f4fa660db203c02dfdc564ae44a23e8d4526d28aab7aafb",
"crates": {
"addr2line 0.20.0": {
"name": "addr2line",
@@ -2500,10 +2500,6 @@
"id": "json5 0.4.1",
"target": "json5"
},
{
"id": "lazy-init 0.5.1",
"target": "lazy_init"
},
{
"id": "lazy_static 1.4.0",
"target": "lazy_static"
@@ -5246,36 +5242,6 @@
},
"license": "ISC"
},
"lazy-init 0.5.1": {
"name": "lazy-init",
"version": "0.5.1",
"repository": {
"Http": {
"url": "https://crates.io/api/v1/crates/lazy-init/0.5.1/download",
"sha256": "9f40963626ac12dcaf92afc15e4c3db624858c92fd9f8ba2125eaada3ac2706f"
}
},
"targets": [
{
"Library": {
"crate_name": "lazy_init",
"crate_root": "src/lib.rs",
"srcs": [
"**/*.rs"
]
}
}
],
"library_target_name": "lazy_init",
"common_attrs": {
"compile_data_glob": [
"**"
],
"edition": "2015",
"version": "0.5.1"
},
"license": "Apache-2.0/MIT"
},
"lazy_static 1.4.0": {
"name": "lazy_static",
"version": "1.4.0",
7 changes: 0 additions & 7 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 0 additions & 1 deletion Cargo.toml
@@ -30,7 +30,6 @@ tokio = { version = "1.29.1", features = ["macros", "io-util", "fs", "rt-multi-t
tokio-stream = { version = "0.1.14", features = ["fs", "sync"] }
tokio-util = { version = "0.7.8", features = ["io", "io-util", "codec"] }
tonic = { version = "0.9.2", features = ["gzip"] }
lazy-init = "0.5.1"
log = "0.4.19"
env_logger = "0.10.0"
serde = "1.0.167"
15 changes: 5 additions & 10 deletions cas/store/dedup_store.rs
@@ -29,7 +29,7 @@ use sha2::{Digest, Sha256};
use tokio_util::codec::FramedRead;

use buf_channel::{DropCloserReadHalf, DropCloserWriteHalf, StreamReader};
use common::{log, DigestInfo, JoinHandleDropGuard, SerializableDigestInfo};
use common::{log, DigestInfo, JoinHandleDropGuard};
use config;
use error::{make_err, Code, Error, ResultExt};
use fastcdc::FastCDC;
@@ -44,7 +44,7 @@ const DEFAULT_MAX_CONCURRENT_FETCH_PER_GET: usize = 10;

#[derive(Serialize, Deserialize, PartialEq, Debug, Default, Clone)]
pub struct DedupIndex {
pub entries: Vec<SerializableDigestInfo>,
pub entries: Vec<DigestInfo>,
}

pub struct DedupStore {
@@ -138,7 +138,7 @@ impl StoreTrait for DedupStore {
.map(move |index_entry| {
let content_store = self.content_store.clone();
async move {
let digest = DigestInfo::new(index_entry.hash, index_entry.size_bytes as i64);
let digest = DigestInfo::new(index_entry.packed_hash, index_entry.size_bytes as i64);
Pin::new(content_store.as_ref())
.has(digest)
.await
@@ -184,10 +184,7 @@ impl StoreTrait for DedupStore {
let hash = Sha256::digest(&frame[..]);

let frame_len = frame.len();
let index_entry = SerializableDigestInfo {
hash: hash.into(),
size_bytes: frame_len as u64,
};
let index_entry = DigestInfo::new(hash.into(), frame_len as i64);

let content_store_pin = Pin::new(content_store.as_ref());
let digest = DigestInfo::new(hash.clone().into(), frame.len() as i64);
@@ -287,10 +284,8 @@ impl StoreTrait for DedupStore {
let content_store = self.content_store.clone();

async move {
let digest = DigestInfo::new(index_entry.hash, index_entry.size_bytes as i64);

let data = Pin::new(content_store.as_ref())
.get_part_unchunked(digest, 0, None, Some(index_entry.size_bytes as usize))
.get_part_unchunked(index_entry, 0, None, Some(index_entry.size_bytes as usize))
.await
.err_tip(|| "Failed to get_part in content_store in dedup_store")?;

1 change: 0 additions & 1 deletion util/BUILD
@@ -24,7 +24,6 @@ rust_library(
"//proto",
"@crate_index//:bytes",
"@crate_index//:hex",
"@crate_index//:lazy-init",
"@crate_index//:log",
"@crate_index//:prost",
"@crate_index//:serde",
69 changes: 9 additions & 60 deletions util/common.rs
@@ -16,14 +16,13 @@ use std::collections::HashMap;
use std::convert::{TryFrom, TryInto};
use std::fmt;
use std::future::Future;
use std::hash::{Hash, Hasher};
use std::hash::Hash;
use std::pin::Pin;
use std::task::{Context, Poll};

use bytes::{BufMut, Bytes, BytesMut};
pub use fs;
use hex::FromHex;
use lazy_init::LazyTransform;
pub use log;
use prost::Message;
use proto::build::bazel::remote::execution::v2::Digest;
@@ -32,24 +32,21 @@ use tokio::task::{JoinError, JoinHandle};

use error::{make_input_err, Error, ResultExt};

#[derive(Serialize, Deserialize, Default, Clone, Copy, Eq, PartialEq, Hash)]
#[repr(C)]
pub struct DigestInfo {
/// Possibly the size of the digest in bytes. This should only be trusted
/// if `truest_size` is true.
pub size_bytes: i64,

/// Raw hash in packed form.
pub packed_hash: [u8; 32],

/// Cached string representation of the `packed_hash`.
str_hash: LazyTransform<Option<String>, String>,
/// Possibly the size of the digest in bytes.
pub size_bytes: i64,
}

impl DigestInfo {
pub fn new(packed_hash: [u8; 32], size_bytes: i64) -> Self {
DigestInfo {
size_bytes,
packed_hash,
str_hash: LazyTransform::new(None),
}
}

@@ -64,13 +64,11 @@ impl DigestInfo {
Ok(DigestInfo {
size_bytes,
packed_hash,
str_hash: LazyTransform::new(None),
})
}

pub fn str(&self) -> &str {
self.str_hash
.get_or_create(|v| v.unwrap_or_else(|| hex::encode(self.packed_hash)))
pub fn str(&self) -> String {
hex::encode(self.packed_hash)
}

pub fn empty_digest() -> DigestInfo {
@@ -81,7 +75,6 @@ impl DigestInfo {
0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27,
0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
],
str_hash: LazyTransform::new(None),
}
}
}
@@ -95,31 +88,6 @@ impl fmt::Debug for DigestInfo {
}
}

impl PartialEq for DigestInfo {
fn eq(&self, other: &Self) -> bool {
self.size_bytes == other.size_bytes && self.packed_hash == other.packed_hash
}
}

impl Eq for DigestInfo {}

impl Hash for DigestInfo {
fn hash<H: Hasher>(&self, state: &mut H) {
self.size_bytes.hash(state);
self.packed_hash.hash(state);
}
}

impl Clone for DigestInfo {
fn clone(&self) -> Self {
DigestInfo {
size_bytes: self.size_bytes,
packed_hash: self.packed_hash,
str_hash: LazyTransform::new(None),
}
}
}

impl TryFrom<Digest> for DigestInfo {
type Error = Error;

@@ -129,20 +97,14 @@ impl TryFrom<Digest> for DigestInfo {
Ok(DigestInfo {
size_bytes: digest.size_bytes,
packed_hash,
str_hash: LazyTransform::new(Some(digest.hash)),
})
}
}

impl From<DigestInfo> for Digest {
fn from(val: DigestInfo) -> Self {
let packed_hash = val.packed_hash;
let hash = val
.str_hash
.into_inner()
.unwrap_or_else(|v| v.unwrap_or_else(|| hex::encode(packed_hash)));
Digest {
hash,
hash: val.str(),
size_bytes: val.size_bytes,
}
}
@@ -151,25 +113,12 @@ impl From<DigestInfo> for Digest {
impl From<&DigestInfo> for Digest {
fn from(val: &DigestInfo) -> Self {
Digest {
hash: val.str().to_string(),
hash: val.str(),
size_bytes: val.size_bytes,
}
}
}

impl From<SerializableDigestInfo> for DigestInfo {
fn from(val: SerializableDigestInfo) -> Self {
DigestInfo::new(val.hash, val.size_bytes as i64)
}
}

#[derive(Serialize, Deserialize, PartialEq, Debug, Default, Clone)]
#[repr(C)]
pub struct SerializableDigestInfo {
pub hash: [u8; 32],
pub size_bytes: u64,
}

/// Simple wrapper that will abort a future that is running in another spawn in the
/// event that this handle gets dropped.
pub struct JoinHandleDropGuard<T> {
13 changes: 3 additions & 10 deletions util/evicting_map.rs
@@ -22,12 +22,12 @@ use async_trait::async_trait;
use lru::LruCache;
use serde::{Deserialize, Serialize};

use common::{log, DigestInfo, SerializableDigestInfo};
use common::{log, DigestInfo};
use config::stores::EvictionPolicy;

#[derive(Serialize, Deserialize, PartialEq, Debug, Clone)]
pub struct SerializedLRU {
pub data: Vec<(SerializableDigestInfo, i32)>,
pub data: Vec<(DigestInfo, i32)>,
pub anchor_time: u64,
}

@@ -150,13 +150,7 @@
anchor_time: self.anchor_time.unix_timestamp(),
};
for (digest, eviction_item) in state.lru.iter() {
let serialized_digest = SerializableDigestInfo {
hash: digest.packed_hash,
size_bytes: digest.size_bytes as u64,
};
serialized_lru
.data
.push((serialized_digest, eviction_item.seconds_since_anchor));
serialized_lru.data.push((*digest, eviction_item.seconds_since_anchor));
}
serialized_lru
}
@@ -166,7 +160,6 @@
self.anchor_time = I::from_secs(seiralized_lru.anchor_time);
state.lru.clear();
for (digest, seconds_since_anchor) in seiralized_lru.data {
let digest: DigestInfo = digest.into();
let entry = entry_builder(&digest);
state.lru.put(
digest,
