From 4ccb6458be7d2a6ed79b1f1d399c3a69c520536d Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Sat, 23 Apr 2022 09:54:10 -0700 Subject: [PATCH 1/7] testsuite: fix repeated typo Problem: some content backing store tests refer to "rooref" when "rootref" was intended. This appears to have proliferated via cut and paste. Fix typos. --- t/t0012-content-sqlite.t | 2 +- t/t0018-content-files.t | 6 +++--- t/t0024-content-s3.t | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/t/t0012-content-sqlite.t b/t/t0012-content-sqlite.t index cfc44ecbb584..69124d5f6eda 100755 --- a/t/t0012-content-sqlite.t +++ b/t/t0012-content-sqlite.t @@ -145,7 +145,7 @@ test_expect_success HAVE_JQ 'kvs-checkpoint.get foo returned correct timestamp' grep 2.2 timestamp.out ' -test_expect_success HAVE_JQ 'kvs-checkpoint.put updates foo rooref to baz' ' +test_expect_success HAVE_JQ 'kvs-checkpoint.put updates foo rootref to baz' ' kvs_checkpoint_put foo baz ' diff --git a/t/t0018-content-files.t b/t/t0018-content-files.t index 990820b63889..72ecf3ec832c 100755 --- a/t/t0018-content-files.t +++ b/t/t0018-content-files.t @@ -115,7 +115,7 @@ test_expect_success HAVE_JQ 'kvs-checkpoint.get foo returned correct timestamp' grep 2.2 timestamp.out ' -test_expect_success HAVE_JQ 'kvs-checkpoint.put updates foo rooref to baz' ' +test_expect_success HAVE_JQ 'kvs-checkpoint.put updates foo rootref to baz' ' kvs_checkpoint_put foo baz ' @@ -151,7 +151,7 @@ test_expect_success HAVE_JQ 'kvs-checkpoint.get foo still returns rootref baz' ' test_cmp rootref3.exp rootref3.out ' -test_expect_success HAVE_JQ 'kvs-checkpoint.put updates foo rooref with longer rootref' ' +test_expect_success HAVE_JQ 'kvs-checkpoint.put updates foo rootref with longer rootref' ' kvs_checkpoint_put foo abcdefghijklmnopqrstuvwxyz ' @@ -161,7 +161,7 @@ test_expect_success HAVE_JQ 'kvs-checkpoint.get foo returned rootref with longer test_cmp rootref3.exp rootref3.out ' -test_expect_success HAVE_JQ 'kvs-checkpoint.put updates foo rooref to shorter rootref' ' +test_expect_success HAVE_JQ 'kvs-checkpoint.put updates foo rootref to shorter rootref' ' kvs_checkpoint_put foo foobar ' diff --git a/t/t0024-content-s3.t b/t/t0024-content-s3.t index cd5c97b10c8c..0dd38d33871e 100755 --- a/t/t0024-content-s3.t +++ b/t/t0024-content-s3.t @@ -136,7 +136,7 @@ test_expect_success HAVE_JQ 'kvs-checkpoint.get foo returned correct timestamp' grep 2.2 timestamp.out ' -test_expect_success HAVE_JQ 'kvs-checkpoint.put updates foo rooref to baz' ' +test_expect_success HAVE_JQ 'kvs-checkpoint.put updates foo rootref to baz' ' kvs_checkpoint_put foo baz ' From cb917c523d4255754b6d50e205d3eadd7430fd73 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Thu, 21 Apr 2022 08:38:22 -0700 Subject: [PATCH 2/7] libutil: blobref_validate_hashtype returns size Problem: the size of the hash digest corresponding to a blobref is not disclosed to users of the blobref class. Have blobref_validate_hashtype() return the digest size on success, instead of zero. Update tests. 
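
As an illustrative sketch (not part of this patch; "sha1" is an arbitrary
example), a caller can now size a digest buffer from the return value:

    ssize_t hash_size;

    /* blobref_validate_hashtype() returns the digest size for a known
     * hash type, or -1 with errno set to EINVAL for an unknown one.
     */
    if ((hash_size = blobref_validate_hashtype ("sha1")) < 0)
        return -1;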
--- src/common/libutil/blobref.c | 11 ++++++++--- src/common/libutil/blobref.h | 5 +++-- src/common/libutil/test/blobref.c | 4 ++-- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/common/libutil/blobref.c b/src/common/libutil/blobref.c index e1f706ee938b..8bef5da1bcc7 100644 --- a/src/common/libutil/blobref.c +++ b/src/common/libutil/blobref.c @@ -205,13 +205,18 @@ int blobref_validate (const char *blobref) return -1; } -int blobref_validate_hashtype (const char *name) +ssize_t blobref_validate_hashtype (const char *name) { - if (name == NULL || !lookup_blobhash (name)) + struct blobhash *bh; + + if (name == NULL || !(bh = lookup_blobhash (name))) { + errno = EINVAL; return -1; - return 0; + } + return bh->hashlen; } + /* * vi:tabstop=4 shiftwidth=4 expandtab */ diff --git a/src/common/libutil/blobref.h b/src/common/libutil/blobref.h index b07a7da3fbb9..2394998bbe3c 100644 --- a/src/common/libutil/blobref.h +++ b/src/common/libutil/blobref.h @@ -43,9 +43,10 @@ int blobref_hash (const char *hashtype, int blobref_validate (const char *blobref); /* Check the validity of hash type (by name) + * If valid, the digest size is returned. + * If invalid, -1 is returned with errno set. */ -int blobref_validate_hashtype (const char *name); - +ssize_t blobref_validate_hashtype (const char *name); #endif /* _UTIL_BLOBREF_H */ /* diff --git a/src/common/libutil/test/blobref.c b/src/common/libutil/test/blobref.c index b6cb83e11f51..f2b90817ef9d 100644 --- a/src/common/libutil/test/blobref.c +++ b/src/common/libutil/test/blobref.c @@ -148,9 +148,9 @@ int main(int argc, char** argv) } /* blobref_validate_hashtype */ - ok (blobref_validate_hashtype ("sha1") == 0, + ok (blobref_validate_hashtype ("sha1") == SHA1_DIGEST_SIZE, "blobref_validate_hashtype sha1 is valid"); - ok (blobref_validate_hashtype ("sha256") == 0, + ok (blobref_validate_hashtype ("sha256") == SHA256_BLOCK_SIZE, "blobref_validate_hashtype sha256 is valid"); ok (blobref_validate_hashtype ("nerf") == -1, "blobref_validate_hashtype nerf is invalid"); From 0a607e914b2ed3a8fc14b0ac792e176cd44eda0f Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Fri, 22 Apr 2022 12:32:08 -0700 Subject: [PATCH 3/7] libutil: add blobref_hash_raw() Problem: now that protocols use hash digests directly, it is inconvenient to have to convert a blobref from blobref_hash() back to a digest. Add blobref_hash_raw() which just computes the digest. Add unit tests. 
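
A usage sketch (illustrative only; 'data' and 'len' are assumed to be the
caller's buffer and its length):

    uint8_t digest[BLOBREF_MAX_DIGEST_SIZE];
    int digest_size;

    /* Compute the raw digest of 'data' without formatting a blobref
     * string.  Returns the digest size, or -1 with errno set if the
     * hash type is unknown or the output buffer is too small.
     */
    if ((digest_size = blobref_hash_raw ("sha256",
                                         data,
                                         len,
                                         digest,
                                         sizeof (digest))) < 0)
        return -1;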
--- src/common/libutil/blobref.c | 17 ++++++++++++++++ src/common/libutil/blobref.h | 8 ++++++++ src/common/libutil/test/blobref.c | 32 +++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+) diff --git a/src/common/libutil/blobref.c b/src/common/libutil/blobref.c index 8bef5da1bcc7..5a2f955ccd99 100644 --- a/src/common/libutil/blobref.c +++ b/src/common/libutil/blobref.c @@ -182,6 +182,23 @@ int blobref_hash (const char *hashtype, return hashtostr (bh, hash, bh->hashlen, blobref, blobref_len); } +int blobref_hash_raw (const char *hashtype, + const void *data, int len, + void *hash, int hash_len) +{ + struct blobhash *bh; + + if (!hashtype + || !(bh = lookup_blobhash (hashtype)) + || hash_len < bh->hashlen + || !hash) { + errno = EINVAL; + return -1; + } + bh->hashfun (data, len, hash, bh->hashlen); + return bh->hashlen; +} + int blobref_validate (const char *blobref) { struct blobhash *bh; diff --git a/src/common/libutil/blobref.h b/src/common/libutil/blobref.h index 2394998bbe3c..034e4ef21b2a 100644 --- a/src/common/libutil/blobref.h +++ b/src/common/libutil/blobref.h @@ -38,6 +38,14 @@ int blobref_hash (const char *hashtype, const void *data, int len, void *blobref, int blobref_len); +/* Compute hash over data and store it in 'hash'. + * The hash algorithm is selected by 'hashtype', e.g. "sha1". + * Returns hash size on success, -1 on error with errno set. + */ +int blobref_hash_raw (const char *hashtype, + const void *data, int len, + void *hash, int hash_len); + /* Check validity of blobref string. */ int blobref_validate (const char *blobref); diff --git a/src/common/libutil/test/blobref.c b/src/common/libutil/test/blobref.c index f2b90817ef9d..8ac7952765ab 100644 --- a/src/common/libutil/test/blobref.c +++ b/src/common/libutil/test/blobref.c @@ -54,6 +54,20 @@ int main(int argc, char** argv) && errno == EINVAL, "blobref_hash fails EINVAL with invalid ref length"); + errno = 0; + ok (blobref_hash_raw ("nerf", data, sizeof (data), + digest, sizeof (digest)) < 0 + && errno == EINVAL, + "blobref_hash_raw fails EINVAL with unknown hash name"); + errno = 0; + ok (blobref_hash_raw ("sha1", data, sizeof (data), NULL, 0) < 0 + && errno == EINVAL, + "blobref_hash_raw fails EINVAL with NULL hash pointer"); + errno = 0; + ok (blobref_hash_raw ("sha1", data, sizeof (data), digest, 1) < 0 + && errno == EINVAL, + "blobref_hash_raw fails EINVAL with invalid hash length"); + errno = 0; ok (blobref_strtohash (badref[0], digest, sizeof (digest)) < 0 && errno == EINVAL, @@ -105,6 +119,15 @@ int main(int argc, char** argv) "blobref_hash sha1 works"); diag ("%s", ref); + ok (blobref_hash_raw ("sha1", + NULL, 0, + digest, sizeof (digest)) == SHA1_DIGEST_SIZE, + "blobref_hash_raw sha1 handles zero length data"); + ok (blobref_hash_raw ("sha1", + data, sizeof (data), + digest, sizeof (digest)) == SHA1_DIGEST_SIZE, + "blobref_hash_raw sha1 works"); + ok (blobref_strtohash (ref, digest, sizeof (digest)) == SHA1_DIGEST_SIZE, "blobref_strtohash returns expected size hash"); ok (blobref_hashtostr ("sha1", digest, SHA1_DIGEST_SIZE, ref2, @@ -122,6 +145,15 @@ int main(int argc, char** argv) "blobref_hash sha256 works"); diag ("%s", ref); + ok (blobref_hash_raw ("sha256", + NULL, 0, + digest, sizeof (digest)) == SHA256_BLOCK_SIZE, + "blobref_hash_raw sha256 handles zero length data"); + ok (blobref_hash_raw ("sha256", + data, sizeof (data), + digest, sizeof (digest)) == SHA256_BLOCK_SIZE, + "blobref_hash_raw sha256 works"); + ok (blobref_strtohash (ref, digest, sizeof (digest)) == SHA256_BLOCK_SIZE, 
"blobref_strtohash returns expected size hash"); ok (blobref_hashtostr ("sha256", digest, SHA256_BLOCK_SIZE, ref2, From 7f96091a01258c4f42b2748f4bbb117ec6a14739 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Sat, 23 Apr 2022 09:05:14 -0700 Subject: [PATCH 4/7] libcontent: rename functions Problem: libcontent internal functions are named inappropriately given RFC 10 protocol changes in the pipeline. We will still want functions that operate on blobrefs for convenience, but also more efficient ones that match the protocol. In anticipation of this, rename: content_load() -> content_load_byblobref() content_store_get() -> content_store_get_blobref() Update users. --- src/broker/content-cache.c | 4 ++-- src/cmd/builtin/content.c | 8 ++++---- src/cmd/builtin/dump.c | 14 +++++++------- src/cmd/builtin/restore.c | 4 ++-- src/common/libcontent/content.c | 6 ++++-- src/common/libcontent/content.h | 8 +++++--- src/modules/kvs/kvs.c | 10 +++++----- t/kvs/content-spam.c | 2 +- 8 files changed, 30 insertions(+), 26 deletions(-) diff --git a/src/broker/content-cache.c b/src/broker/content-cache.c index 1745140d069f..10008a53d706 100644 --- a/src/broker/content-cache.c +++ b/src/broker/content-cache.c @@ -377,7 +377,7 @@ static int cache_load (struct content_cache *cache, struct cache_entry *e) return 0; if (cache->rank == 0) flags = CONTENT_FLAG_CACHE_BYPASS; - if (!(f = content_load (cache->h, e->blobref, flags)) + if (!(f = content_load_byblobref (cache->h, e->blobref, flags)) || flux_future_aux_set (f, "entry", e, NULL) < 0 || flux_future_then (f, -1., cache_load_continuation, cache) < 0) { flux_log_error (cache->h, "content load"); @@ -474,7 +474,7 @@ static void cache_store_continuation (flux_future_t *f, void *arg) e->store_pending = 0; assert (cache->flush_batch_count > 0); cache->flush_batch_count--; - if (content_store_get (f, &blobref) < 0) { + if (content_store_get_blobref (f, &blobref) < 0) { if (cache->rank == 0 && errno == ENOSYS) flux_log (cache->h, LOG_DEBUG, "content store: %s", "backing store service unavailable"); diff --git a/src/cmd/builtin/content.c b/src/cmd/builtin/content.c index 07720d2890dc..fd4e320acf43 100644 --- a/src/cmd/builtin/content.c +++ b/src/cmd/builtin/content.c @@ -39,8 +39,8 @@ static int internal_content_load (optparse_t *p, int ac, char *av[]) log_err_exit ("flux_open"); if (optparse_hasopt (p, "bypass-cache")) flags |= CONTENT_FLAG_CACHE_BYPASS; - if (!(f = content_load (h, ref, flags))) - log_err_exit ("content_load"); + if (!(f = content_load_byblobref (h, ref, flags))) + log_err_exit ("content_load_byblobref"); if (content_load_get (f, (const void **)&data, &size) < 0) log_err_exit ("content_load_get"); if (write_all (STDOUT_FILENO, data, size) < 0) @@ -71,8 +71,8 @@ static int internal_content_store (optparse_t *p, int ac, char *av[]) log_err_exit ("read"); if (!(f = content_store (h, data, size, flags))) log_err_exit ("content_store"); - if (content_store_get (f, &blobref) < 0) - log_err_exit ("content_store_get"); + if (content_store_get_blobref (f, &blobref) < 0) + log_err_exit ("content_store_get_blobref"); printf ("%s\n", blobref); flux_future_destroy (f); flux_close (h); diff --git a/src/cmd/builtin/dump.c b/src/cmd/builtin/dump.c index 93ae97ab3e0b..2d65699f7238 100644 --- a/src/cmd/builtin/dump.c +++ b/src/cmd/builtin/dump.c @@ -134,9 +134,9 @@ static void dump_valref (struct archive *ar, log_err_exit ("could not create message list"); for (int i = 0; i < count; i++) { flux_future_t *f; - if (!(f = content_load (h, - treeobj_get_blobref (treeobj, 
i), - content_flags)) + if (!(f = content_load_byblobref (h, + treeobj_get_blobref (treeobj, i), + content_flags)) || flux_future_get (f, (const void **)&msg) < 0 || flux_msg_get_payload (msg, &data, &len) < 0) { log_msg_exit ("%s: missing blobref %d: %s", @@ -260,9 +260,9 @@ static void dump_dirref (struct archive *ar, if (treeobj_get_count (treeobj) != 1) log_msg_exit ("%s: blobref count is not 1", path); - if (!(f = content_load (h, - treeobj_get_blobref (treeobj, 0), - content_flags)) + if (!(f = content_load_byblobref (h, + treeobj_get_blobref (treeobj, 0), + content_flags)) || content_load_get (f, &buf, &buflen) < 0) { log_msg_exit ("%s: missing blobref: %s", path, @@ -319,7 +319,7 @@ static void dump_blobref (struct archive *ar, const char *key; json_t *entry; - if (!(f = content_load (h, blobref, content_flags)) + if (!(f = content_load_byblobref (h, blobref, content_flags)) || content_load_get (f, &buf, &buflen) < 0) log_msg_exit ("cannot load root tree object: %s", future_strerror (f, errno)); diff --git a/src/cmd/builtin/restore.c b/src/cmd/builtin/restore.c index e916e4ea173b..16bf01f9f6fb 100644 --- a/src/cmd/builtin/restore.c +++ b/src/cmd/builtin/restore.c @@ -116,7 +116,7 @@ static json_t *restore_dir (flux_t *h, json_t *dir) if (!(s = treeobj_encode (ndir))) log_msg_exit ("out of memory"); if (!(f = content_store (h, s, strlen (s), content_flags)) - || content_store_get (f, &blobref) < 0) + || content_store_get_blobref (f, &blobref) < 0) log_msg_exit ("error storing dirref blob: %s", future_strerror (f, errno)); progress (1, 0); @@ -214,7 +214,7 @@ static void restore_value (flux_t *h, const char *blobref; if (!(f = content_store (h, buf, size, content_flags)) - || content_store_get (f, &blobref) < 0) + || content_store_get_blobref (f, &blobref) < 0) log_msg_exit ("error storing blob for %s: %s", path, future_strerror (f, errno)); diff --git a/src/common/libcontent/content.c b/src/common/libcontent/content.c index 8eeaae7fa0eb..35c019df96bc 100644 --- a/src/common/libcontent/content.c +++ b/src/common/libcontent/content.c @@ -20,7 +20,9 @@ #include "src/common/libutil/blobref.h" -flux_future_t *content_load (flux_t *h, const char *blobref, int flags) +flux_future_t *content_load_byblobref (flux_t *h, + const char *blobref, + int flags) { const char *topic = "content.load"; uint32_t rank = FLUX_NODEID_ANY; @@ -57,7 +59,7 @@ flux_future_t *content_store (flux_t *h, const void *buf, int len, int flags) return flux_rpc_raw (h, topic, buf, len, rank, 0); } -int content_store_get (flux_future_t *f, const char **blobref) +int content_store_get_blobref (flux_future_t *f, const char **blobref) { int ref_size; const char *ref; diff --git a/src/common/libcontent/content.h b/src/common/libcontent/content.h index 080c678f2165..860a3a52da7a 100644 --- a/src/common/libcontent/content.h +++ b/src/common/libcontent/content.h @@ -19,7 +19,9 @@ enum { /* Send request to load blob by blobref. */ -flux_future_t *content_load (flux_t *h, const char *blobref, int flags); +flux_future_t *content_load_byblobref (flux_t *h, + const char *blobref, + int flags); /* Get result of load request (blob). * This blocks until response is received. @@ -33,10 +35,10 @@ int content_load_get (flux_future_t *f, const void **buf, int *len); flux_future_t *content_store (flux_t *h, const void *buf, int len, int flags); /* Get result of store request (blobref). - * Storage for 'blobref' belongs to 'f' and is valid until 'f' is destroyed. + * Storage belongs to 'f' and is valid until 'f' is destroyed. 
* Returns 0 on success, -1 on failure with errno set. */ -int content_store_get (flux_future_t *f, const char **blobref); +int content_store_get_blobref (flux_future_t *f, const char **blobref); #endif /* !_FLUX_CONTENT_H */ diff --git a/src/modules/kvs/kvs.c b/src/modules/kvs/kvs.c index c9d01e7df3b0..ab8169f8693f 100644 --- a/src/modules/kvs/kvs.c +++ b/src/modules/kvs/kvs.c @@ -492,8 +492,8 @@ static int content_load_request_send (struct kvs_ctx *ctx, const char *ref) char *refcpy; int saved_errno; - if (!(f = content_load (ctx->h, ref, 0))) { - flux_log_error (ctx->h, "%s: content_load", __FUNCTION__); + if (!(f = content_load_byblobref (ctx->h, ref, 0))) { + flux_log_error (ctx->h, "%s: content_load_byblobref", __FUNCTION__); goto error; } if (!(refcpy = strdup (ref))) @@ -593,8 +593,8 @@ static void content_store_completion (flux_future_t *f, void *arg) cache_blobref = flux_future_aux_get (f, "cache_blobref"); assert (cache_blobref); - if (content_store_get (f, &blobref) < 0) { - flux_log_error (ctx->h, "%s: content_store_get", __FUNCTION__); + if (content_store_get_blobref (f, &blobref) < 0) { + flux_log_error (ctx->h, "%s: content_store_get_blobref", __FUNCTION__); goto error; } @@ -2809,7 +2809,7 @@ static int store_initial_rootdir (struct kvs_ctx *ctx, char *ref, int ref_len) goto error_uncache; } if (!(f = content_store (ctx->h, data, len, 0)) - || content_store_get (f, &newref) < 0) { + || content_store_get_blobref (f, &newref) < 0) { flux_log_error (ctx->h, "%s: content_store", __FUNCTION__); goto error_uncache; } diff --git a/t/kvs/content-spam.c b/t/kvs/content-spam.c index 936a015eb4d5..0a4df0912663 100644 --- a/t/kvs/content-spam.c +++ b/t/kvs/content-spam.c @@ -30,7 +30,7 @@ static void store_completion (flux_future_t *f, void *arg) flux_t *h = arg; const char *blobref; - if (content_store_get (f, &blobref) < 0) + if (content_store_get_blobref (f, &blobref) < 0) log_err_exit ("store"); printf ("%s\n", blobref); flux_future_destroy (f); From 2a7788fa335c16a4899b53c399ffc72b70f1db89 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Fri, 22 Apr 2022 07:19:20 -0700 Subject: [PATCH 5/7] broker: update content proto per RFC 10 Problem: the RFC 10 content protocol has been updated to use raw hash digests instead of blobrefs, but flux-core still uses blobrefs. Convert to the new protocol. This includes updating - libcontent - broker content cache - all the content backing modules In addition, use the first eight bytes (sizeof (size_t)) of the hash digest directly as the hash key in the content cache, which should already be evenly distributed over its range and more efficient than hash digest -> blobref -> Bernstein_hash (blobref) Update content backing module sharness tests which use the load/store protocol directly. 
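
As a rough sketch of the updated load path (illustrative only; 'h', 'hash',
and 'hash_size' are assumed to come from the caller):

    flux_future_t *f;
    const void *blob;
    int blob_len;

    /* The load request payload is now the raw digest, not a blobref
     * string.  content_load_get() blocks until the response arrives,
     * and 'blob' remains valid only until the future is destroyed.
     */
    if (!(f = content_load_byhash (h, hash, hash_size, 0))
        || content_load_get (f, &blob, &blob_len) < 0)
        log_err_exit ("content load");
    flux_future_destroy (f);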
--- src/broker/content-cache.c | 113 +++++++++++++------- src/common/libcontent/content.c | 69 +++++++++--- src/common/libcontent/content.h | 9 +- src/modules/content-files/content-files.c | 54 ++++++---- src/modules/content-s3/content-s3.c | 54 ++++++---- src/modules/content-sqlite/content-sqlite.c | 73 ++++++------- t/t0012-content-sqlite.t | 6 +- t/t0018-content-files.t | 27 ++--- t/t0024-content-s3.t | 21 ++-- 9 files changed, 267 insertions(+), 159 deletions(-) diff --git a/src/broker/content-cache.c b/src/broker/content-cache.c index 10008a53d706..3de92106908e 100644 --- a/src/broker/content-cache.c +++ b/src/broker/content-cache.c @@ -48,6 +48,11 @@ static const uint32_t default_blob_size_limit = 1048576*1024; static const uint32_t default_flush_batch_limit = 256; +/* Hash digests are used as zhashx keys. The digest size needs to be + * available to zhashx comparator so make this global. + */ +static int content_hash_size; + struct msgstack { const flux_msg_t *msg; struct msgstack *next; @@ -56,7 +61,7 @@ struct msgstack { struct cache_entry { void *data; int len; - char *blobref; + void *hash; // key storage is contiguous with struct uint8_t valid:1; // entry contains valid data uint8_t dirty:1; // entry needs to be stored upstream // or to backing store (rank 0) @@ -188,22 +193,31 @@ static void cache_entry_destructor (void **item) *item = NULL; } } +/* zhashx_hash_fn footprint + */ +static size_t cache_entry_hasher (const void *key) +{ + return *(size_t *)key; +} +/* zhashx_comparator_fn footprint + */ +static int cache_entry_comparator (const void *item1, const void *item2) +{ + return memcmp (item1, item2, content_hash_size); +} /* Create a cache entry. - * Entries are created with a blobref, but no data (e.g. "invalid"). - * The blobref is copied to the space following the entry struct so the entry - * and the blobref can be co-located in memory, and allocated with one malloc. + * Entries are created with no data (e.g. "invalid"). * Returns entry on success, NULL with errno set on failure. */ -static struct cache_entry *cache_entry_create (const char *blobref) +static struct cache_entry *cache_entry_create (const void *hash) { struct cache_entry *e; - int bloblen = strlen (blobref) + 1; - if (!(e = calloc (1, sizeof (*e) + bloblen))) + if (!(e = calloc (1, sizeof (*e) + content_hash_size))) return NULL; - e->blobref = (char *)(e + 1); - memcpy (e->blobref, blobref, bloblen); + e->hash = (char *)(e + 1); + memcpy (e->hash, hash, content_hash_size); list_node_init (&e->list); return e; } @@ -264,23 +278,29 @@ static void cache_entry_dirty_clear (struct content_cache *cache, request_list_respond_raw (&e->store_requests, cache->h, - e->blobref, - strlen (e->blobref) + 1, + e->hash, + content_hash_size, "store"); } } -/* Create and insert a cache entry, using 'blobref' as the hash key. +/* Create and insert a cache entry. * Returns 0 on success, -1 on failure with errno set. 
*/ static struct cache_entry *cache_entry_insert (struct content_cache *cache, - const char *blobref) + const void *hash, + int hash_size) { struct cache_entry *e; - if (!(e = cache_entry_create (blobref))) + + if (hash_size != content_hash_size) { + errno = EINVAL; + return NULL; + } + if (!(e = cache_entry_create (hash))) return NULL; - if (zhashx_insert (cache->entries, e->blobref, e) < 0) { + if (zhashx_insert (cache->entries, e->hash, e) < 0) { errno = EEXIST; cache_entry_destroy (e); return NULL; @@ -288,16 +308,20 @@ static struct cache_entry *cache_entry_insert (struct content_cache *cache, return e; } -/* Look up a cache entry, by blobref. +/* Look up a cache entry. * Move to front of LRU because it was looked up. * Returns entry on success, NULL on failure. * N.B. errno is not set */ static struct cache_entry *cache_entry_lookup (struct content_cache *cache, - const char *blobref) + const void *hash, + int hash_size) { struct cache_entry *e; - if (!(e = zhashx_lookup (cache->entries, blobref))) + + if (hash_size != content_hash_size) + return NULL; + if (!(e = zhashx_lookup (cache->entries, hash))) return NULL; if (e->valid && !e->dirty) { @@ -323,7 +347,7 @@ static void cache_entry_remove (struct content_cache *cache, } if (e->dirty) cache->acct_dirty--; - zhashx_delete (cache->entries, e->blobref); + zhashx_delete (cache->entries, e->hash); } /* Load operation @@ -377,7 +401,7 @@ static int cache_load (struct content_cache *cache, struct cache_entry *e) return 0; if (cache->rank == 0) flags = CONTENT_FLAG_CACHE_BYPASS; - if (!(f = content_load_byblobref (cache->h, e->blobref, flags)) + if (!(f = content_load_byhash (cache->h, e->hash, content_hash_size, flags)) || flux_future_aux_set (f, "entry", e, NULL) < 0 || flux_future_then (f, -1., cache_load_continuation, cache) < 0) { flux_log_error (cache->h, "content load"); @@ -392,25 +416,24 @@ void content_load_request (flux_t *h, flux_msg_handler_t *mh, const flux_msg_t *msg, void *arg) { struct content_cache *cache = arg; - const char *blobref; - int blobref_size; + const void *hash; + int hash_size; void *data = NULL; int len = 0; struct cache_entry *e; - if (flux_request_decode_raw (msg, NULL, (const void **)&blobref, - &blobref_size) < 0) + if (flux_request_decode_raw (msg, NULL, &hash, &hash_size) < 0) goto error; - if (!blobref || blobref[blobref_size - 1] != '\0') { + if (hash_size != content_hash_size) { errno = EPROTO; goto error; } - if (!(e = cache_entry_lookup (cache, blobref))) { + if (!(e = cache_entry_lookup (cache, hash, hash_size))) { if (cache->rank == 0 && !cache->backing) { errno = ENOENT; goto error; } - if (!(e = cache_entry_insert (cache, blobref))) { + if (!(e = cache_entry_insert (cache, hash, hash_size))) { flux_log_error (h, "content load"); goto error; } @@ -469,12 +492,13 @@ static void cache_store_continuation (flux_future_t *f, void *arg) { struct content_cache *cache = arg; struct cache_entry *e = flux_future_aux_get (f, "entry"); - const char *blobref; + const void *hash; + int hash_size; e->store_pending = 0; assert (cache->flush_batch_count > 0); cache->flush_batch_count--; - if (content_store_get_blobref (f, &blobref) < 0) { + if (content_store_get_hash (f, &hash, &hash_size) < 0) { if (cache->rank == 0 && errno == ENOSYS) flux_log (cache->h, LOG_DEBUG, "content store: %s", "backing store service unavailable"); @@ -482,8 +506,8 @@ static void cache_store_continuation (flux_future_t *f, void *arg) flux_log_error (cache->h, "content store"); goto error; } - if (strcmp (blobref, e->blobref)) { - 
flux_log (cache->h, LOG_ERR, "content store: wrong blobref"); + if (hash_size != content_hash_size + || memcmp (hash, e->hash, content_hash_size) != 0) { errno = EIO; goto error; } @@ -536,7 +560,8 @@ static void content_store_request (flux_t *h, flux_msg_handler_t *mh, const void *data; int len; struct cache_entry *e = NULL; - char blobref[BLOBREF_MAX_STRING_SIZE]; + uint8_t hash[BLOBREF_MAX_DIGEST_SIZE]; + int hash_size; if (flux_request_decode_raw (msg, NULL, &data, &len) < 0) goto error; @@ -544,12 +569,14 @@ static void content_store_request (flux_t *h, flux_msg_handler_t *mh, errno = EFBIG; goto error; } - if (blobref_hash (cache->hash_name, (uint8_t *)data, len, blobref, - sizeof (blobref)) < 0) + if ((hash_size = blobref_hash_raw (cache->hash_name, + data, + len, + hash, + sizeof (hash))) < 0) goto error; - - if (!(e = cache_entry_lookup (cache, blobref))) { - if (!(e = cache_entry_insert (cache, blobref))) + if (!(e = cache_entry_lookup (cache, hash, hash_size))) { + if (!(e = cache_entry_insert (cache, hash, hash_size))) goto error; } if (cache_entry_fill (cache, e, data, len, true) < 0) @@ -565,7 +592,7 @@ static void content_store_request (flux_t *h, flux_msg_handler_t *mh, } } } - if (flux_respond_raw (h, msg, blobref, strlen (blobref) + 1) < 0) + if (flux_respond_raw (h, msg, hash, hash_size) < 0) flux_log_error (h, "content store: flux_respond_raw"); return; error: @@ -890,13 +917,15 @@ static int register_attrs (struct content_cache *cache, attr_t *attr) return -1; } else { - if (blobref_validate_hashtype (s) < 0) { + int hash_size; + if ((hash_size = blobref_validate_hashtype (s)) < 0) { log_msg ("%s: unknown hash type", s); return -1; } if (attr_set_flags (attr, "content.hash", FLUX_ATTRFLAG_IMMUTABLE) < 0) return -1; cache->hash_name = s; + content_hash_size = hash_size; } /* Purge tunables @@ -944,7 +973,10 @@ struct content_cache *content_cache_create (flux_t *h, attr_t *attrs) return NULL; if (!(cache->entries = zhashx_new ())) goto nomem; + zhashx_set_destructor (cache->entries, cache_entry_destructor); + zhashx_set_key_hasher (cache->entries, cache_entry_hasher); + zhashx_set_key_comparator (cache->entries, cache_entry_comparator); zhashx_set_key_destructor (cache->entries, NULL); // key is part of entry zhashx_set_key_duplicator (cache->entries, NULL); // key is part of entry @@ -954,6 +986,8 @@ struct content_cache *content_cache_create (flux_t *h, attr_t *attrs) cache->purge_target_size = default_cache_purge_target_size; cache->purge_old_entry = default_cache_purge_old_entry; cache->hash_name = default_hash; + if ((content_hash_size = blobref_validate_hashtype (default_hash)) < 0) + goto error; cache->h = h; cache->reactor = flux_get_reactor (h); list_head_init (&cache->lru); @@ -961,6 +995,7 @@ struct content_cache *content_cache_create (flux_t *h, attr_t *attrs) if (register_attrs (cache, attrs) < 0) goto error; + assert (content_hash_size >= sizeof (size_t)); // hasher assumes this if (flux_msg_handler_addvec (h, htab, cache, &cache->handlers) < 0) goto error; diff --git a/src/common/libcontent/content.c b/src/common/libcontent/content.c index 35c019df96bc..19531454a044 100644 --- a/src/common/libcontent/content.c +++ b/src/common/libcontent/content.c @@ -19,15 +19,17 @@ #include "content.h" #include "src/common/libutil/blobref.h" +#include "src/common/libutil/errno_safe.h" -flux_future_t *content_load_byblobref (flux_t *h, - const char *blobref, - int flags) +flux_future_t *content_load_byhash (flux_t *h, + const void *hash, + int hash_size, + int flags) { 
const char *topic = "content.load"; uint32_t rank = FLUX_NODEID_ANY; - if (!h || !blobref || blobref_validate (blobref) < 0) { + if (!h || !hash) { errno = EINVAL; return NULL; } @@ -37,7 +39,19 @@ flux_future_t *content_load_byblobref (flux_t *h, topic = "content-backing.load"; rank = 0; } - return flux_rpc_raw (h, topic, blobref, strlen (blobref) + 1, rank, 0); + return flux_rpc_raw (h, topic, hash, hash_size, rank, 0); +} + +flux_future_t *content_load_byblobref (flux_t *h, + const char *blobref, + int flags) +{ + uint32_t hash[BLOBREF_MAX_DIGEST_SIZE]; + int hash_size; + + if ((hash_size = blobref_strtohash (blobref, hash, sizeof (hash))) < 0) + return NULL; + return content_load_byhash (h, hash, hash_size, flags); } int content_load_get (flux_future_t *f, const void **buf, int *len) @@ -59,19 +73,48 @@ flux_future_t *content_store (flux_t *h, const void *buf, int len, int flags) return flux_rpc_raw (h, topic, buf, len, rank, 0); } -int content_store_get_blobref (flux_future_t *f, const char **blobref) +int content_store_get_hash (flux_future_t *f, const void **hash, int *hash_size) { - int ref_size; - const char *ref; + const void *buf; + int buf_size; - if (flux_rpc_get_raw (f, (const void **)&ref, &ref_size) < 0) - return -1; - if (!ref || ref[ref_size - 1] != '\0' || blobref_validate (ref) < 0) { - errno = EPROTO; + if (flux_rpc_get_raw (f, &buf, &buf_size) < 0) return -1; + if (hash) + *hash = buf; + if (hash_size) + *hash_size = buf_size; + return 0; +} + +int content_store_get_blobref (flux_future_t *f, const char **blobref) +{ + flux_t *h = flux_future_get_flux (f); + const char *auxkey = "flux::blobref"; + const char *result; + + if (!(result = flux_future_aux_get (f, auxkey))) { + const char *hash_name = flux_attr_get (h, "content.hash"); + const void *hash; + int hash_len; + char buf[BLOBREF_MAX_STRING_SIZE]; + char *cpy = NULL; + + if (content_store_get_hash (f, &hash, &hash_len) < 0 + || blobref_hashtostr (hash_name, + hash, + hash_len, + buf, + sizeof (buf)) < 0 + || !(cpy = strdup (buf)) + || flux_future_aux_set (f, auxkey, cpy, (flux_free_f)free) < 0) { + ERRNO_SAFE_WRAP (free, cpy); + return -1; + } + result = cpy; } if (blobref) - *blobref = ref; + *blobref = result; return 0; } diff --git a/src/common/libcontent/content.h b/src/common/libcontent/content.h index 860a3a52da7a..692c99f644dc 100644 --- a/src/common/libcontent/content.h +++ b/src/common/libcontent/content.h @@ -17,8 +17,12 @@ enum { CONTENT_FLAG_UPSTREAM = 2, /* make request of upstream TBON peer */ }; -/* Send request to load blob by blobref. +/* Send request to load blob by hash or blobref. */ +flux_future_t *content_load_byhash (flux_t *h, + const void *hash, + int hash_len, + int flags); flux_future_t *content_load_byblobref (flux_t *h, const char *blobref, int flags); @@ -34,10 +38,11 @@ int content_load_get (flux_future_t *f, const void **buf, int *len); */ flux_future_t *content_store (flux_t *h, const void *buf, int len, int flags); -/* Get result of store request (blobref). +/* Get result of store request (hash or blobref). * Storage belongs to 'f' and is valid until 'f' is destroyed. * Returns 0 on success, -1 on failure with errno set. 
*/ +int content_store_get_hash (flux_future_t *f, const void **hash, int *hash_len); int content_store_get_blobref (flux_future_t *f, const char **blobref); #endif /* !_FLUX_CONTENT_H */ diff --git a/src/modules/content-files/content-files.c b/src/modules/content-files/content-files.c index 80278fb5122b..9e55e37b3fd8 100644 --- a/src/modules/content-files/content-files.c +++ b/src/modules/content-files/content-files.c @@ -18,10 +18,10 @@ * There are four main operations (RPC handlers): * * content-backing.load: - * Given a blobref, lookup blob and return it or a "not found" error. + * Given a hash, lookup blob and return it or a "not found" error. * * content-backing.store: - * Given a blob, store it and return its blobref + * Given a blob, store it and return its hash * * kvs-checkpoint.get: * Given a string key, lookup string value and return it or a "not found" error. @@ -66,11 +66,12 @@ struct content_files { char *dbpath; flux_t *h; const char *hashfun; + int hash_size; }; /* Handle a content-backing.load request from the rank 0 broker's - * content-cache service. The raw request payload is a blobref string, - * including NULL terminator. The raw response payload is the blob content. + * content-cache service. The raw request payload is a hash digest. + * The raw response payload is the blob content. * These payloads are specified in RFC 10. */ static void load_cb (flux_t *h, @@ -79,23 +80,25 @@ static void load_cb (flux_t *h, void *arg) { struct content_files *ctx = arg; - const char *blobref; - int blobref_size; + const void *hash; + int hash_size; + char blobref[BLOBREF_MAX_STRING_SIZE]; void *data = NULL; size_t size; const char *errstr = NULL; - if (flux_request_decode_raw (msg, - NULL, - (const void **)&blobref, - &blobref_size) < 0) + if (flux_request_decode_raw (msg, NULL, &hash, &hash_size) < 0) goto error; - if (!blobref || blobref[blobref_size - 1] != '\0' - || blobref_validate (blobref) < 0) { + if (hash_size != ctx->hash_size) { errno = EPROTO; - errstr = "invalid blobref"; goto error; } + if (blobref_hashtostr (ctx->hashfun, + hash, + hash_size, + blobref, + sizeof (blobref)) < 0) + goto error; if (filedb_get (ctx->dbpath, blobref, &data, &size, &errstr) < 0) goto error; if (flux_respond_raw (h, msg, data, size) < 0) @@ -110,7 +113,7 @@ static void load_cb (flux_t *h, /* Handle a content-backing.store request from the rank 0 broker's * content-cache service. The raw request payload is the blob content. - * The raw response payload is a blobref string including NULL terminator. + * The raw response payload is hash digest. * These payloads are specified in RFC 10. 
*/ void store_cb (flux_t *h, @@ -122,19 +125,27 @@ void store_cb (flux_t *h, const void *data; int size; char blobref[BLOBREF_MAX_STRING_SIZE]; + char hash[BLOBREF_MAX_DIGEST_SIZE]; + int hash_size; const char *errstr = NULL; if (flux_request_decode_raw (msg, NULL, &data, &size) < 0) goto error; - if (blobref_hash (ctx->hashfun, - (uint8_t *)data, - size, - blobref, - sizeof (blobref)) < 0) + if ((hash_size = blobref_hash_raw (ctx->hashfun, + data, + size, + hash, + sizeof (hash))) < 0) + goto error; + if (blobref_hashtostr (ctx->hashfun, + hash, + hash_size, + blobref, + sizeof (blobref)) < 0) goto error; if (filedb_put (ctx->dbpath, blobref, data, size, &errstr) < 0) goto error; - if (flux_respond_raw (h, msg, blobref, strlen (blobref) + 1) < 0) + if (flux_respond_raw (h, msg, hash, hash_size) < 0) flux_log_error (h, "error responding to store request"); return; error: @@ -266,7 +277,8 @@ static struct content_files *content_files_create (flux_t *h) * - the hash function, e.g. sha1, sha256 * - path to sqlite file */ - if (!(ctx->hashfun = flux_attr_get (h, "content.hash"))) { + if (!(ctx->hashfun = flux_attr_get (h, "content.hash")) + || (ctx->hash_size = blobref_validate_hashtype (ctx->hashfun)) < 0) { flux_log_error (h, "content.hash"); goto error; } diff --git a/src/modules/content-s3/content-s3.c b/src/modules/content-s3/content-s3.c index d12b54719eab..551164e84da4 100644 --- a/src/modules/content-s3/content-s3.c +++ b/src/modules/content-s3/content-s3.c @@ -13,6 +13,7 @@ #endif #include #include +#include #include "src/common/libutil/blobref.h" #include "src/common/libutil/log.h" @@ -31,6 +32,7 @@ struct content_s3 { struct s3_config *cfg; flux_t *h; const char *hashfun; + int hash_size; }; static void s3_config_destroy (struct s3_config *ctx) @@ -244,30 +246,33 @@ static void config_reload_cb (flux_t *h, } /* Handle a content-backing.load request from the rank 0 broker's - * content-cache service. The raw request payload is a blobref string, - * including NULL terminator. The raw response payload is the blob content. - * These payloads are specified in RFC 10. + * content-cache service. The raw request payload is a hash digest, + * The raw response payload is the blob content. These payloads are specified + * in RFC 10. */ static void load_cb (flux_t *h, flux_msg_handler_t *mh, const flux_msg_t *msg, void *arg) { struct content_s3 *ctx = arg; - const char *blobref; - int blobref_size; + const void *hash; + int hash_size; + char blobref[BLOBREF_MAX_STRING_SIZE]; void *data = NULL; size_t size; const char *errstr = NULL; - if (flux_request_decode_raw (msg, - NULL, - (const void **)&blobref, - &blobref_size) < 0) + if (flux_request_decode_raw (msg, NULL, &hash, &hash_size) < 0) goto error; - if (!blobref || blobref[blobref_size - 1] != '\0' - || blobref_validate (blobref) < 0) { + if (hash_size != ctx->hash_size) { errno = EPROTO; - errstr = "invalid blobref"; + errstr = "incorrect hash size"; goto error; } + if (blobref_hashtostr (ctx->hashfun, + hash, + hash_size, + blobref, + sizeof (blobref)) < 0) + goto error; if (s3_get (ctx->cfg, blobref, &data, &size, &errstr) < 0) goto error; if (flux_respond_raw (h, msg, data, size) < 0) @@ -283,7 +288,7 @@ static void load_cb (flux_t *h, flux_msg_handler_t *mh, const flux_msg_t *msg, v /* Handle a content-backing.store request from the rank 0 broker's * content-cache service. The raw request payload is the blob content. - * The raw response payload is a blobref string including NULL terminator. 
+ * The raw response payload is a hash digest. * These payloads are specified in RFC 10. */ void store_cb (flux_t *h, flux_msg_handler_t *mh, const flux_msg_t *msg, void *arg) @@ -292,19 +297,27 @@ void store_cb (flux_t *h, flux_msg_handler_t *mh, const flux_msg_t *msg, void *a const void *data; int size; char blobref[BLOBREF_MAX_STRING_SIZE]; + uint8_t hash[BLOBREF_MAX_DIGEST_SIZE]; + int hash_size; const char *errstr = NULL; if (flux_request_decode_raw (msg, NULL, &data, &size) < 0) goto error; - if (blobref_hash (ctx->hashfun, - (uint8_t *)data, - size, - blobref, - sizeof (blobref)) < 0) + if ((hash_size = blobref_hash_raw (ctx->hashfun, + data, + size, + hash, + sizeof (hash))) < 0 + || blobref_hashtostr (ctx->hashfun, + hash, + hash_size, + blobref, + sizeof (blobref)) < 0) goto error; + assert (hash_size == ctx->hash_size); if (s3_put (ctx->cfg, blobref, data, size, &errstr) < 0) goto error; - if (flux_respond_raw (h, msg, blobref, strlen (blobref) + 1) < 0) + if (flux_respond_raw (h, msg, hash, hash_size) < 0) flux_log_error (h, "error responding to store request"); return; @@ -424,7 +437,8 @@ static struct content_s3 *content_s3_create (flux_t *h) return NULL; ctx->h = h; - if (!(ctx->hashfun = flux_attr_get (h, "content.hash"))) { + if (!(ctx->hashfun = flux_attr_get (h, "content.hash")) + || (ctx->hash_size = blobref_validate_hashtype (ctx->hashfun)) < 0) { flux_log_error (h, "content.hash"); goto error; } diff --git a/src/modules/content-sqlite/content-sqlite.c b/src/modules/content-sqlite/content-sqlite.c index 87acc2259174..e3a44dbf5611 100644 --- a/src/modules/content-sqlite/content-sqlite.c +++ b/src/modules/content-sqlite/content-sqlite.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "src/common/libutil/blobref.h" #include "src/common/libutil/log.h" @@ -69,6 +70,7 @@ struct content_sqlite { sqlite3_stmt *checkpt_put_stmt; flux_t *h; const char *hashfun; + int hash_size; size_t lzo_bufsize; void *lzo_buf; struct content_stats stats; @@ -145,25 +147,19 @@ static int grow_lzo_buf (struct content_sqlite *ctx, size_t size) * which invalidates returned data. */ static int content_sqlite_load (struct content_sqlite *ctx, - const char *blobref, + const void *hash, + int hash_size, const void **datap, int *sizep) { - uint8_t hash[BLOBREF_MAX_DIGEST_SIZE]; - int hash_len; const void *data = NULL; int size = 0; int uncompressed_size; - if ((hash_len = blobref_strtohash (blobref, hash, sizeof (hash))) < 0) { - errno = ENOENT; - flux_log_error (ctx->h, "load: unexpected foreign blobref"); - return -1; - } if (sqlite3_bind_text (ctx->load_stmt, 1, (char *)hash, - hash_len, + hash_size, SQLITE_STATIC) != SQLITE_OK) { log_sqlite_error (ctx, "load: binding key"); set_errno_from_sqlite_error (ctx); @@ -216,27 +212,25 @@ static int content_sqlite_load (struct content_sqlite *ctx, } /* Store blob to objects table, compressing if necessary. - * Blobref resulting from hash over 'data' is stored to 'blobref'. - * Returns 0 on success, -1 on error with errno set. + * hash over 'data' is stored to 'hash'. + * Returns hash size on success, -1 on error with errno set. 
*/ static int content_sqlite_store (struct content_sqlite *ctx, const void *data, int size, - char *blobref, - int blobrefsz) + void *hash, + int hash_len) { - uint8_t hash[BLOBREF_MAX_DIGEST_SIZE]; - int hash_len; int uncompressed_size = -1; + int hash_size; - if (blobref_hash (ctx->hashfun, - (uint8_t *)data, - size, - blobref, - blobrefsz) < 0) - return -1; - if ((hash_len = blobref_strtohash (blobref, hash, sizeof (hash))) < 0) + if ((hash_size = blobref_hash_raw (ctx->hashfun, + data, + size, + hash, + hash_len)) < 0) return -1; + assert (hash_size == ctx->hash_size); if (size >= compression_threshold) { int r; int out_len = LZ4_compressBound(size); @@ -253,8 +247,8 @@ static int content_sqlite_store (struct content_sqlite *ctx, } if (sqlite3_bind_text (ctx->store_stmt, 1, - (char *)hash, - hash_len, + hash, + hash_size, SQLITE_STATIC) != SQLITE_OK) { log_sqlite_error (ctx, "store: binding key"); set_errno_from_sqlite_error (ctx); @@ -287,7 +281,7 @@ static int content_sqlite_store (struct content_sqlite *ctx, goto error; } sqlite3_reset (ctx->store_stmt); - return 0; + return hash_size; error: ERRNO_SAFE_WRAP (sqlite3_reset, ctx->store_stmt); return -1; @@ -299,26 +293,23 @@ static void load_cb (flux_t *h, void *arg) { struct content_sqlite *ctx = arg; - const char *blobref; - int blobref_size; + const void *hash; + int hash_size; const void *data; int size; struct timespec t0; if (flux_request_decode_raw (msg, NULL, - (const void **)&blobref, - &blobref_size) < 0) { - flux_log_error (h, "load: request decode failed"); + &hash, + &hash_size) < 0) goto error; - } - if (!blobref || blobref[blobref_size - 1] != '\0') { + if (hash_size != ctx->hash_size) { errno = EPROTO; - flux_log_error (h, "load: malformed blobref"); goto error; } monotime (&t0); - if (content_sqlite_load (ctx, blobref, &data, &size) < 0) + if (content_sqlite_load (ctx, hash, hash_size, &data, &size) < 0) goto error; tstat_push (&ctx->stats.load, monotime_since (t0)); if (flux_respond_raw (h, msg, data, size) < 0) @@ -338,7 +329,8 @@ void store_cb (flux_t *h, struct content_sqlite *ctx = arg; const void *data; int size; - char blobref[BLOBREF_MAX_STRING_SIZE]; + uint8_t hash[BLOBREF_MAX_DIGEST_SIZE]; + int hash_size; struct timespec t0; if (flux_request_decode_raw (msg, NULL, &data, &size) < 0) { @@ -346,10 +338,14 @@ void store_cb (flux_t *h, goto error; } monotime (&t0); - if (content_sqlite_store (ctx, data, size, blobref, sizeof (blobref)) < 0) + if ((hash_size = content_sqlite_store (ctx, + data, + size, + hash, + sizeof (hash))) < 0) goto error; tstat_push (&ctx->stats.store, monotime_since (t0)); - if (flux_respond_raw (h, msg, blobref, strlen (blobref) + 1) < 0) + if (flux_respond_raw (h, msg, hash, hash_size) < 0) flux_log_error (h, "store: flux_respond_raw"); return; error: @@ -749,7 +745,8 @@ static struct content_sqlite *content_sqlite_create (flux_t *h) * - the maximum blob size * - path to sqlite file */ - if (!(ctx->hashfun = flux_attr_get (h, "content.hash"))) { + if (!(ctx->hashfun = flux_attr_get (h, "content.hash")) + || (ctx->hash_size = blobref_validate_hashtype (ctx->hashfun)) < 0) { flux_log_error (h, "content.hash"); goto error; } diff --git a/t/t0012-content-sqlite.t b/t/t0012-content-sqlite.t index 69124d5f6eda..902a686b1928 100755 --- a/t/t0012-content-sqlite.t +++ b/t/t0012-content-sqlite.t @@ -171,9 +171,9 @@ test_expect_success HAVE_JQ 'kvs-checkpoint.get noexist fails with No such...' 
'
 	grep "No such file or directory" badkey.err
 '
-test_expect_success 'content-backing.load invalid blobref fails' '
-	echo -n sha999-000 >bad.blobref &&
-	$RPC content-backing.load 2 <bad.blobref 2>load.err
-'
+test_expect_success 'content-backing.load wrong size hash fails with EPROTO' '
+	echo -n xxx >badhash &&
+	$RPC content-backing.load 71 <badhash 2>load.err
+'
 getsize() {
diff --git a/t/t0018-content-files.t b/t/t0018-content-files.t
index 72ecf3ec832c..15a5aaf1f7cb 100755
--- a/t/t0018-content-files.t
+++ b/t/t0018-content-files.t
@@ -22,11 +22,11 @@ LARGE_SIZES="8388608 10000000 16777216 33554432 67108864"
 # Functions used by tests
 ##
-# Usage: backing_load blobref
+# Usage: backing_load <hash >blob
 backing_load() {
-	echo -n "$1" | $RPC content-backing.load
+	$RPC -r content-backing.load
 }
-# Usage: backing_store <blob >blobref
+# Usage: backing_store <blob >hash
 backing_store() {
 	$RPC -r content-backing.store
 }
@@ -39,23 +39,24 @@ make_blob() {
 	fi
 }
 # Usage: check_blob size
-# Leaves behind blob.<size> and blobref.<size>.
+# Leaves behind blob.<size> and hash.<size>.
 check_blob() {
 	make_blob $1 >blob.$1 &&
-	backing_store <blob.$1 >blobref.$1 &&
-	backing_load $(cat blobref.$1) >blob.$1.check &&
+	backing_store <blob.$1 >hash.$1 &&
+	backing_load <hash.$1 >blob.$1.check &&
 	test_cmp blob.$1 blob.$1.check
 }
 # Usage: check_blob size
-# Relies on existence of blob.<size> and blobref.<size>.
+# Relies on existence of blob.<size> and hash.<size>.
 recheck_blob() {
-	backing_load $(cat blobref.$1) >blob.$1.recheck &&
+	backing_load <hash.$1 >blob.$1.recheck &&
 	test_cmp blob.$1 blob.$1.recheck
 }
 # Usage: recheck_cache_blob size
-# Relies on existence of blob.<size> and blobref.<size>.
+# Relies on existence of blob.<size>.
 recheck_cache_blob() {
-	flux content load $(cat blobref.$1) >blob.$1.cachecheck &&
+	local blobref=$($BLOBREF sha1 <blob.$1) &&
+	flux content load $blobref >blob.$1.cachecheck &&
 	test_cmp blob.$1 blob.$1.cachecheck
 }
 # Usage: kvs_checkpoint_put key rootref
@@ -171,9 +172,9 @@ test_expect_success HAVE_JQ 'kvs-checkpoint.get foo returned rootref with shorte
 	test_cmp rootref4.exp rootref4.out
 '
-test_expect_success 'load with invalid blobref fails' '
-	test_must_fail backing_load notblobref 2>notblobref.err &&
-	grep "invalid blobref" notblobref.err
-'
+test_expect_success 'load with invalid hash size fails with EPROTO' '
+	echo -n xxx >badhash &&
+	test_must_fail backing_load <badhash 2>badhash.err &&
+	grep "Protocol error" badhash.err
+'
 test_expect_success 'kvs-checkpoint.get bad request fails with EPROTO' '
 	test_must_fail $RPC kvs-checkpoint.get </dev/null 2>badget.err &&
diff --git a/t/t0024-content-s3.t b/t/t0024-content-s3.t
index 0dd38d33871e..025328bcaa0f 100755
--- a/t/t0024-content-s3.t
+++ b/t/t0024-content-s3.t
@@ -27,11 +27,11 @@ LARGE_SIZES="8388608 10000000 16777216 33554432 67108864"
 # Functions used by tests
 ##
-# Usage: backing_load blobref
+# Usage: backing_load <hash >blob
 backing_load() {
-	echo -n "$1" | $RPC content-backing.load
+	$RPC -r content-backing.load
 }
-# Usage: backing_store <blob >blobref
+# Usage: backing_store <blob >hash
 backing_store() {
 	$RPC -r content-backing.store
 }
@@ -44,23 +44,24 @@ make_blob() {
 	fi
 }
 # Usage: check_blob size
-# Leaves behind blob.<size> and blobref.<size>.
+# Leaves behind blob.<size> and hash.<size>.
 check_blob() {
 	make_blob $1 >blob.$1 &&
-	backing_store <blob.$1 >blobref.$1 &&
-	backing_load $(cat blobref.$1) >blob.$1.check &&
+	backing_store <blob.$1 >hash.$1 &&
+	backing_load <hash.$1 >blob.$1.check &&
 	test_cmp blob.$1 blob.$1.check
 }
 # Usage: check_blob size
-# Relies on existence of blob.<size> and blobref.<size>.
+# Relies on existence of blob.<size> and hash.<size>.
 recheck_blob() {
-	backing_load $(cat blobref.$1) >blob.$1.recheck &&
+	backing_load <hash.$1 >blob.$1.recheck &&
 	test_cmp blob.$1 blob.$1.recheck
 }
 # Usage: recheck_cache_blob size
-# Relies on existence of blob.<size> and blobref.<size>.
+# Relies on existence of blob.<size>.
 recheck_cache_blob() {
-	flux content load $(cat blobref.$1) >blob.$1.cachecheck &&
+	local blobref=$($BLOBREF sha1 <blob.$1) &&
+	flux content load $blobref >blob.$1.cachecheck &&
 	test_cmp blob.$1 blob.$1.cachecheck
 }
 # Usage: kvs_checkpoint_put key rootref

From 3511a0b51f972adbeac2c042fc538a4fde6b2bac Mon Sep 17 00:00:00 2001
From: Jim Garlick
Date: Sat, 23 Apr 2022 21:44:27 -0700
Subject: [PATCH 6/7] testsuite: avoid reusing s3 buckets

Problem: the althash s3 test fails because it reuses the bucket from
the content-s3 test with an incompatible hash.

Append the test name to the s3 bucket to avoid this.

Also drop the second s3 test that uses sha1, since that was already
covered in content-s3.
---
 t/t2008-althash.t | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/t/t2008-althash.t b/t/t2008-althash.t
index c5f8ff74dc08..db2030c58270 100755
--- a/t/t2008-althash.t
+++ b/t/t2008-althash.t
@@ -43,12 +43,13 @@ test_expect_success S3 'create creds.toml from env' '
 	CREDS
 '
 
+# N.B. don't reuse the bucket from previous tests
 test_expect_success S3 'create content-s3.toml from env' '
 	cat >content-s3.toml <<-TOML
 	[content-s3]
 	credential-file = "$(pwd)/creds/creds.toml"
 	uri = "http://$S3_HOSTNAME"
-	bucket = "$S3_BUCKET"
+	bucket = "${S3_BUCKET}althash"
 	virtual-host-style = false
 	TOML
 '
@@ -66,13 +67,6 @@ test_expect_success S3 'Content store nil returns correct hash for sha256' '
 	test "$OUT" = "$nil256"
 '
 
-test_expect_success S3 'Content store nil returns correct hash for sha1' '
-	OUT=$(flux start -o,-Scontent.hash=sha1 \
-	    -o,-Scontent.backing-module=content-s3 \
-	    flux content store </dev/null) &&
-	test "$OUT" = "$nil1"
-'
-

From: Jim Garlick
Date: Sat, 23 Apr 2022 21:51:14 -0700
Subject: [PATCH 7/7] testsuite: drop S3 prereq from althash test

Problem: a generic test in the althash sharness test had the S3 prereq.

Drop S3 prereq. The test is not S3 related.
---
 t/t2008-althash.t | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/t2008-althash.t b/t/t2008-althash.t
index db2030c58270..d8d525fcca4a 100755
--- a/t/t2008-althash.t
+++ b/t/t2008-althash.t
@@ -67,7 +67,7 @@ test_expect_success S3 'Content store nil returns correct hash for sha256' '
 	test "$OUT" = "$nil256"
 '
 
-test_expect_success S3 'Attempt to start instance with invalid hash fails hard' '
+test_expect_success 'Attempt to start instance with invalid hash fails hard' '
 	test_must_fail flux start -o,-Scontent.hash=wronghash /bin/true
 '