diff --git a/ChangeLog b/ChangeLog index 095d7750a..cc3766218 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,7 @@ XXX * Version X.X.X - Fix "The pool was not closed" message (no ADR failure) (daos-stack/pmdk#1, DAOS-18692) + - Recalculate curr_allocated on underflow WA (daos-stack/pmdk#37, DAOS-18882) Fri Jan 16 2026 Oksana Sałyk diff --git a/src/libpmemobj/heap.c b/src/libpmemobj/heap.c index 64cc19f42..8c07987f2 100644 --- a/src/libpmemobj/heap.c +++ b/src/libpmemobj/heap.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: BSD-3-Clause /* Copyright 2015-2024, Intel Corporation */ +/* Copyright 2026, Hewlett Packard Enterprise Development LP */ /* * heap.c -- heap implementation @@ -812,6 +813,65 @@ heap_reclaim_zone_garbage(struct palloc_heap *heap, struct bucket *bucket, } } +/* + * heap_zone_get_allocated -- (internal) sums up the real size of all allocated + * (non-free) memory blocks in the zone. + * + * Note: It is not meant to calculate the sum of all allocations. + * It follows the algorithm used to calculate the heap_curr_allocated statistic. + * e.g. It does not take into account free space in runs. + */ +static uint64_t +heap_zone_get_allocated(struct palloc_heap *heap, uint32_t zone_id) +{ + struct zone *z = ZID_TO_ZONE(heap->layout, zone_id); + uint64_t used = 0; + + if (z->header.magic != ZONE_HEADER_MAGIC) + return 0; + + for (uint32_t i = 0; i < z->header.size_idx; ) { + struct chunk_header *hdr = &z->chunk_headers[i]; + ASSERT(hdr->size_idx != 0); + + struct memory_block m = MEMORY_BLOCK_NONE; + m.zone_id = zone_id; + m.chunk_id = i; + m.size_idx = hdr->size_idx; + memblock_rebuild_state(heap, &m); + + switch (hdr->type) { + case CHUNK_TYPE_USED: + case CHUNK_TYPE_RUN: + used += m.m_ops->get_real_size(&m); + break; + case CHUNK_TYPE_FREE: + break; + default: + ASSERT(0); + } + + i = m.chunk_id + m.size_idx; + } + + return used; +} + +/* + * heap_curr_allocated_sum -- calculate the curr_allocated statistic + */ +uint64_t +heap_curr_allocated_sum(struct palloc_heap *heap) +{ + uint64_t value = 0; + + for (uint32_t z = 0; z < heap->rt->nzones; ++z) { + value += heap_zone_get_allocated(heap, z); + } + + return value; +} + /* * heap_ensure_zone_reclaimed -- make sure that the specified zone has been * already reclaimed. diff --git a/src/libpmemobj/heap.h b/src/libpmemobj/heap.h index 09278bad6..779364548 100644 --- a/src/libpmemobj/heap.h +++ b/src/libpmemobj/heap.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright 2015-2024, Intel Corporation */ +/* Copyright 2026, Hewlett Packard Enterprise Development LP */ /* * heap.h -- internal definitions for heap @@ -74,6 +75,9 @@ heap_memblock_on_free(struct palloc_heap *heap, const struct memory_block *m); void heap_ensure_zone_reclaimed(struct palloc_heap *heap, uint32_t zone_id); +uint64_t +heap_curr_allocated_sum(struct palloc_heap *heap); + int heap_free_chunk_reuse(struct palloc_heap *heap, struct bucket *bucket, struct memory_block *m); diff --git a/src/libpmemobj/stats.c b/src/libpmemobj/stats.c index 2f3e23e30..6174022d9 100644 --- a/src/libpmemobj/stats.c +++ b/src/libpmemobj/stats.c @@ -5,11 +5,16 @@ * stats.c -- implementation of statistics */ +#include + +#include "heap.h" #include "obj.h" #include "stats.h" #include "core_assert.h" -STATS_CTL_HANDLER(persistent, curr_allocated, heap_curr_allocated); +static int +CTL_READ_HANDLER(persistent_curr_allocated)(void *ctx, + enum ctl_query_source source, void *arg, struct ctl_indexes *indexes); STATS_CTL_HANDLER(transient, run_allocated, heap_run_allocated); STATS_CTL_HANDLER(transient, run_active, heap_run_active); @@ -22,6 +27,59 @@ static const struct ctl_node CTL_NODE(heap)[] = { CTL_NODE_END }; +#define CURR_ALLOCATED_UNDERFLOW_FMT \ + "heap_curr_allocated underflowed: %" PRIu64 " > heap.size: %" PRIu64 \ + "; recalculating" + +/* + * CTL_READ_HANDLER(persistent_curr_allocated) -- returns curr_allocated field + */ +static int +CTL_READ_HANDLER(persistent_curr_allocated)(void *ctx, + enum ctl_query_source source, void *arg, struct ctl_indexes *indexes) +{ + /* suppress unused-parameter errors */ + SUPPRESS_UNUSED(source, indexes); + + PMEMobjpool *pop = ctx; + uint64_t *curr_allocated = &pop->stats->persistent->heap_curr_allocated; + + uint64_t *argv = arg; + util_atomic_load_explicit64(curr_allocated, argv, memory_order_acquire); + + /* + * heap_curr_allocated although is stored persistently it is not updated + * transactionally nor reliably persisted. This means e.g. that in case + * a transaction succeeds but the process will get terminated before + * the update of heap_curr_allocated, the value of heap_curr_allocated + * will get out of sync with the actual heap state. + * + * The most obvious case of this happening is when heap_curr_allocated + * is actually smaller than the sum of the sizes of all allocations in + * the heap, so in case all of the allocations are freed, + * heap_curr_allocated will underflow and get a very big value, bigger + * than heap size. + * + * Ref: https://daosio.atlassian.net/browse/DAOS-18882 + * + * This workaround detects this most obvious case and recalculates. + * + * Note: Not thread-safe. + */ + if (*argv > *pop->heap.sizep) { /* covers the == UINT64_MAX case */ + CORE_LOG_WARNING(CURR_ALLOCATED_UNDERFLOW_FMT, + *argv, *pop->heap.sizep); + + /* if the value is broken, recalculate it */ + *argv = heap_curr_allocated_sum(&pop->heap); + + util_atomic_store_explicit64(curr_allocated, *argv, + memory_order_release); + } + + return 0; +} + /* * CTL_READ_HANDLER(enabled) -- returns whether or not statistics are enabled */ diff --git a/src/libpmemobj/stats.h b/src/libpmemobj/stats.h index 72eeceb89..213e727d3 100644 --- a/src/libpmemobj/stats.h +++ b/src/libpmemobj/stats.h @@ -42,8 +42,15 @@ struct stats { #define STATS_INC_persistent(stats, name, value) do {\ if ((stats)->enabled == POBJ_STATS_ENABLED_PERSISTENT ||\ - (stats)->enabled == POBJ_STATS_ENABLED_BOTH)\ - util_fetch_and_add64((&(stats)->persistent->name), (value));\ + (stats)->enabled == POBJ_STATS_ENABLED_BOTH) {\ + uint64_t curr_value;\ + util_atomic_load_explicit64((&(stats)->persistent->name),\ + &curr_value, memory_order_acquire);\ + if (curr_value != UINT64_MAX) {\ + util_fetch_and_add64(\ + (&(stats)->persistent->name), (value));\ + }\ + }\ } while (0) #define STATS_SUB(stats, type, name, value) do {\ @@ -56,10 +63,29 @@ struct stats { util_fetch_and_sub64((&(stats)->transient->name), (value));\ } while (0) +/* + * This macro handles only the curr_allocated statistic. Sadly, it's current + * design makes it possible for it to underflow, which would lead to very large + * values due to the unsigned type. To mitigate this, if the value is already at + * UINT64_MAX or the subtraction would cause an underflow, the value is set to + * UINT64_MAX to indicate that it's broken and needs to be recalculated. + */ #define STATS_SUB_persistent(stats, name, value) do {\ if ((stats)->enabled == POBJ_STATS_ENABLED_PERSISTENT ||\ - (stats)->enabled == POBJ_STATS_ENABLED_BOTH)\ - util_fetch_and_sub64((&(stats)->persistent->name), (value));\ + (stats)->enabled == POBJ_STATS_ENABLED_BOTH) {\ + uint64_t curr_value;\ + util_atomic_load_explicit64((&(stats)->persistent->name),\ + &curr_value, memory_order_acquire);\ + if (curr_value != UINT64_MAX) {\ + curr_value = util_fetch_and_sub64(\ + (&(stats)->persistent->name), (value));\ + if (curr_value < value || curr_value == UINT64_MAX) {\ + util_atomic_store_explicit64(\ + (&(stats)->persistent->name),\ + UINT64_MAX, memory_order_release);\ + }\ + }\ + }\ } while (0) #define STATS_SET(stats, type, name, value) do {\ diff --git a/src/test/Makefile b/src/test/Makefile index c223ccae6..dab26479f 100644 --- a/src/test/Makefile +++ b/src/test/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright 2014-2024, Intel Corporation +# Copyright 2026, Hewlett Packard Enterprise Development LP # # @@ -39,6 +40,7 @@ OBJ_TESTS = \ obj_ctl_debug\ obj_ctl_heap_size\ obj_ctl_stats\ + obj_ctl_stats_curr_allocated_wa\ obj_debug\ obj_defrag\ obj_defrag_advanced\ diff --git a/src/test/obj_ctl_stats_curr_allocated_wa/.gitignore b/src/test/obj_ctl_stats_curr_allocated_wa/.gitignore new file mode 100644 index 000000000..0de75f5ab --- /dev/null +++ b/src/test/obj_ctl_stats_curr_allocated_wa/.gitignore @@ -0,0 +1 @@ +obj_ctl_stats_curr_allocated_wa diff --git a/src/test/obj_ctl_stats_curr_allocated_wa/Makefile b/src/test/obj_ctl_stats_curr_allocated_wa/Makefile new file mode 100644 index 000000000..cd8fdcea5 --- /dev/null +++ b/src/test/obj_ctl_stats_curr_allocated_wa/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2026, Hewlett Packard Enterprise Development LP + +# +# src/test/obj_ctl_stats_curr_allocated_wa/Makefile -- +# +TARGET = obj_ctl_stats_curr_allocated_wa +OBJS = obj_ctl_stats_curr_allocated_wa.o + +BUILD_STATIC_DEBUG=n +BUILD_STATIC_NONDEBUG=n + +LIBPMEMOBJ=y + +include ../Makefile.inc diff --git a/src/test/obj_ctl_stats_curr_allocated_wa/TEST0 b/src/test/obj_ctl_stats_curr_allocated_wa/TEST0 new file mode 100755 index 000000000..786402641 --- /dev/null +++ b/src/test/obj_ctl_stats_curr_allocated_wa/TEST0 @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2026, Hewlett Packard Enterprise Development LP + +. ../unittest/unittest.sh + +require_test_type short +require_fs_type any +require_build_type debug + +setup + +OUT_CHECK=pmemobj_grep${UNITTEST_NUM}.log + +expect_normal_exit ./obj_ctl_stats_curr_allocated_wa$EXESUFFIX $DIR/testfile1 + +cat $PMEMOBJ_LOG_FILE | grep "ctl__persistent_curr_allocated_read" > $OUT_CHECK + +check + +pass diff --git a/src/test/obj_ctl_stats_curr_allocated_wa/obj_ctl_stats_curr_allocated_wa.c b/src/test/obj_ctl_stats_curr_allocated_wa/obj_ctl_stats_curr_allocated_wa.c new file mode 100644 index 000000000..8a78a43c5 --- /dev/null +++ b/src/test/obj_ctl_stats_curr_allocated_wa/obj_ctl_stats_curr_allocated_wa.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2026, Hewlett Packard Enterprise Development LP */ + +/* + * obj_ctl_stats_curr_allocated_wa.c -- tests for curr_allocated WA + */ + +#include "unittest.h" + +#include <../../libpmemobj/obj.h> + +#define LAYOUT "ctl" + +#define MB (1 << 20) + +/* + * set_curr_allocated -- set curr_allocated to a specific value + */ +static void +set_curr_allocated(PMEMobjpool *pop, uint64_t value) +{ + struct pmemobjpool *ppop = (struct pmemobjpool *)pop; + uint64_t *curr_allocated = &ppop->stats_persistent.heap_curr_allocated; + *curr_allocated = value; + pmem_persist(curr_allocated, sizeof(*curr_allocated)); +} + +/* + * get_curr_allocated -- get the curr_allocated value + */ +static uint64_t +get_curr_allocated(PMEMobjpool *pop) +{ + int ret; + uint64_t allocated; + + ret = pmemobj_ctl_get(pop, "stats.heap.curr_allocated", &allocated); + UT_ASSERTeq(ret, 0); + + return allocated; +} + +/* + * huge_alloc -- make a huge allocation + */ +static void +huge_alloc(PMEMobjpool *pop, PMEMoid *oid) +{ + int ret = pmemobj_alloc(pop, oid, 4 * MB, 0, NULL, NULL); + UT_ASSERTeq(ret, 0); +} + +static void +underflow(PMEMobjpool *pop, uint64_t *exp_allocated) +{ + UT_ASSERTne(exp_allocated, NULL); + + /* Create a huge allocation and query the statistic */ + PMEMoid oid; + huge_alloc(pop, &oid); + + *exp_allocated = get_curr_allocated(pop); + + /* Inject a smaller than expected value. */ + set_curr_allocated(pop, *exp_allocated - 1); + + /* Trigger underflow. */ + pmemobj_free(&oid); +} + +static void +test(PMEMobjpool *pop) +{ + PMEMoid oid; + uint64_t exp_allocated; + + /* 1. trigger underflow */ + underflow(pop, &exp_allocated); + + /* Check the statistic is updated correctly. */ + UT_ASSERTeq(get_curr_allocated(pop), 0); + + /* 2. trigger underflow and attempt crossing zero again */ + underflow(pop, &exp_allocated); + huge_alloc(pop, &oid); + + /* Check the statistic is updated correctly. */ + UT_ASSERTeq(get_curr_allocated(pop), exp_allocated); +} + +int +main(int argc, char *argv[]) +{ + START(argc, argv, "obj_ctl_stats_curr_allocated_wa"); + + if (argc != 2) { + UT_FATAL("usage: %s file-name", argv[0]); + } + + const char *path = argv[1]; + + PMEMobjpool *pop; + pop = pmemobj_create(path, LAYOUT, PMEMOBJ_MIN_POOL, S_IWUSR | S_IRUSR); + UT_ASSERTne(pop, NULL); + + int enabled = 1; + int ret; + + /* Enable stats. */ + ret = pmemobj_ctl_set(pop, "stats.enabled", &enabled); + UT_ASSERTeq(ret, 0); + + test(pop); + + pmemobj_close(pop); + + DONE(NULL); +} diff --git a/src/test/obj_ctl_stats_curr_allocated_wa/pmemobj_grep0.log.match b/src/test/obj_ctl_stats_curr_allocated_wa/pmemobj_grep0.log.match new file mode 100644 index 000000000..dd72bd43e --- /dev/null +++ b/src/test/obj_ctl_stats_curr_allocated_wa/pmemobj_grep0.log.match @@ -0,0 +1,2 @@ +$(S) heap_curr_allocated underflowed: $(N) > heap.size: $(N); recalculating +$(S) heap_curr_allocated underflowed: $(N) > heap.size: $(N); recalculating