Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ XXX
* Version X.X.X

- Fix "The pool was not closed" message (no ADR failure) (daos-stack/pmdk#1, DAOS-18692)
- Recalculate curr_allocated on underflow WA (daos-stack/pmdk#37, DAOS-18882)

Fri Jan 16 2026 Oksana Sałyk <oksana.salyk@hpe.com>

Expand Down
60 changes: 60 additions & 0 deletions src/libpmemobj/heap.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: BSD-3-Clause
/* Copyright 2015-2024, Intel Corporation */
/* Copyright 2026, Hewlett Packard Enterprise Development LP */

/*
* heap.c -- heap implementation
Expand Down Expand Up @@ -812,6 +813,65 @@ heap_reclaim_zone_garbage(struct palloc_heap *heap, struct bucket *bucket,
}
}

/*
 * heap_zone_get_allocated -- (internal) sums up the real size of all allocated
 * (non-free) memory blocks in the zone.
 *
 * Note: It is not meant to calculate the sum of all allocations.
 * It follows the algorithm used to calculate the heap_curr_allocated statistic.
 * e.g. It does not take into account free space in runs.
 *
 * Returns 0 for a zone that has never been initialized (no magic).
 */
static uint64_t
heap_zone_get_allocated(struct palloc_heap *heap, uint32_t zone_id)
{
	struct zone *z = ZID_TO_ZONE(heap->layout, zone_id);
	uint64_t used = 0;

	/* an uninitialized zone contains no allocations */
	if (z->header.magic != ZONE_HEADER_MAGIC)
		return 0;

	/* walk the chunk headers; each iteration skips one whole block */
	for (uint32_t i = 0; i < z->header.size_idx; ) {
		struct chunk_header *hdr = &z->chunk_headers[i];
		ASSERT(hdr->size_idx != 0);

		/* rebuild the volatile state so m_ops are usable */
		struct memory_block m = MEMORY_BLOCK_NONE;
		m.zone_id = zone_id;
		m.chunk_id = i;
		m.size_idx = hdr->size_idx;
		memblock_rebuild_state(heap, &m);

		switch (hdr->type) {
		case CHUNK_TYPE_USED:
		case CHUNK_TYPE_RUN:
			/*
			 * Used chunks and runs both count as allocated;
			 * free space inside runs is intentionally ignored
			 * (see the note above).
			 */
			used += m.m_ops->get_real_size(&m);
			break;
		case CHUNK_TYPE_FREE:
			break;
		default:
			ASSERT(0);
		}

		/*
		 * Advance using the rebuilt block's fields rather than the
		 * raw header. NOTE(review): assumes memblock_rebuild_state()
		 * may adjust size_idx -- confirm against its implementation.
		 */
		i = m.chunk_id + m.size_idx;
	}

	return used;
}

/*
 * heap_curr_allocated_sum -- calculate the curr_allocated statistic
 *
 * Sums the allocated bytes reported by every zone of the heap.
 */
uint64_t
heap_curr_allocated_sum(struct palloc_heap *heap)
{
	uint64_t total = 0;
	uint32_t zone_id = 0;

	while (zone_id < heap->rt->nzones) {
		total += heap_zone_get_allocated(heap, zone_id);
		++zone_id;
	}

	return total;
}

/*
* heap_ensure_zone_reclaimed -- make sure that the specified zone has been
* already reclaimed.
Expand Down
4 changes: 4 additions & 0 deletions src/libpmemobj/heap.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: BSD-3-Clause */
/* Copyright 2015-2024, Intel Corporation */
/* Copyright 2026, Hewlett Packard Enterprise Development LP */

/*
* heap.h -- internal definitions for heap
Expand Down Expand Up @@ -74,6 +75,9 @@ heap_memblock_on_free(struct palloc_heap *heap, const struct memory_block *m);
void
heap_ensure_zone_reclaimed(struct palloc_heap *heap, uint32_t zone_id);

uint64_t
heap_curr_allocated_sum(struct palloc_heap *heap);

int
heap_free_chunk_reuse(struct palloc_heap *heap,
struct bucket *bucket, struct memory_block *m);
Expand Down
60 changes: 59 additions & 1 deletion src/libpmemobj/stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,16 @@
* stats.c -- implementation of statistics
*/

#include <inttypes.h>

#include "heap.h"
#include "obj.h"
#include "stats.h"
#include "core_assert.h"

STATS_CTL_HANDLER(persistent, curr_allocated, heap_curr_allocated);
static int
CTL_READ_HANDLER(persistent_curr_allocated)(void *ctx,
enum ctl_query_source source, void *arg, struct ctl_indexes *indexes);

STATS_CTL_HANDLER(transient, run_allocated, heap_run_allocated);
STATS_CTL_HANDLER(transient, run_active, heap_run_active);
Expand All @@ -22,6 +27,59 @@ static const struct ctl_node CTL_NODE(heap)[] = {
CTL_NODE_END
};

/*
 * Warning emitted when the stored heap_curr_allocated value exceeds the heap
 * size, i.e. it must have underflowed and needs to be recalculated.
 */
#define CURR_ALLOCATED_UNDERFLOW_FMT \
	"heap_curr_allocated underflowed: %" PRIu64 " > heap.size: %" PRIu64 \
	"; recalculating"

/*
 * CTL_READ_HANDLER(persistent_curr_allocated) -- returns curr_allocated field
 *
 * Reads the persistent heap_curr_allocated statistic into *arg and, when the
 * stored value is obviously broken (larger than the heap itself), recomputes
 * it from the heap layout and stores the corrected value back.
 */
static int
CTL_READ_HANDLER(persistent_curr_allocated)(void *ctx,
	enum ctl_query_source source, void *arg, struct ctl_indexes *indexes)
{
	/* suppress unused-parameter errors */
	SUPPRESS_UNUSED(source, indexes);

	PMEMobjpool *pop = ctx;
	uint64_t *curr_allocated = &pop->stats->persistent->heap_curr_allocated;

	/* arg is the caller-supplied uint64_t output buffer */
	uint64_t *argv = arg;
	util_atomic_load_explicit64(curr_allocated, argv, memory_order_acquire);

	/*
	 * heap_curr_allocated, although stored persistently, is not updated
	 * transactionally nor reliably persisted. This means e.g. that in case
	 * a transaction succeeds but the process gets terminated before
	 * the update of heap_curr_allocated, the value of heap_curr_allocated
	 * will get out of sync with the actual heap state.
	 *
	 * The most obvious case of this happening is when heap_curr_allocated
	 * is actually smaller than the sum of the sizes of all allocations in
	 * the heap, so in case all of the allocations are freed,
	 * heap_curr_allocated will underflow and get a very big value, bigger
	 * than heap size.
	 *
	 * Ref: https://daosio.atlassian.net/browse/DAOS-18882
	 *
	 * This workaround detects this most obvious case and recalculates.
	 *
	 * Note: Not thread-safe. NOTE(review): allocations/frees happening
	 * between the recalculation and the store below would presumably be
	 * lost from the statistic -- confirm this is acceptable for a WA.
	 */
	if (*argv > *pop->heap.sizep) { /* covers the == UINT64_MAX case */
		CORE_LOG_WARNING(CURR_ALLOCATED_UNDERFLOW_FMT,
			*argv, *pop->heap.sizep);

		/* if the value is broken, recalculate it */
		*argv = heap_curr_allocated_sum(&pop->heap);

		util_atomic_store_explicit64(curr_allocated, *argv,
			memory_order_release);
	}

	return 0;
}

/*
* CTL_READ_HANDLER(enabled) -- returns whether or not statistics are enabled
*/
Expand Down
34 changes: 30 additions & 4 deletions src/libpmemobj/stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,15 @@ struct stats {

/*
 * STATS_INC_persistent -- increment a persistent statistic by 'value'.
 *
 * When the counter holds UINT64_MAX it is considered broken (see
 * STATS_SUB_persistent below) and is left untouched so that the read
 * handler can detect it and recalculate.
 *
 * Note: this region of the diff interleaved removed and added lines; this is
 * the reconstructed final macro. 'value' is evaluated once.
 */
#define STATS_INC_persistent(stats, name, value) do {\
	if ((stats)->enabled == POBJ_STATS_ENABLED_PERSISTENT ||\
		(stats)->enabled == POBJ_STATS_ENABLED_BOTH) {\
		uint64_t curr_value;\
		util_atomic_load_explicit64((&(stats)->persistent->name),\
			&curr_value, memory_order_acquire);\
		if (curr_value != UINT64_MAX) {\
			util_fetch_and_add64(\
				(&(stats)->persistent->name), (value));\
		}\
	}\
} while (0)

#define STATS_SUB(stats, type, name, value) do {\
Expand All @@ -56,10 +63,29 @@ struct stats {
util_fetch_and_sub64((&(stats)->transient->name), (value));\
} while (0)

/*
 * This macro handles only the curr_allocated statistic. Sadly, its current
 * design makes it possible for it to underflow, which would lead to very large
 * values due to the unsigned type. To mitigate this, if the value is already at
 * UINT64_MAX or the subtraction would cause an underflow, the value is set to
 * UINT64_MAX to indicate that it's broken and needs to be recalculated.
 *
 * Note: 'value' may be evaluated more than once; do not pass expressions
 * with side effects.
 */
#define STATS_SUB_persistent(stats, name, value) do {\
	if ((stats)->enabled == POBJ_STATS_ENABLED_PERSISTENT ||\
		(stats)->enabled == POBJ_STATS_ENABLED_BOTH) {\
		uint64_t curr_value;\
		util_atomic_load_explicit64((&(stats)->persistent->name),\
			&curr_value, memory_order_acquire);\
		if (curr_value != UINT64_MAX) {\
			/* fetch_and_sub returns the value BEFORE the sub */\
			curr_value = util_fetch_and_sub64(\
				(&(stats)->persistent->name), (value));\
			/* underflow, or broken concurrently -> mark broken */\
			if (curr_value < (value) ||\
					curr_value == UINT64_MAX) {\
				util_atomic_store_explicit64(\
					(&(stats)->persistent->name),\
					UINT64_MAX, memory_order_release);\
			}\
		}\
	}\
} while (0)

#define STATS_SET(stats, type, name, value) do {\
Expand Down
2 changes: 2 additions & 0 deletions src/test/Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright 2014-2024, Intel Corporation
# Copyright 2026, Hewlett Packard Enterprise Development LP
#

#
Expand Down Expand Up @@ -39,6 +40,7 @@ OBJ_TESTS = \
obj_ctl_debug\
obj_ctl_heap_size\
obj_ctl_stats\
obj_ctl_stats_curr_allocated_wa\
obj_debug\
obj_defrag\
obj_defrag_advanced\
Expand Down
1 change: 1 addition & 0 deletions src/test/obj_ctl_stats_curr_allocated_wa/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
obj_ctl_stats_curr_allocated_wa
15 changes: 15 additions & 0 deletions src/test/obj_ctl_stats_curr_allocated_wa/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright 2026, Hewlett Packard Enterprise Development LP

#
# src/test/obj_ctl_stats_curr_allocated_wa/Makefile --
#
TARGET = obj_ctl_stats_curr_allocated_wa
OBJS = obj_ctl_stats_curr_allocated_wa.o

BUILD_STATIC_DEBUG=n
BUILD_STATIC_NONDEBUG=n

LIBPMEMOBJ=y

include ../Makefile.inc
21 changes: 21 additions & 0 deletions src/test/obj_ctl_stats_curr_allocated_wa/TEST0
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env bash
# SPDX-License-Identifier: BSD-3-Clause
# Copyright 2026, Hewlett Packard Enterprise Development LP

. ../unittest/unittest.sh

require_test_type short
require_fs_type any
require_build_type debug

setup

OUT_CHECK=pmemobj_grep${UNITTEST_NUM}.log

expect_normal_exit ./obj_ctl_stats_curr_allocated_wa$EXESUFFIX "$DIR/testfile1"

# extract the read handler's log lines for comparison with the match file;
# grep reads the log directly (no need for cat) and expansions are quoted
grep "ctl__persistent_curr_allocated_read" "$PMEMOBJ_LOG_FILE" > "$OUT_CHECK"

check

pass
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// SPDX-License-Identifier: BSD-3-Clause
/* Copyright 2026, Hewlett Packard Enterprise Development LP */

/*
* obj_ctl_stats_curr_allocated_wa.c -- tests for curr_allocated WA
*/

#include "unittest.h"

#include <../../libpmemobj/obj.h>

#define LAYOUT "ctl"

#define MB (1 << 20)

/*
 * set_curr_allocated -- overwrite the persisted curr_allocated statistic
 * with an arbitrary value (fault injection)
 */
static void
set_curr_allocated(PMEMobjpool *pop, uint64_t value)
{
	struct pmemobjpool *ppop = (struct pmemobjpool *)pop;
	uint64_t *stat = &ppop->stats_persistent.heap_curr_allocated;

	*stat = value;
	pmem_persist(stat, sizeof(*stat));
}

/*
 * get_curr_allocated -- read the curr_allocated statistic via the ctl API
 */
static uint64_t
get_curr_allocated(PMEMobjpool *pop)
{
	uint64_t allocated;
	int ret = pmemobj_ctl_get(pop, "stats.heap.curr_allocated",
			&allocated);

	UT_ASSERTeq(ret, 0);

	return allocated;
}

/*
 * huge_alloc -- allocate a block big enough to be served as a huge chunk
 */
static void
huge_alloc(PMEMobjpool *pop, PMEMoid *oidp)
{
	int ret = pmemobj_alloc(pop, oidp, 4 * MB, 0, NULL, NULL);

	UT_ASSERTeq(ret, 0);
}

/*
 * underflow -- force the persisted curr_allocated statistic to underflow
 *
 * On return *exp_allocated holds the statistic's value as reported for the
 * single huge allocation before the fault was injected.
 */
static void
underflow(PMEMobjpool *pop, uint64_t *exp_allocated)
{
	PMEMoid oid;
	uint64_t allocated;

	UT_ASSERTne(exp_allocated, NULL);

	/* make a huge allocation and remember the reported statistic */
	huge_alloc(pop, &oid);
	allocated = get_curr_allocated(pop);
	*exp_allocated = allocated;

	/* inject a value one byte smaller than what was really allocated... */
	set_curr_allocated(pop, allocated - 1);

	/* ...so freeing the block subtracts more than is stored */
	pmemobj_free(&oid);
}

/*
 * test -- trigger the curr_allocated underflow twice and verify the read
 * handler's workaround recovers a sane value each time
 */
static void
test(PMEMobjpool *pop)
{
	PMEMoid oid;
	uint64_t exp_allocated;

	/* 1. trigger underflow */
	underflow(pop, &exp_allocated);

	/*
	 * Check the statistic is updated correctly.
	 * All allocations were freed, so the recalculated sum must be 0.
	 */
	UT_ASSERTeq(get_curr_allocated(pop), 0);

	/* 2. trigger underflow and attempt crossing zero again */
	underflow(pop, &exp_allocated);
	huge_alloc(pop, &oid);

	/*
	 * Check the statistic is updated correctly.
	 * One huge allocation is live again, so the recalculated sum should
	 * match what was reported for it before the fault injection.
	 */
	UT_ASSERTeq(get_curr_allocated(pop), exp_allocated);
}

/*
 * main -- create a pool, enable statistics and run the WA test
 */
int
main(int argc, char *argv[])
{
	START(argc, argv, "obj_ctl_stats_curr_allocated_wa");

	if (argc != 2) {
		UT_FATAL("usage: %s file-name", argv[0]);
	}

	/* create a fresh pool for the test */
	PMEMobjpool *pop = pmemobj_create(argv[1], LAYOUT, PMEMOBJ_MIN_POOL,
			S_IWUSR | S_IRUSR);
	UT_ASSERTne(pop, NULL);

	/* statistics are off by default -- enable them */
	int enabled = 1;
	int ret = pmemobj_ctl_set(pop, "stats.enabled", &enabled);
	UT_ASSERTeq(ret, 0);

	test(pop);

	pmemobj_close(pop);

	DONE(NULL);
}
Loading
Loading