diff --git a/components/heap/Kconfig b/components/heap/Kconfig
index 5f10676bc95..170e720668a 100644
--- a/components/heap/Kconfig
+++ b/components/heap/Kconfig
@@ -70,6 +70,30 @@ menu "Heap memory debugging"
             This function depends on heap poisoning being enabled and adds four more bytes of
             overhead for each block allocated.
 
+    config HEAP_TRACE_HASH_MAP
+        bool "Use hash map mechanism to access heap trace records"
+        depends on HEAP_TRACING_STANDALONE
+        default n
+        help
+            Enable this flag to use a hash map to increase performance in handling
+            heap trace records.
+
+            Keeping this option disabled ("n") will save RAM and heap memory but will lower
+            the performance of the heap trace when adding, retrieving and removing trace
+            records. Enabling this option ("y") will consume additional RAM and heap memory
+            but will improve the performance of the heap trace when adding, retrieving and
+            removing trace records.
+
+    config HEAP_TRACE_HASH_MAP_SIZE
+        int "The number of entries in the hash map"
+        depends on HEAP_TRACE_HASH_MAP
+        range 1 10000
+        default 10
+        help
+            Defines the number of entries in the heap trace hash map. The larger this number,
+            the more memory the hash map will use. When the tracing mode is set to
+            HEAP_TRACE_ALL, a larger hash map will improve performance.
+
     config HEAP_ABORT_WHEN_ALLOCATION_FAILS
         bool "Abort if memory allocation fails"
         default n
diff --git a/components/heap/heap_trace_standalone.c b/components/heap/heap_trace_standalone.c
index c422fa84325..7c03a9f93e0 100644
--- a/components/heap/heap_trace_standalone.c
+++ b/components/heap/heap_trace_standalone.c
@@ -5,6 +5,7 @@
  */
 #include <string.h>
 #include <sdkconfig.h>
+#include <inttypes.h>
 
 #define HEAP_TRACE_SRCFILE /* don't warn on inclusion here */
 #include "esp_heap_trace.h"
@@ -58,12 +59,12 @@ typedef struct {
 
 // Forward Defines
 static void heap_trace_dump_base(bool internal_ram, bool psram);
-static void record_deep_copy(heap_trace_record_t *rDest, const heap_trace_record_t* r_src);
+static void record_deep_copy(heap_trace_record_t *r_dest, const heap_trace_record_t *r_src);
 static void list_setup(void);
-static void list_remove(heap_trace_record_t* r_remove);
-static bool list_add(const heap_trace_record_t* r_append);
+static void list_remove(heap_trace_record_t *r_remove);
+static heap_trace_record_t* list_add(const heap_trace_record_t *r_append);
 static heap_trace_record_t* list_pop_unused(void);
-static heap_trace_record_t* list_find_address_reverse(void* p);
+static heap_trace_record_t* list_find_address_reverse(void *p);
 
 /* The actual records. */
 static records_t records;
@@ -78,6 +79,54 @@ static size_t total_frees;
 static heap_trace_record_t* r_get;
 static size_t r_get_idx;
 
+#if CONFIG_HEAP_TRACE_HASH_MAP
+
+/* Define struct: linked list of records used in hash map */
+TAILQ_HEAD(heap_trace_hash_list_struct_t, heap_trace_record_t);
+typedef struct heap_trace_hash_list_struct_t heap_trace_hash_list_t;
+
+static heap_trace_hash_list_t hash_map[(size_t)CONFIG_HEAP_TRACE_HASH_MAP_SIZE]; // Buffer used for hashmap entries
+static size_t total_hashmap_hits;
+static size_t total_hashmap_miss;
+
+static size_t hash_idx(void* p)
+{
+    static const uint32_t fnv_prime = 16777619UL; // 32-bit FNV prime: 2^24 + 2^8 + 0x93
+    // Since all addresses are at least 4-byte aligned, multiplying the address by fnv_prime
+    // alone would only produce multiples of 4. The bit shifts mix in higher address bits so
+    // that the hashes are distributed more evenly over [0, CONFIG_HEAP_TRACE_HASH_MAP_SIZE - 1].
+    return ((((uint32_t)p >> 3) +
+            ((uint32_t)p >> 5) +
+            ((uint32_t)p >> 7)) * fnv_prime) % (uint32_t)CONFIG_HEAP_TRACE_HASH_MAP_SIZE;
+}
+
+static void map_add(heap_trace_record_t *r_add)
+{
+    size_t idx = hash_idx(r_add->address);
+    TAILQ_INSERT_TAIL(&hash_map[idx], r_add, tailq_hashmap);
+}
+
+static void map_remove(heap_trace_record_t *r_remove)
+{
+    size_t idx = hash_idx(r_remove->address);
+    TAILQ_REMOVE(&hash_map[idx], r_remove, tailq_hashmap);
+}
+
+static heap_trace_record_t* map_find(void *p)
+{
+    size_t idx = hash_idx(p);
+    heap_trace_record_t *r_cur = NULL;
+    TAILQ_FOREACH(r_cur, &hash_map[idx], tailq_hashmap) {
+        if (r_cur->address == p) {
+            total_hashmap_hits++;
+            return r_cur;
+        }
+    }
+    total_hashmap_miss++;
+    return NULL;
+}
+#endif // CONFIG_HEAP_TRACE_HASH_MAP
+
 esp_err_t heap_trace_init_standalone(heap_trace_record_t *record_buffer, size_t num_records)
 {
     if (tracing) {
@@ -94,6 +143,15 @@ esp_err_t heap_trace_init_standalone(heap_trace_record_t *record_buffer, size_t
     return ESP_OK;
 }
 
+static esp_err_t set_tracing(bool enable)
+{
+    if (tracing == enable) {
+        return ESP_ERR_INVALID_STATE;
+    }
+    tracing = enable;
+    return ESP_OK;
+}
+
 esp_err_t heap_trace_start(heap_trace_mode_t mode_param)
 {
     if (records.buffer == NULL || records.capacity == 0) {
@@ -102,41 +160,48 @@ esp_err_t heap_trace_start(heap_trace_mode_t mode_param)
 
     portENTER_CRITICAL(&trace_mux);
 
-    tracing = false;
+    set_tracing(false);
 
     mode = mode_param;
 
-    // clear buffer
+    // clear buffers
     memset(records.buffer, 0, sizeof(heap_trace_record_t) * records.capacity);
 
+#if CONFIG_HEAP_TRACE_HASH_MAP
+    for (size_t i = 0; i < (size_t)CONFIG_HEAP_TRACE_HASH_MAP_SIZE; i++) {
+        TAILQ_INIT(&hash_map[i]);
+    }
+
+    total_hashmap_hits = 0;
+    total_hashmap_miss = 0;
+#endif // CONFIG_HEAP_TRACE_HASH_MAP
+
     records.count = 0;
     records.has_overflowed = false;
     list_setup();
 
     total_allocations = 0;
     total_frees = 0;
-    heap_trace_resume();
 
-    portEXIT_CRITICAL(&trace_mux);
-
-    return ESP_OK;
-}
+    const esp_err_t ret_val = set_tracing(true);
 
-static esp_err_t set_tracing(bool enable)
-{
-    if (tracing == enable) {
-        return ESP_ERR_INVALID_STATE;
-    }
-    tracing = enable;
-    return ESP_OK;
+    portEXIT_CRITICAL(&trace_mux);
+    return ret_val;
 }
 
 esp_err_t heap_trace_stop(void)
 {
-    return set_tracing(false);
+    portENTER_CRITICAL(&trace_mux);
+    const esp_err_t ret_val = set_tracing(false);
+    portEXIT_CRITICAL(&trace_mux);
+    return ret_val;
 }
 
 esp_err_t heap_trace_resume(void)
 {
-    return set_tracing(true);
+    portENTER_CRITICAL(&trace_mux);
+    const esp_err_t ret_val = set_tracing(true);
+    portEXIT_CRITICAL(&trace_mux);
+    return ret_val;
 }
 
 size_t heap_trace_get_count(void)
@@ -163,7 +228,7 @@ esp_err_t heap_trace_get(size_t index, heap_trace_record_t *r_out)
 
         // Perf: speed up sequential access
         if (r_get && r_get_idx == index - 1) {
 
-            r_get = TAILQ_NEXT(r_get, tailq);
+            r_get = TAILQ_NEXT(r_get, tailq_list);
             r_get_idx = index;
 
         } else {
@@ -178,21 +243,15 @@ esp_err_t heap_trace_get(size_t index, heap_trace_record_t *r_out)
                     break;
                 }
 
-                r_get = TAILQ_NEXT(r_get, tailq);
+                r_get = TAILQ_NEXT(r_get, tailq_list);
                 r_get_idx = i + 1;
             }
         }
 
-        // copy to destination
-        if (r_get) {
-            memcpy(r_out, r_get, sizeof(heap_trace_record_t));
-        } else {
-            // this should not happen since we already
-            // checked that index < records.count,
-            // but could be indicative of memory corruption
-            result = ESP_ERR_INVALID_STATE;
-            memset(r_out, 0, sizeof(heap_trace_record_t));
-        }
+        // r_get should not be NULL here since we already checked that
+        // index < records.count; if it is, this may indicate memory corruption.
+        assert(r_get != NULL);
+        memcpy(r_out, r_get, sizeof(heap_trace_record_t));
     }
 
     portEXIT_CRITICAL(&trace_mux);
@@ -213,6 +272,10 @@ esp_err_t heap_trace_summary(heap_trace_summary_t *summary)
     summary->capacity = records.capacity;
     summary->high_water_mark = records.high_water_mark;
     summary->has_overflowed = records.has_overflowed;
+#if CONFIG_HEAP_TRACE_HASH_MAP
+    summary->total_hashmap_hits = total_hashmap_hits;
+    summary->total_hashmap_miss = total_hashmap_miss;
+#endif // CONFIG_HEAP_TRACE_HASH_MAP
     portEXIT_CRITICAL(&trace_mux);
 
     return ESP_OK;
@@ -234,76 +297,81 @@ static void heap_trace_dump_base(bool internal_ram, bool psram)
     size_t delta_allocs = 0;
     size_t start_count = records.count;
 
-    esp_rom_printf("====== Heap Trace: %u records (%u capacity) ======\n",
+    esp_rom_printf("====== Heap Trace: %"PRIu32" records (%"PRIu32" capacity) ======\n",
         records.count, records.capacity);
 
     // Iterate through through the linked list
-    heap_trace_record_t *rCur = TAILQ_FIRST(&records.list);
+    heap_trace_record_t *r_cur = TAILQ_FIRST(&records.list);
 
     for (int i = 0; i < records.count; i++) {
 
         // check corruption
-        if (rCur == NULL) {
+        if (r_cur == NULL) {
            esp_rom_printf("\nError: heap trace linked list is corrupt. expected more records.\n");
            break;
         }
 
-        bool should_print = rCur->address != NULL &&
+        bool should_print = r_cur->address != NULL &&
             ((psram && internal_ram) ||
-                (internal_ram && esp_ptr_internal(rCur->address)) ||
-                (psram && esp_ptr_external_ram(rCur->address)));
+                (internal_ram && esp_ptr_internal(r_cur->address)) ||
+                (psram && esp_ptr_external_ram(r_cur->address)));
 
         if (should_print) {
 
             const char* label = "";
-            if (esp_ptr_internal(rCur->address)) {
+            if (esp_ptr_internal(r_cur->address)) {
                 label = ", Internal";
             }
-            if (esp_ptr_external_ram(rCur->address)) {
+            if (esp_ptr_external_ram(r_cur->address)) {
                 label = ", PSRAM";
             }
 
             esp_rom_printf("%6d bytes (@ %p%s) allocated CPU %d ccount 0x%08x caller ",
-                           rCur->size, rCur->address, label, rCur->ccount & 1, rCur->ccount & ~3);
+                           r_cur->size, r_cur->address, label, r_cur->ccount & 1, r_cur->ccount & ~3);
 
-            for (int j = 0; j < STACK_DEPTH && rCur->alloced_by[j] != 0; j++) {
-                esp_rom_printf("%p%s", rCur->alloced_by[j],
+            for (int j = 0; j < STACK_DEPTH && r_cur->alloced_by[j] != 0; j++) {
+                esp_rom_printf("%p%s", r_cur->alloced_by[j],
                                (j < STACK_DEPTH - 1) ? ":" : "");
             }
 
-            if (mode != HEAP_TRACE_ALL || STACK_DEPTH == 0 || rCur->freed_by[0] == NULL) {
-                delta_size += rCur->size;
+            if (mode != HEAP_TRACE_ALL || STACK_DEPTH == 0 || r_cur->freed_by[0] == NULL) {
+                delta_size += r_cur->size;
                 delta_allocs++;
                 esp_rom_printf("\n");
             } else {
 
                 esp_rom_printf("\nfreed by ");
 
                 for (int j = 0; j < STACK_DEPTH; j++) {
-                    esp_rom_printf("%p%s", rCur->freed_by[j],
+                    esp_rom_printf("%p%s", r_cur->freed_by[j],
                                    (j < STACK_DEPTH - 1) ?
":" : "\n"); } } } - rCur = TAILQ_NEXT(rCur, tailq); + r_cur = TAILQ_NEXT(r_cur, tailq_list); } esp_rom_printf("====== Heap Trace Summary ======\n"); if (mode == HEAP_TRACE_ALL) { esp_rom_printf("Mode: Heap Trace All\n"); - esp_rom_printf("%u bytes alive in trace (%u/%u allocations)\n", + esp_rom_printf("%"PRIu32" bytes alive in trace (%"PRIu32"/%"PRIu32" allocations)\n", delta_size, delta_allocs, heap_trace_get_count()); } else { esp_rom_printf("Mode: Heap Trace Leaks\n"); - esp_rom_printf("%u bytes 'leaked' in trace (%u allocations)\n", delta_size, delta_allocs); + esp_rom_printf("%"PRIu32" bytes 'leaked' in trace (%"PRIu32" allocations)\n", delta_size, delta_allocs); } - esp_rom_printf("records: %u (%u capacity, %u high water mark)\n", + esp_rom_printf("records: %"PRIu32" (%"PRIu32" capacity, %"PRIu32" high water mark)\n", records.count, records.capacity, records.high_water_mark); - esp_rom_printf("total allocations: %u\n", total_allocations); - esp_rom_printf("total frees: %u\n", total_frees); +#if CONFIG_HEAP_TRACE_HASH_MAP + esp_rom_printf("hashmap: %"PRIu32" capacity (%"PRIu32" hits, %"PRIu32" misses)\n", + (size_t)CONFIG_HEAP_TRACE_HASH_MAP_SIZE, total_hashmap_hits, total_hashmap_miss); +#endif // CONFIG_HEAP_TRACE_HASH_MAP + + esp_rom_printf("total allocations: %"PRIu32"\n", total_allocations); + esp_rom_printf("total frees: %"PRIu32"\n", total_frees); if (start_count != records.count) { // only a problem if trace isn't stopped before dumping esp_rom_printf("(NB: New entries were traced while dumping, so trace dump may have duplicate entries.)\n"); @@ -317,30 +385,27 @@ static void heap_trace_dump_base(bool internal_ram, bool psram) } /* Add a new allocation to the heap trace records */ -static IRAM_ATTR void record_allocation(const heap_trace_record_t *rAllocation) +static IRAM_ATTR void record_allocation(const heap_trace_record_t *r_allocation) { - if (!tracing || rAllocation->address == NULL) { + if (!tracing || r_allocation->address == NULL) { return; } portENTER_CRITICAL(&trace_mux); if (tracing) { - // If buffer is full, pop off the oldest // record to make more space if (records.count == records.capacity) { records.has_overflowed = true; - heap_trace_record_t *rFirst = TAILQ_FIRST(&records.list); + heap_trace_record_t *r_first = TAILQ_FIRST(&records.list); - list_remove(rFirst); + list_remove(r_first); } - // push onto end of list - list_add(rAllocation); - + list_add(r_allocation); total_allocations++; } @@ -357,30 +422,34 @@ static IRAM_ATTR void record_allocation(const heap_trace_record_t *rAllocation) */ static IRAM_ATTR void record_free(void *p, void **callers) { - if (!tracing || p == NULL) { + if (!tracing || p == NULL) { return; } portENTER_CRITICAL(&trace_mux); + // return directly if records.count == 0. In case of hashmap being used + // this prevents the hashmap to return an item that is no longer in the + // records list. 
+    if (records.count == 0) {
+        portEXIT_CRITICAL(&trace_mux);
+        return;
+    }
 
     if (tracing) {
 
         total_frees++;
 
-        // search backwards for the allocation record matching this free
-        heap_trace_record_t* rFound = list_find_address_reverse(p);
-
-        if (rFound) {
+        heap_trace_record_t *r_found = list_find_address_reverse(p);
+        if (r_found) {
 
             if (mode == HEAP_TRACE_ALL) {
 
                 // add 'freed_by' info to the record
-                memcpy(rFound->freed_by, callers, sizeof(void *) * STACK_DEPTH);
+                memcpy(r_found->freed_by, callers, sizeof(void *) * STACK_DEPTH);
 
             } else { // HEAP_TRACE_LEAKS
-                // Leak trace mode, once an allocation is freed
-                // we remove it from the list
-                list_remove(rFound);
+                // we remove it from the list & hashmap
+                list_remove(r_found);
             }
         }
     }
 
@@ -396,27 +465,31 @@ static void list_setup(void)
 
     for (int i = 0; i < records.capacity; i++) {
 
-        heap_trace_record_t* rCur = &records.buffer[i];
+        heap_trace_record_t *r_cur = &records.buffer[i];
 
-        TAILQ_INSERT_TAIL(&records.unused, rCur, tailq);
+        TAILQ_INSERT_TAIL(&records.unused, r_cur, tailq_list);
     }
 }
 
 /* 1. removes record r_remove from records.list,
    2. places it into records.unused */
-static IRAM_ATTR void list_remove(heap_trace_record_t* r_remove)
+static IRAM_ATTR void list_remove(heap_trace_record_t *r_remove)
 {
     assert(records.count > 0);
 
+#if CONFIG_HEAP_TRACE_HASH_MAP
+    map_remove(r_remove);
+#endif
+
     // remove from records.list
-    TAILQ_REMOVE(&records.list, r_remove, tailq);
+    TAILQ_REMOVE(&records.list, r_remove, tailq_list);
 
     // set as unused
     r_remove->address = 0;
     r_remove->size = 0;
 
     // add to records.unused
-    TAILQ_INSERT_HEAD(&records.unused, r_remove, tailq);
+    TAILQ_INSERT_HEAD(&records.unused, r_remove, tailq_list);
 
     // decrement
     records.count--;
@@ -432,45 +505,45 @@ static IRAM_ATTR heap_trace_record_t* list_pop_unused(void)
     }
 
     // get from records.unused
-    heap_trace_record_t* rUnused = TAILQ_FIRST(&records.unused);
-    assert(rUnused->address == NULL);
-    assert(rUnused->size == 0);
+    heap_trace_record_t *r_unused = TAILQ_FIRST(&records.unused);
+    assert(r_unused->address == NULL);
+    assert(r_unused->size == 0);
 
     // remove from records.unused
-    TAILQ_REMOVE(&records.unused, rUnused, tailq);
+    TAILQ_REMOVE(&records.unused, r_unused, tailq_list);
 
-    return rUnused;
+    return r_unused;
 }
 
 // deep copy a record.
 // Note: only copies the *allocation data*, not the next & prev ptrs
-static IRAM_ATTR void record_deep_copy(heap_trace_record_t *rDest, const heap_trace_record_t *r_src)
+static IRAM_ATTR void record_deep_copy(heap_trace_record_t *r_dest, const heap_trace_record_t *r_src)
 {
-    rDest->ccount = r_src->ccount;
-    rDest->address = r_src->address;
-    rDest->size = r_src->size;
-    memcpy(rDest->freed_by, r_src->freed_by, sizeof(void *) * STACK_DEPTH);
-    memcpy(rDest->alloced_by, r_src->alloced_by, sizeof(void *) * STACK_DEPTH);
+    r_dest->ccount = r_src->ccount;
+    r_dest->address = r_src->address;
+    r_dest->size = r_src->size;
+    memcpy(r_dest->freed_by, r_src->freed_by, sizeof(void *) * STACK_DEPTH);
+    memcpy(r_dest->alloced_by, r_src->alloced_by, sizeof(void *) * STACK_DEPTH);
 }
 
 // Append a record to records.list
 // Note: This deep copies r_append
-static IRAM_ATTR bool list_add(const heap_trace_record_t *r_append)
+static IRAM_ATTR heap_trace_record_t* list_add(const heap_trace_record_t *r_append)
 {
     if (records.count < records.capacity) {
 
         // get unused record
-        heap_trace_record_t* rDest = list_pop_unused();
+        heap_trace_record_t *r_dest = list_pop_unused();
 
         // we checked that there is capacity, so this
         // should never be null.
-        assert(rDest != NULL);
+        assert(r_dest != NULL);
 
         // copy allocation data
-        record_deep_copy(rDest, r_append);
+        record_deep_copy(r_dest, r_append);
 
         // append to records.list
-        TAILQ_INSERT_TAIL(&records.list, rDest, tailq);
+        TAILQ_INSERT_TAIL(&records.list, r_dest, tailq_list);
 
         // increment
         records.count++;
@@ -480,34 +553,43 @@
             records.high_water_mark = records.count;
         }
 
-        return true;
+#if CONFIG_HEAP_TRACE_HASH_MAP
+        map_add(r_dest);
+#endif
+
+        return r_dest;
 
     } else {
         records.has_overflowed = true;
 
-        return false;
+        return NULL;
     }
 }
 
 // search records.list backwards for the allocation record matching this address
-static IRAM_ATTR heap_trace_record_t* list_find_address_reverse(void* p)
+static IRAM_ATTR heap_trace_record_t* list_find_address_reverse(void *p)
 {
-    if (records.count == 0) {
-        return NULL;
-    }
+    heap_trace_record_t *r_found = NULL;
+
+#if CONFIG_HEAP_TRACE_HASH_MAP
+    // check the hashmap
+    r_found = map_find(p);
 
-    heap_trace_record_t* rFound = NULL;
+    if (r_found != NULL) {
+        return r_found;
+    }
+#endif
 
     // Perf: We search backwards because new allocations are appended
     // to the end of the list and most allocations are short lived.
-    heap_trace_record_t *rCur = NULL;
-    TAILQ_FOREACH_REVERSE(rCur, &records.list, heap_trace_record_list_struct_t, tailq) {
-        if (rCur->address == p) {
-            rFound = rCur;
+    heap_trace_record_t *r_cur = NULL;
+    TAILQ_FOREACH(r_cur, &records.list, tailq_list) {
+        if (r_cur->address == p) {
+            r_found = r_cur;
             break;
         }
     }
 
-    return rFound;
+    return r_found;
 }
 
 #include "heap_trace.inc"
diff --git a/components/heap/include/esp_heap_trace.h b/components/heap/include/esp_heap_trace.h
index b1c5d476e4c..2b0daa2e4c6 100644
--- a/components/heap/include/esp_heap_trace.h
+++ b/components/heap/include/esp_heap_trace.h
@@ -36,8 +36,11 @@ typedef struct heap_trace_record_t {
     size_t size;                                           ///< Size of the allocation
     void *alloced_by[CONFIG_HEAP_TRACING_STACK_DEPTH];     ///< Call stack of the caller which allocated the memory.
     void *freed_by[CONFIG_HEAP_TRACING_STACK_DEPTH];       ///< Call stack of the caller which freed the memory (all zero if not freed.)
-#ifdef CONFIG_HEAP_TRACING_STANDALONE
-    TAILQ_ENTRY(heap_trace_record_t) tailq;                ///< Linked list: prev & next records
+#if CONFIG_HEAP_TRACING_STANDALONE
+    TAILQ_ENTRY(heap_trace_record_t) tailq_list;           ///< Linked list: prev & next records
+#if CONFIG_HEAP_TRACE_HASH_MAP
+    TAILQ_ENTRY(heap_trace_record_t) tailq_hashmap;        ///< Linked list: prev & next in hashmap entry list
+#endif // CONFIG_HEAP_TRACE_HASH_MAP
 #endif // CONFIG_HEAP_TRACING_STANDALONE
 } heap_trace_record_t;
 
@@ -52,6 +55,10 @@ typedef struct {
     size_t capacity;            ///< The capacity of the internal buffer
     size_t high_water_mark;     ///< The maximum value that 'count' got to
     size_t has_overflowed;      ///< True if the internal buffer overflowed at some point
+#if CONFIG_HEAP_TRACE_HASH_MAP
+    size_t total_hashmap_hits;  ///< If hashmap is used, the total number of hits
+    size_t total_hashmap_miss;  ///< If hashmap is used, the total number of misses (possibly due to overflow)
+#endif
 } heap_trace_summary_t;
 
 /**
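
Reviewer note (not part of the patch): a minimal usage sketch of the standalone tracer with this
option enabled, showing where the new summary counters surface. It assumes
CONFIG_HEAP_TRACING_STANDALONE=y (and optionally CONFIG_HEAP_TRACE_HASH_MAP=y); NUM_RECORDS and
example_trace_leaks are illustrative names, not part of the API.

#include <stdio.h>
#include "sdkconfig.h"
#include "esp_err.h"
#include "esp_heap_trace.h"

#define NUM_RECORDS 100
// The record buffer must stay valid for as long as tracing is active.
static heap_trace_record_t trace_records[NUM_RECORDS];

void example_trace_leaks(void)
{
    ESP_ERROR_CHECK(heap_trace_init_standalone(trace_records, NUM_RECORDS));
    ESP_ERROR_CHECK(heap_trace_start(HEAP_TRACE_LEAKS));

    // ... exercise the code suspected of leaking ...

    ESP_ERROR_CHECK(heap_trace_stop());
    heap_trace_dump();

    heap_trace_summary_t summary;
    ESP_ERROR_CHECK(heap_trace_summary(&summary));
#if CONFIG_HEAP_TRACE_HASH_MAP
    // Fields added by this patch: how often a free was resolved via the hash map.
    printf("hashmap hits: %u, misses: %u\n",
           (unsigned) summary.total_hashmap_hits, (unsigned) summary.total_hashmap_miss);
#endif
}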
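
Design note (hedged sketch, not ESP-IDF code): with this patch each record is linked into two
intrusive TAILQs at once — the global record list via tailq_list and one hash bucket via
tailq_hashmap — so record_free() can resolve an address by scanning a single bucket instead of
walking the whole list. The toy program below, with made-up names (node_t, bucket_idx, BUCKETS),
illustrates how one node can live on two queues when it carries two TAILQ_ENTRY fields.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>
#include <sys/queue.h>

#define BUCKETS 4

typedef struct node_t {
    void *address;
    TAILQ_ENTRY(node_t) tailq_list;     // links the node into the global list
    TAILQ_ENTRY(node_t) tailq_hashmap;  // links the same node into its hash bucket
} node_t;

TAILQ_HEAD(node_list_t, node_t);

static struct node_list_t all_nodes = TAILQ_HEAD_INITIALIZER(all_nodes);
static struct node_list_t buckets[BUCKETS];

static size_t bucket_idx(void *p)
{
    // toy hash: fold a few address bits into the bucket range
    return (((uintptr_t)p >> 3) ^ ((uintptr_t)p >> 7)) % BUCKETS;
}

int main(void)
{
    static node_t nodes[8];

    for (int i = 0; i < BUCKETS; i++) {
        TAILQ_INIT(&buckets[i]);
    }

    for (int i = 0; i < 8; i++) {
        nodes[i].address = &nodes[i];
        TAILQ_INSERT_TAIL(&all_nodes, &nodes[i], tailq_list);
        TAILQ_INSERT_TAIL(&buckets[bucket_idx(nodes[i].address)], &nodes[i], tailq_hashmap);
    }

    // An address lookup only walks one bucket, not the whole list.
    void *target = &nodes[5];
    node_t *cur = NULL;
    TAILQ_FOREACH(cur, &buckets[bucket_idx(target)], tailq_hashmap) {
        if (cur->address == target) {
            printf("found %p in bucket %u\n", cur->address, (unsigned) bucket_idx(target));
            break;
        }
    }
    return 0;
}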