Skip to content

Commit

Permalink
Merge branch 'feature/lower-iram-utilization-of-heap-component' into …
Browse files Browse the repository at this point in the history
…'master'

heap: lower the utilization of IRAM by the heap component binary

Closes IDF-2282 and IDFGH-6233

See merge request espressif/esp-idf!20926
  • Loading branch information
SoucheSouche committed Nov 25, 2022
2 parents 1b09f26 + b496bab commit 1d16ca6
Show file tree
Hide file tree
Showing 12 changed files with 152 additions and 37 deletions.
3 changes: 1 addition & 2 deletions components/heap/heap_caps.c
Expand Up @@ -15,7 +15,6 @@
#include "heap_private.h"
#include "esp_system.h"


/* Forward declaration for base function, put in IRAM.
* These functions don't check for errors after trying to allocate memory. */
static void *heap_caps_realloc_base( void *ptr, size_t size, uint32_t caps );
Expand Down Expand Up @@ -56,7 +55,7 @@ IRAM_ATTR static void *dram_alloc_to_iram_addr(void *addr, size_t len)
}


static void heap_caps_alloc_failed(size_t requested_size, uint32_t caps, const char *function_name)
IRAM_ATTR NOINLINE_ATTR static void heap_caps_alloc_failed(size_t requested_size, uint32_t caps, const char *function_name)
{
if (alloc_failed_callback) {
alloc_failed_callback(requested_size, caps, function_name);
Expand Down
2 changes: 1 addition & 1 deletion components/heap/heap_private.h
Expand Up @@ -43,7 +43,7 @@ extern SLIST_HEAD(registered_heap_ll, heap_t_) registered_heaps;
bool heap_caps_match(const heap_t *heap, uint32_t caps);

/* return all possible capabilities (across all priorities) for a given heap */
inline static IRAM_ATTR uint32_t get_all_caps(const heap_t *heap)
inline static uint32_t get_all_caps(const heap_t *heap)
{
if (heap->heap == NULL) {
return 0;
Expand Down
9 changes: 9 additions & 0 deletions components/heap/internals.md
@@ -0,0 +1,9 @@
# Function placement in IRAM section

The heap component is compiled and linked in a way that minimizes the utilization of the IRAM section of memory without impacting the performance of its core functionalities. For this reason, the heap component API provided through [esp_heap_caps.h](./include/esp_heap_caps.h) and [esp_heap_caps_init.h](./include/esp_heap_caps_init.h) can be sorted into two sets of functions.

1. The performance related functions placed into the IRAM by using the `IRAM_ATTR` defined in [esp_attr.h](./../../components/esp_common/include/esp_attr.h) (e.g., `heap_caps_malloc`, `heap_caps_free`, `heap_caps_realloc`, etc.)

2. The functions that do not require the best performance, which are placed in flash (e.g., `heap_caps_print_heap_info`, `heap_caps_dump`, `heap_caps_dump_all`, etc.)

With that in mind, all the functions defined in [multi_heap.c](./multi_heap.c), [multi_heap_poisoning.c](./multi_heap_poisoning.c) and [tlsf.c](./tlsf/tlsf.c) that are directly or indirectly called from one of the heap component API functions placed in IRAM have to also be placed in IRAM. Symmetrically, the functions directly or indirectly called from one of the heap component API functions placed in flash will also be placed in flash.
52 changes: 49 additions & 3 deletions components/heap/linker.lf
Expand Up @@ -2,7 +2,53 @@
archive: libheap.a
entries:
if HEAP_TLSF_USE_ROM_IMPL = n:
tlsf (noflash)
multi_heap (noflash)
tlsf:tlsf_block_size (noflash)
tlsf:tlsf_size (noflash)
tlsf:tlsf_align_size (noflash)
tlsf:tlsf_block_size_min (noflash)
tlsf:tlsf_block_size_max (noflash)
tlsf:tlsf_alloc_overhead (noflash)
tlsf:tlsf_get_pool (noflash)
tlsf:tlsf_malloc (noflash)
tlsf:tlsf_memalign_offs (noflash)
tlsf:tlsf_memalign (noflash)
tlsf:tlsf_free (noflash)
tlsf:tlsf_realloc (noflash)

multi_heap:multi_heap_get_block_address_impl (noflash)
multi_heap:multi_heap_get_allocated_size_impl (noflash)
multi_heap:multi_heap_set_lock (noflash)
multi_heap:multi_heap_get_first_block (noflash)
multi_heap:multi_heap_get_next_block (noflash)
multi_heap:multi_heap_is_free (noflash)
multi_heap:multi_heap_malloc_impl (noflash)
multi_heap:multi_heap_free_impl (noflash)
multi_heap:multi_heap_realloc_impl (noflash)
multi_heap:multi_heap_aligned_alloc_impl_offs (noflash)
multi_heap:multi_heap_aligned_alloc_impl (noflash)
multi_heap:multi_heap_internal_lock (noflash)
multi_heap:multi_heap_internal_unlock (noflash)
multi_heap:assert_valid_block (noflash)

if HEAP_TLSF_USE_ROM_IMPL = y:
multi_heap:_multi_heap_lock (noflash)
multi_heap:_multi_heap_unlock (noflash)
multi_heap:multi_heap_in_rom_init (noflash)

if HEAP_POISONING_DISABLED = n:
multi_heap_poisoning (noflash)
multi_heap_poisoning:poison_allocated_region (noflash)
multi_heap_poisoning:verify_allocated_region (noflash)
multi_heap_poisoning:multi_heap_aligned_alloc (noflash)
multi_heap_poisoning:multi_heap_malloc (noflash)
multi_heap_poisoning:multi_heap_free (noflash)
multi_heap_poisoning:multi_heap_aligned_free (noflash)
multi_heap_poisoning:multi_heap_realloc (noflash)
multi_heap_poisoning:multi_heap_get_block_address (noflash)
multi_heap_poisoning:multi_heap_get_block_owner (noflash)
multi_heap_poisoning:multi_heap_get_allocated_size (noflash)
multi_heap_poisoning:multi_heap_internal_check_block_poisoning (noflash)
multi_heap_poisoning:multi_heap_internal_poison_fill_region (noflash)

if HEAP_POISONING_COMPREHENSIVE = y:
multi_heap_poisoning:verify_fill_pattern (noflash)
multi_heap_poisoning:block_absorb_post_hook (noflash)
29 changes: 8 additions & 21 deletions components/heap/multi_heap.c
Expand Up @@ -105,20 +105,8 @@ void multi_heap_in_rom_init(void)

#else // CONFIG_HEAP_TLSF_USE_ROM_IMPL

/* Return true if this block is free. */
static inline bool is_free(const block_header_t *block)
{
return ((block->size & 0x01) != 0);
}

/* Data size of the block (excludes this block's header) */
static inline size_t block_data_size(const block_header_t *block)
{
return (block->size & ~0x03);
}

/* Check a block is valid for this heap. Used to verify parameters. */
static void assert_valid_block(const heap_t *heap, const block_header_t *block)
__attribute__((noinline)) NOCLONE_ATTR static void assert_valid_block(const heap_t *heap, const block_header_t *block)
{
pool_t pool = tlsf_get_pool(heap->heap_data);
void *ptr = block_to_ptr(block);
Expand All @@ -130,8 +118,7 @@ static void assert_valid_block(const heap_t *heap, const block_header_t *block)

void *multi_heap_get_block_address_impl(multi_heap_block_handle_t block)
{
void *ptr = block_to_ptr(block);
return (ptr);
return block_to_ptr(block);
}

size_t multi_heap_get_allocated_size_impl(multi_heap_handle_t heap, void *p)
Expand Down Expand Up @@ -170,12 +157,12 @@ void multi_heap_set_lock(multi_heap_handle_t heap, void *lock)
heap->lock = lock;
}

void inline multi_heap_internal_lock(multi_heap_handle_t heap)
void multi_heap_internal_lock(multi_heap_handle_t heap)
{
MULTI_HEAP_LOCK(heap->lock);
}

void inline multi_heap_internal_unlock(multi_heap_handle_t heap)
void multi_heap_internal_unlock(multi_heap_handle_t heap)
{
MULTI_HEAP_UNLOCK(heap->lock);
}
Expand All @@ -195,7 +182,7 @@ multi_heap_block_handle_t multi_heap_get_next_block(multi_heap_handle_t heap, mu
assert_valid_block(heap, block);
block_header_t* next = block_next(block);

if(block_data_size(next) == 0) {
if(block_size(next) == 0) {
//Last block:
return NULL;
} else {
Expand All @@ -206,7 +193,7 @@ multi_heap_block_handle_t multi_heap_get_next_block(multi_heap_handle_t heap, mu

bool multi_heap_is_free(multi_heap_block_handle_t block)
{
return is_free(block);
return block_is_free(block);
}

void *multi_heap_malloc_impl(multi_heap_handle_t heap, size_t size)
Expand Down Expand Up @@ -364,7 +351,7 @@ bool multi_heap_check(multi_heap_handle_t heap, bool print_errors)
return valid;
}

static void multi_heap_dump_tlsf(void* ptr, size_t size, int used, void* user)
__attribute__((noinline)) static void multi_heap_dump_tlsf(void* ptr, size_t size, int used, void* user)
{
(void)user;
MULTI_HEAP_STDERR_PRINTF("Block %p data, size: %d bytes, Free: %s \n",
Expand Down Expand Up @@ -401,7 +388,7 @@ size_t multi_heap_minimum_free_size_impl(multi_heap_handle_t heap)
return heap->minimum_free_bytes;
}

static void multi_heap_get_info_tlsf(void* ptr, size_t size, int used, void* user)
__attribute__((noinline)) static void multi_heap_get_info_tlsf(void* ptr, size_t size, int used, void* user)
{
multi_heap_info_t *info = user;

Expand Down
8 changes: 8 additions & 0 deletions components/heap/multi_heap_internal.h
Expand Up @@ -5,6 +5,14 @@
*/
#pragma once

/* NOCLONE_ATTR expands to the `noclone` function attribute when the compiler
 * reports support for it, and to nothing otherwise. Certain functions in the
 * heap component should not be cloned by the compiler.
 *
 * The __has_attribute(noclone) probe is nested inside its own #if: when it is
 * written as `#if defined __has_attribute && __has_attribute(noclone)`, a
 * preprocessor that does not provide __has_attribute still has to parse the
 * second operand and rejects the whole directive with a syntax error. */
#if defined(__has_attribute)
#  if __has_attribute(noclone)
#    define NOCLONE_ATTR __attribute__((noclone))
#  else
#    define NOCLONE_ATTR
#  endif
#else
#  define NOCLONE_ATTR
#endif

/* Define a structure that contains some function pointers that point to OS-related functions.
An instance of this structure will be provided to the heap in ROM for use if needed.
*/
Expand Down
14 changes: 10 additions & 4 deletions components/heap/multi_heap_poisoning.c
Expand Up @@ -66,7 +66,7 @@ typedef struct {
Returns the pointer to the actual usable data buffer (ie after 'head')
*/
static uint8_t *poison_allocated_region(poison_head_t *head, size_t alloc_size)
__attribute__((noinline)) static uint8_t *poison_allocated_region(poison_head_t *head, size_t alloc_size)
{
uint8_t *data = (uint8_t *)(&head[1]); /* start of data ie 'real' allocated buffer */
poison_tail_t *tail = (poison_tail_t *)(data + alloc_size);
Expand All @@ -90,7 +90,7 @@ static uint8_t *poison_allocated_region(poison_head_t *head, size_t alloc_size)
Returns a pointer to the poison header structure, or NULL if the poison structures are corrupt.
*/
static poison_head_t *verify_allocated_region(void *data, bool print_errors)
__attribute__((noinline)) static poison_head_t *verify_allocated_region(void *data, bool print_errors)
{
poison_head_t *head = (poison_head_t *)((intptr_t)data - sizeof(poison_head_t));
poison_tail_t *tail = (poison_tail_t *)((intptr_t)data + head->alloc_size);
Expand Down Expand Up @@ -132,8 +132,12 @@ static poison_head_t *verify_allocated_region(void *data, bool print_errors)
if swap_pattern is true, swap patterns in the buffer (ie replace MALLOC_FILL_PATTERN with FREE_FILL_PATTERN, and vice versa.)
Returns true if verification checks out.
This function has the noclone attribute to prevent the compiler from creating a clone in flash where expect_free is removed (as this
function is called only with expect_free == true throughout the component).
*/
static bool verify_fill_pattern(void *data, size_t size, bool print_errors, bool expect_free, bool swap_pattern)
__attribute__((noinline)) NOCLONE_ATTR
static bool verify_fill_pattern(void *data, size_t size, const bool print_errors, const bool expect_free, bool swap_pattern)
{
const uint32_t FREE_FILL_WORD = (FREE_FILL_PATTERN << 24) | (FREE_FILL_PATTERN << 16) | (FREE_FILL_PATTERN << 8) | FREE_FILL_PATTERN;
const uint32_t MALLOC_FILL_WORD = (MALLOC_FILL_PATTERN << 24) | (MALLOC_FILL_PATTERN << 16) | (MALLOC_FILL_PATTERN << 8) | MALLOC_FILL_PATTERN;
Expand Down Expand Up @@ -259,7 +263,9 @@ void *multi_heap_malloc(multi_heap_handle_t heap, size_t size)
return data;
}

void multi_heap_free(multi_heap_handle_t heap, void *p)
/* This function has the noclone attribute to prevent the compiler from optimizing out the
* check for p == NULL and creating a clone function placed in flash. */
NOCLONE_ATTR void multi_heap_free(multi_heap_handle_t heap, void *p)
{
if (p == NULL) {
return;
Expand Down
14 changes: 14 additions & 0 deletions components/heap/test_apps/CMakeLists.txt
Expand Up @@ -4,3 +4,17 @@ cmake_minimum_required(VERSION 3.16)

include($ENV{IDF_PATH}/tools/cmake/project.cmake)
project(test_heap)

# When CONFIG_COMPILER_DUMP_RTL_FILES is set, add a custom target (part of the
# default ALL build) that runs tools/ci/check_callgraph.py on the RTL files of
# the heap component and on the test_heap ELF file. The "find-refs" action is
# asked for references from the .iram0.text section to the .flash.text and
# .flash.rodata sections. The __func__ symbols preceding calls to
# __assert_func and heap_caps_alloc_failed are passed via --ignore-symbols.
# NOTE(review): --exit-code presumably makes the script return a non-zero
# status when such references are found, failing the build - confirm against
# check_callgraph.py.
if(CONFIG_COMPILER_DUMP_RTL_FILES)
add_custom_target(check_test_app_sections ALL
COMMAND ${PYTHON} $ENV{IDF_PATH}/tools/ci/check_callgraph.py
--rtl-dir ${CMAKE_BINARY_DIR}/esp-idf/heap/
--elf-file ${CMAKE_BINARY_DIR}/test_heap.elf
find-refs
--from-sections=.iram0.text
--to-sections=.flash.text,.flash.rodata
--ignore-symbols=__func__/__assert_func,__func__/heap_caps_alloc_failed
--exit-code
DEPENDS ${elf}
)
endif()
1 change: 1 addition & 0 deletions components/heap/test_apps/sdkconfig.defaults
@@ -1,2 +1,3 @@
CONFIG_COMPILER_DUMP_RTL_FILES=y
CONFIG_ESP_TASK_WDT_CHECK_IDLE_TASK_CPU0=n
CONFIG_ESP_SYSTEM_MEMPROT_FEATURE=n # memory protection needs to be disabled for certain tests
2 changes: 1 addition & 1 deletion components/heap/tlsf
Submodule tlsf updated 1 files
+1 −1 tlsf.c
23 changes: 22 additions & 1 deletion docs/en/api-reference/system/mem_alloc.rst
Expand Up @@ -129,7 +129,28 @@ Thread Safety

Heap functions are thread safe, meaning they can be called from different tasks simultaneously without any limitations.

It is technically possible to call ``malloc``, ``free``, and related functions from interrupt handler (ISR) context. However this is not recommended, as heap function calls may delay other interrupts. It is strongly recommended to refactor applications so that any buffers used by an ISR are pre-allocated outside of the ISR. Support for calling heap functions from ISRs may be removed in a future update.
It is technically possible to call ``malloc``, ``free``, and related functions from interrupt handler (ISR) context (see :ref:`calling-heap-related-functions-from-isr`). However this is not recommended, as heap function calls may delay other interrupts. It is strongly recommended to refactor applications so that any buffers used by an ISR are pre-allocated outside of the ISR. Support for calling heap functions from ISRs may be removed in a future update.

.. _calling-heap-related-functions-from-isr:

Calling heap related functions from ISR
---------------------------------------

The following functions from the heap component can be called from an interrupt handler (ISR):

* :cpp:func:`heap_caps_malloc`
* :cpp:func:`heap_caps_malloc_default`
* :cpp:func:`heap_caps_realloc_default`
* :cpp:func:`heap_caps_malloc_prefer`
* :cpp:func:`heap_caps_realloc_prefer`
* :cpp:func:`heap_caps_calloc_prefer`
* :cpp:func:`heap_caps_free`
* :cpp:func:`heap_caps_realloc`
* :cpp:func:`heap_caps_calloc`
* :cpp:func:`heap_caps_aligned_alloc`
* :cpp:func:`heap_caps_aligned_free`

Note, however, that this practice is strongly discouraged.

Heap Tracing & Debugging
------------------------
Expand Down
32 changes: 28 additions & 4 deletions tools/ci/check_callgraph.py
Expand Up @@ -95,6 +95,11 @@ def __str__(self) -> str:
)


class IgnorePair():
    """One ``symbol/function_call`` entry of the ``--ignore-symbols`` option.

    Attributes:
        symbol: text matched (as a substring) against the references recorded
            for a function.
        function_call: name of the call target that triggers ignoring the
            symbol reference preceding it.

    Raises:
        ValueError: if ``pair`` is not exactly two non-empty parts separated
            by a single ``/``.
    """

    def __init__(self, pair: str) -> None:
        parts = pair.split('/')
        # Reject empty parts explicitly: an empty symbol is a substring of
        # every reference, so it would match (and drop) unrelated references.
        if len(parts) != 2 or not all(parts):
            raise ValueError(
                'Invalid ignore pair {!r}: expected "symbol/function_name"'.format(pair)
            )
        self.symbol, self.function_call = parts


class ElfInfo(object):
def __init__(self, elf_file: BinaryIO) -> None:
self.elf_file = elf_file
Expand Down Expand Up @@ -159,7 +164,7 @@ def section_for_addr(self, sym_addr: int) -> Optional[str]:
return None


def load_rtl_file(rtl_filename: str, tu_filename: str, functions: List[RtlFunction]) -> None:
def load_rtl_file(rtl_filename: str, tu_filename: str, functions: List[RtlFunction], ignore_pairs: List[IgnorePair]) -> None:
last_function: Optional[RtlFunction] = None
for line in open(rtl_filename):
# Find function definition
Expand All @@ -175,6 +180,17 @@ def load_rtl_file(rtl_filename: str, tu_filename: str, functions: List[RtlFuncti
match = re.match(CALL_REGEX, line)
if match:
target = match.group('target')

# If target matches one of the IgnorePair function_call attributes, remove
# the last occurrence of the associated symbol from the last_function.refs list.
call_matching_pairs = [pair for pair in ignore_pairs if pair.function_call == target]
if call_matching_pairs and last_function and last_function.refs:
for pair in call_matching_pairs:
ignored_symbols = [ref for ref in last_function.refs if pair.symbol in ref]
if ignored_symbols:
last_ref = ignored_symbols.pop()
last_function.refs = [ref for ref in last_function.refs if last_ref != ref]

if target not in last_function.calls:
last_function.calls.append(target)
continue
Expand Down Expand Up @@ -304,12 +320,12 @@ def match_rtl_funcs_to_symbols(rtl_functions: List[RtlFunction], elfinfo: ElfInf
return symbols, refs


def get_symbols_and_refs(rtl_list: List[str], elf_file: BinaryIO) -> Tuple[List[Symbol], List[Reference]]:
def get_symbols_and_refs(rtl_list: List[str], elf_file: BinaryIO, ignore_pairs: List[IgnorePair]) -> Tuple[List[Symbol], List[Reference]]:
elfinfo = ElfInfo(elf_file)

rtl_functions: List[RtlFunction] = []
for file_name in rtl_list:
load_rtl_file(file_name, file_name, rtl_functions)
load_rtl_file(file_name, file_name, rtl_functions, ignore_pairs)

return match_rtl_funcs_to_symbols(rtl_functions, elfinfo)

Expand Down Expand Up @@ -361,6 +377,10 @@ def main() -> None:
find_refs_parser.add_argument(
'--to-sections', help='comma-separated list of target sections'
)
find_refs_parser.add_argument(
'--ignore-symbols', help='comma-separated list of symbol/function_name pairs. \
This will force the parser to ignore the symbol preceding the call to function_name'
)
find_refs_parser.add_argument(
'--exit-code',
action='store_true',
Expand All @@ -384,7 +404,11 @@ def main() -> None:
if not rtl_list:
raise RuntimeError('No RTL files specified')

_, refs = get_symbols_and_refs(rtl_list, args.elf_file)
ignore_pairs = []
for pair in args.ignore_symbols.split(',') if args.ignore_symbols else []:
ignore_pairs.append(IgnorePair(pair))

_, refs = get_symbols_and_refs(rtl_list, args.elf_file, ignore_pairs)

if args.action == 'find-refs':
from_sections = args.from_sections.split(',') if args.from_sections else []
Expand Down

0 comments on commit 1d16ca6

Please sign in to comment.