From 9804adfd405056ec332bb8e13d68c7b52bd3a6c1 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Thu, 21 Dec 2017 19:22:57 -0600 Subject: [PATCH] Added option to disable pack buffer memory pools. Details: - Added a new configure option, --[en|dis]able-packbuf-pools, which will enable or disable the use of internal memory pools for managing buffers used for packing. When disabled, the function specified by the cpp macro BLIS_MALLOC_POOL is called whenever a packing buffer is needed (and BLIS_FREE_POOL is called when the buffer is ready to be released, usually at the end of a loop). When enabled, which was the status quo prior to this commit, a memory pool data structure is created and managed to provide threads with packing buffers. The memory pool minimizes calls to bli_malloc_pool() (i.e., the wrapper that calls BLIS_MALLOC_POOL), but does so through a somewhat more complex mechanism that may incur additional overhead in some (but not all) situations. The new option defaults to --enable-packbuf-pools. - Removed the reinitialization of the memory pools from the level-3 front-ends and replaced it with automatic reinitialization within the pool API's implementation. This required an extra argument to bli_pool_checkout_block() in the form of a requested size, but hides the complexity entirely from BLIS. And since bli_pool_checkout_block() is only ever called within a critical section, this change fixes a potential race condition in which threads using contexts with different cache blocksizes--most likely in a heterogeneous environment--can check out pool blocks that are too small for the submatrices they wish to pack. Thanks to Nisanth Padinharepatt for reporting this potential issue. - Removed several functions in light of the relocation of pool reinit, including bli_membrk_reinit_pools(), bli_memsys_reinit(), bli_pool_reinit_if(), and bli_check_requested_block_size_for_pool(). - Updated the testsuite to print whether the memory pools are enabled or disabled. 
--- build/bli_config.h.in | 4 ++ configure | 30 +++++++++++++ frame/3/gemm/bli_gemm_front.c | 4 -- frame/3/hemm/bli_hemm_front.c | 4 -- frame/3/her2k/bli_her2k_front.c | 4 -- frame/3/herk/bli_herk_front.c | 4 -- frame/3/symm/bli_symm_front.c | 4 -- frame/3/syr2k/bli_syr2k_front.c | 4 -- frame/3/syrk/bli_syrk_front.c | 4 -- frame/3/trmm/bli_trmm_front.c | 4 -- frame/3/trmm3/bli_trmm3_front.c | 4 -- frame/3/trsm/bli_trsm_front.c | 4 -- frame/base/bli_check.c | 10 ----- frame/base/bli_check.h | 1 - frame/base/bli_error.c | 2 - frame/base/bli_info.c | 8 ++++ frame/base/bli_info.h | 1 + frame/base/bli_membrk.c | 77 ++++++++------------------------- frame/base/bli_membrk.h | 5 --- frame/base/bli_memsys.c | 21 --------- frame/base/bli_memsys.h | 1 - frame/base/bli_pool.c | 46 +++++++------------- frame/base/bli_pool.h | 6 +-- frame/include/bli_type_defs.h | 1 - testsuite/src/test_libblis.c | 3 ++ 25 files changed, 82 insertions(+), 174 deletions(-) diff --git a/build/bli_config.h.in b/build/bli_config.h.in index c4c59db56..8ab884baa 100644 --- a/build/bli_config.h.in +++ b/build/bli_config.h.in @@ -52,6 +52,10 @@ #define BLIS_ENABLE_PTHREADS #endif +#if @enable_packbuf_pools@ +#define BLIS_ENABLE_PACKBUF_POOLS +#endif + #if @int_type_size@ == 64 #define BLIS_INT_TYPE_SIZE 64 #elif @int_type_size@ == 32 diff --git a/configure b/configure index 79fcb6079..918ee178e 100755 --- a/configure +++ b/configure @@ -99,6 +99,21 @@ print_usage() echo " --disable-threading is specified, threading will be" echo " disabled. The default is 'no'." echo " " + echo " --disable-packbuf-pools, --enable-packbuf-pools" + echo " " + echo " Disable (enabled by default) use of internal memory" + echo " pools for managing packing buffers. 
When disabled," + echo " the function specified by BLIS_MALLOC_POOL is called" + echo " on-demand, whenever a packing buffer is needed, and" + echo " the buffer is released via the function specified by" + echo " BLIS_FREE_POOL() when the loop in which it was" + echo " allocated terminates. When enabled, the memory pools" + echo " minimize calls to both BLIS_MALLOC_POOL() and" + echo " BLIS_FREE_POOL(), especially in a multithreaded" + echo " environment, but does so through a mechanism that may" + echo " incur additional overhead in some (but not all)" + echo " situations." + echo " " echo " -q, --quiet Suppress informational output. By default, configure" echo " is verbose. (NOTE: -q is not yet implemented)" echo " " @@ -502,6 +517,7 @@ main() enable_verbose='no' enable_static='yes' enable_shared='no' + enable_packbuf_pools='yes' int_type_size=0 blas2blis_int_type_size=32 enable_blas2blis='yes' @@ -582,6 +598,12 @@ main() disable-threading) threading_model='no' ;; + enable-packbuf-pools) + enable_packbuf_pools='yes' + ;; + disable-packbuf-pools) + enable_packbuf_pools='no' + ;; int-size=*) int_type_size=${OPTARG#*=} ;; @@ -1029,6 +1051,13 @@ main() fi # Convert 'yes' and 'no' flags to booleans. + if [ "x${enable_packbuf_pools}" = "xyes" ]; then + echo "${script_name}: internal memory pools for packing buffers are enabled." + enable_packbuf_pools_01=1 + else + echo "${script_name}: internal memory pools for packing buffers are disabled." + enable_packbuf_pools_01=0 + fi if [ "x${enable_blas2blis}" = "xyes" ]; then echo "${script_name}: the BLAS compatibility layer is enabled." 
enable_blas2blis_01=1 @@ -1135,6 +1164,7 @@ main() | perl -pe "s/\@kernel_list_defines\@/${kernel_list_defines}/g" \ | sed -e "s/@enable_openmp@/${enable_openmp_01}/g" \ | sed -e "s/@enable_pthreads@/${enable_pthreads_01}/g" \ + | sed -e "s/@enable_packbuf_pools@/${enable_packbuf_pools_01}/g" \ | sed -e "s/@int_type_size@/${int_type_size}/g" \ | sed -e "s/@blas2blis_int_type_size@/${blas2blis_int_type_size}/g" \ | sed -e "s/@enable_blas2blis@/${enable_blas2blis_01}/g" \ diff --git a/frame/3/gemm/bli_gemm_front.c b/frame/3/gemm/bli_gemm_front.c index f12838b3a..e9a617dee 100644 --- a/frame/3/gemm/bli_gemm_front.c +++ b/frame/3/gemm/bli_gemm_front.c @@ -62,10 +62,6 @@ void bli_gemm_front return; } - // Reinitialize the memory allocator to accommodate the blocksizes - // in the current context. - bli_memsys_reinit( cntx ); - // Alias A, B, and C in case we need to apply transformations. bli_obj_alias_to( *a, a_local ); bli_obj_alias_to( *b, b_local ); diff --git a/frame/3/hemm/bli_hemm_front.c b/frame/3/hemm/bli_hemm_front.c index 9df40581c..480892332 100644 --- a/frame/3/hemm/bli_hemm_front.c +++ b/frame/3/hemm/bli_hemm_front.c @@ -63,10 +63,6 @@ void bli_hemm_front return; } - // Reinitialize the memory allocator to accommodate the blocksizes - // in the current context. - bli_memsys_reinit( cntx ); - // Alias A, B, and C in case we need to apply transformations. bli_obj_alias_to( *a, a_local ); bli_obj_alias_to( *b, b_local ); diff --git a/frame/3/her2k/bli_her2k_front.c b/frame/3/her2k/bli_her2k_front.c index 1a99f205d..d37077ef9 100644 --- a/frame/3/her2k/bli_her2k_front.c +++ b/frame/3/her2k/bli_her2k_front.c @@ -67,10 +67,6 @@ void bli_her2k_front return; } - // Reinitialize the memory allocator to accommodate the blocksizes - // in the current context. - bli_memsys_reinit( cntx ); - // Alias A, B, and C in case we need to apply transformations. 
bli_obj_alias_to( *a, a_local ); bli_obj_alias_to( *b, b_local ); diff --git a/frame/3/herk/bli_herk_front.c b/frame/3/herk/bli_herk_front.c index 473665663..b89c0d6df 100644 --- a/frame/3/herk/bli_herk_front.c +++ b/frame/3/herk/bli_herk_front.c @@ -63,10 +63,6 @@ void bli_herk_front return; } - // Reinitialize the memory allocator to accommodate the blocksizes - // in the current context. - bli_memsys_reinit( cntx ); - // Alias A and C in case we need to apply transformations. bli_obj_alias_to( *a, a_local ); bli_obj_alias_to( *c, c_local ); diff --git a/frame/3/symm/bli_symm_front.c b/frame/3/symm/bli_symm_front.c index d43fcda81..661afc0c1 100644 --- a/frame/3/symm/bli_symm_front.c +++ b/frame/3/symm/bli_symm_front.c @@ -63,10 +63,6 @@ void bli_symm_front return; } - // Reinitialize the memory allocator to accommodate the blocksizes - // in the current context. - bli_memsys_reinit( cntx ); - // Alias A, B, and C in case we need to apply transformations. bli_obj_alias_to( *a, a_local ); bli_obj_alias_to( *b, b_local ); diff --git a/frame/3/syr2k/bli_syr2k_front.c b/frame/3/syr2k/bli_syr2k_front.c index 4b6b04d6c..725659d0f 100644 --- a/frame/3/syr2k/bli_syr2k_front.c +++ b/frame/3/syr2k/bli_syr2k_front.c @@ -64,10 +64,6 @@ void bli_syr2k_front return; } - // Reinitialize the memory allocator to accommodate the blocksizes - // in the current context. - bli_memsys_reinit( cntx ); - // Alias A, B, and C in case we need to apply transformations. bli_obj_alias_to( *a, a_local ); bli_obj_alias_to( *b, b_local ); diff --git a/frame/3/syrk/bli_syrk_front.c b/frame/3/syrk/bli_syrk_front.c index 3b6217cf8..0c344e184 100644 --- a/frame/3/syrk/bli_syrk_front.c +++ b/frame/3/syrk/bli_syrk_front.c @@ -61,10 +61,6 @@ void bli_syrk_front return; } - // Reinitialize the memory allocator to accommodate the blocksizes - // in the current context. - bli_memsys_reinit( cntx ); - // Alias A and C in case we need to apply transformations. 
bli_obj_alias_to( *a, a_local ); bli_obj_alias_to( *c, c_local ); diff --git a/frame/3/trmm/bli_trmm_front.c b/frame/3/trmm/bli_trmm_front.c index d39a2a6bb..2dacf18f2 100644 --- a/frame/3/trmm/bli_trmm_front.c +++ b/frame/3/trmm/bli_trmm_front.c @@ -61,10 +61,6 @@ void bli_trmm_front return; } - // Reinitialize the memory allocator to accommodate the blocksizes - // in the current context. - bli_memsys_reinit( cntx ); - // Alias A and B so we can tweak the objects if necessary. bli_obj_alias_to( *a, a_local ); bli_obj_alias_to( *b, b_local ); diff --git a/frame/3/trmm3/bli_trmm3_front.c b/frame/3/trmm3/bli_trmm3_front.c index 5d812f9ee..e4aba834b 100644 --- a/frame/3/trmm3/bli_trmm3_front.c +++ b/frame/3/trmm3/bli_trmm3_front.c @@ -63,10 +63,6 @@ void bli_trmm3_front return; } - // Reinitialize the memory allocator to accommodate the blocksizes - // in the current context. - bli_memsys_reinit( cntx ); - // Alias A, B, and C so we can tweak the objects if necessary. bli_obj_alias_to( *a, a_local ); bli_obj_alias_to( *b, b_local ); diff --git a/frame/3/trsm/bli_trsm_front.c b/frame/3/trsm/bli_trsm_front.c index a820d0788..10817bee1 100644 --- a/frame/3/trsm/bli_trsm_front.c +++ b/frame/3/trsm/bli_trsm_front.c @@ -61,10 +61,6 @@ void bli_trsm_front return; } - // Reinitialize the memory allocator to accommodate the blocksizes - // in the current context. - bli_memsys_reinit( cntx ); - // Alias A and B so we can tweak the objects if necessary. 
bli_obj_alias_to( *a, a_local ); bli_obj_alias_to( *b, b_local ); diff --git a/frame/base/bli_check.c b/frame/base/bli_check.c index 61062f33b..4b40a726b 100644 --- a/frame/base/bli_check.c +++ b/frame/base/bli_check.c @@ -757,16 +757,6 @@ err_t bli_check_valid_packbuf( packbuf_t buf_type ) return e_val; } -err_t bli_check_requested_block_size_for_pool( siz_t req_size, pool_t* pool ) -{ - err_t e_val = BLIS_SUCCESS; - - if ( bli_pool_block_size( pool ) < req_size ) - e_val = BLIS_REQUESTED_CONTIG_BLOCK_TOO_BIG; - - return e_val; -} - err_t bli_check_if_exhausted_pool( pool_t* pool ) { err_t e_val = BLIS_SUCCESS; diff --git a/frame/base/bli_check.h b/frame/base/bli_check.h index 96e8b826f..bd5cd064f 100644 --- a/frame/base/bli_check.h +++ b/frame/base/bli_check.h @@ -97,7 +97,6 @@ err_t bli_check_packv_schema_on_unpack( obj_t* a ); err_t bli_check_object_buffer( obj_t* a ); err_t bli_check_valid_packbuf( packbuf_t buf_type ); -err_t bli_check_requested_block_size_for_pool( siz_t req_size, pool_t* pool ); err_t bli_check_if_exhausted_pool( pool_t* pool ); err_t bli_check_sufficient_stack_buf_size( num_t dt, cntx_t* cntx ); err_t bli_check_alignment_is_power_of_two( size_t align_size ); diff --git a/frame/base/bli_error.c b/frame/base/bli_error.c index 7a09d3e96..afe86f5ff 100644 --- a/frame/base/bli_error.c +++ b/frame/base/bli_error.c @@ -149,8 +149,6 @@ void bli_error_init_msgs( void ) sprintf( bli_error_string_for_code(BLIS_INVALID_PACKBUF), "Invalid packbuf_t value." ); - sprintf( bli_error_string_for_code(BLIS_REQUESTED_CONTIG_BLOCK_TOO_BIG ), - "Attempted to allocate contiguous memory block that is too big for implementation." ); sprintf( bli_error_string_for_code(BLIS_EXHAUSTED_CONTIG_MEMORY_POOL), "Attempted to allocate more memory from contiguous pool than is available." 
); sprintf( bli_error_string_for_code(BLIS_INSUFFICIENT_STACK_BUF_SIZE), diff --git a/frame/base/bli_info.c b/frame/base/bli_info.c index 78edc35b2..9c6db33bf 100644 --- a/frame/base/bli_info.c +++ b/frame/base/bli_info.c @@ -86,6 +86,14 @@ gint_t bli_info_get_enable_cblas( void ) #endif } gint_t bli_info_get_blas2blis_int_type_size( void ) { return BLIS_BLAS2BLIS_INT_TYPE_SIZE; } +gint_t bli_info_get_enable_packbuf_pools( void ) +{ +#ifdef BLIS_ENABLE_PACKBUF_POOLS + return 1; +#else + return 0; +#endif +} diff --git a/frame/base/bli_info.h b/frame/base/bli_info.h index 4b2450fa9..8159ac20a 100644 --- a/frame/base/bli_info.h +++ b/frame/base/bli_info.h @@ -57,6 +57,7 @@ gint_t bli_info_get_enable_stay_auto_init( void ); gint_t bli_info_get_enable_blas2blis( void ); gint_t bli_info_get_enable_cblas( void ); gint_t bli_info_get_blas2blis_int_type_size( void ); +gint_t bli_info_get_enable_packbuf_pools( void ); // -- Kernel implementation-related -------------------------------------------- diff --git a/frame/base/bli_membrk.c b/frame/base/bli_membrk.c index 16578de65..49a094e6e 100644 --- a/frame/base/bli_membrk.c +++ b/frame/base/bli_membrk.c @@ -42,7 +42,9 @@ void bli_membrk_init ) { bli_mutex_init( bli_membrk_mutex( membrk ) ); +#ifdef BLIS_ENABLE_PACKBUF_POOLS bli_membrk_init_pools( cntx, membrk ); +#endif bli_membrk_set_malloc_fp( bli_malloc_pool, membrk ); bli_membrk_set_free_fp( bli_free_pool, membrk ); } @@ -53,7 +55,10 @@ void bli_membrk_finalize ) { bli_membrk_set_malloc_fp( NULL, membrk ); + bli_membrk_set_free_fp( NULL, membrk ); +#ifdef BLIS_ENABLE_PACKBUF_POOLS bli_membrk_finalize_pools( membrk ); +#endif bli_mutex_finalize( bli_membrk_mutex( membrk ) ); } @@ -70,6 +75,13 @@ void bli_membrk_acquire_m dim_t pi; siz_t block_size; + // If the internal memory pools for pack buffers are disabled, we + // spoof the buffer type as BLIS_BUFFER_FOR_GEN_USE to induce the + // immediate usage of bli_membrk_malloc(). 
+#ifndef BLIS_ENABLE_PACKBUF_POOLS + buf_type = BLIS_BUFFER_FOR_GEN_USE; +#endif + // Make sure the API is initialized. //assert( membrk ); //?? @@ -86,11 +98,12 @@ void bli_membrk_acquire_m // - the buffer type (a packbuf_t value), // - the size of the requested region, // - the membrk_t from which the mem_t entry was acquired. - // NOTE: We do not initialize the pool field since this block did not + // NOTE: We initialize the pool field to NULL since this block did not // come from a memory pool. bli_mem_set_buffer( buf_sys, mem ); bli_mem_set_buf_sys( buf_sys, mem ); bli_mem_set_buf_type( buf_type, mem ); + bli_mem_set_pool( NULL, mem ); bli_mem_set_size( req_size, mem ); bli_mem_set_membrk( membrk, mem ); } @@ -105,17 +118,6 @@ void bli_membrk_acquire_m pi = bli_packbuf_index( buf_type ); pool = bli_membrk_pool( pi, membrk ); - // Unconditionally perform error checking on the memory pool. - { - err_t e_val; - - // Make sure that the requested matrix size fits inside of a block - // of the corresponding pool. If it does not, the pool was somehow - // initialized improperly. - e_val = bli_check_requested_block_size_for_pool( req_size, pool ); - bli_check_error_code( e_val ); - } - // Extract the address of the pblk_t struct within the mem_t. pblk = bli_mem_pblk( mem ); @@ -123,19 +125,21 @@ void bli_membrk_acquire_m bli_membrk_lock( membrk ); { - // Checkout a block from the pool. If the pool is exhausted, + // Checkout a block from the pool. If the pool's blocks are too + // small, it will be reinitialized with blocks large enough to + // accommodate the requested block size. If the pool is exhausted, // either because it is still empty or because all blocks have // been checked out already, additional blocks will be allocated // automatically, as-needed. Note that the addresses are stored // directly into the mem_t struct since pblk is the address of // the struct's pblk_t field. 
- bli_pool_checkout_block( pblk, pool ); + bli_pool_checkout_block( req_size, pblk, pool ); // Query the size of the blocks in the pool so we can store it in // the mem_t object. At this point, it is guaranteed to be at // least as large as req_size. (NOTE: We must perform the query // within the critical section to ensure that the pool hasn't - // changed, as unlikely as that would be.) + // changed.) block_size = bli_pool_block_size( pool ); } @@ -329,49 +333,6 @@ void bli_membrk_init_pools bli_pool_init( num_blocks_c, block_size_c, align_size, pool_c ); } -void bli_membrk_reinit_pools - ( - cntx_t* cntx, - membrk_t* membrk - ) -{ - // Map each of the packbuf_t values to an index starting at zero. - const dim_t index_a = bli_packbuf_index( BLIS_BUFFER_FOR_A_BLOCK ); - const dim_t index_b = bli_packbuf_index( BLIS_BUFFER_FOR_B_PANEL ); - const dim_t index_c = bli_packbuf_index( BLIS_BUFFER_FOR_C_PANEL ); - - const siz_t align_size = BLIS_POOL_ADDR_ALIGN_SIZE; - - // Alias the pool addresses to convenient identifiers. - pool_t* pool_a = bli_membrk_pool( index_a, membrk ); - pool_t* pool_b = bli_membrk_pool( index_b, membrk ); - pool_t* pool_c = bli_membrk_pool( index_c, membrk ); - - // Query the number of blocks currently allocated in each pool. - const dim_t num_blocks_a = bli_pool_num_blocks( pool_a ); - const dim_t num_blocks_b = bli_pool_num_blocks( pool_b ); - const dim_t num_blocks_c = bli_pool_num_blocks( pool_c ); - - siz_t block_size_a_new = 0; - siz_t block_size_b_new = 0; - siz_t block_size_c_new = 0; - - // Determine the context-implied block size needed for each pool. - bli_membrk_compute_pool_block_sizes( &block_size_a_new, - &block_size_b_new, - &block_size_c_new, - cntx ); - - // Reinitialize the pool, but only if one of the parameters has - // changed in such a way that reinitialization would be required. 
- // In this case, the align_size is constant, as is num_blocks, so - // what this actually boils down to is that reinitialization of a - // pool occurs only if the block size for that pool has increased. - bli_pool_reinit_if( num_blocks_a, block_size_a_new, align_size, pool_a ); - bli_pool_reinit_if( num_blocks_b, block_size_b_new, align_size, pool_b ); - bli_pool_reinit_if( num_blocks_c, block_size_c_new, align_size, pool_c ); -} - void bli_membrk_finalize_pools ( membrk_t* membrk diff --git a/frame/base/bli_membrk.h b/frame/base/bli_membrk.h index 833ffd474..9c0fd1c40 100644 --- a/frame/base/bli_membrk.h +++ b/frame/base/bli_membrk.h @@ -140,11 +140,6 @@ void bli_membrk_init_pools cntx_t* cntx, membrk_t* membrk ); -void bli_membrk_reinit_pools - ( - cntx_t* cntx, - membrk_t* membrk - ); void bli_membrk_finalize_pools ( membrk_t* membrk diff --git a/frame/base/bli_memsys.c b/frame/base/bli_memsys.c index bbdf3462a..c3ce09536 100644 --- a/frame/base/bli_memsys.c +++ b/frame/base/bli_memsys.c @@ -80,27 +80,6 @@ void bli_memsys_init( void ) #endif } -void bli_memsys_reinit( cntx_t* cntx ) -{ -#ifdef BLIS_ENABLE_OPENMP - _Pragma( "omp critical (mem)" ) -#endif -#ifdef BLIS_ENABLE_PTHREADS - pthread_mutex_lock( &memsys_mutex ); -#endif - - // BEGIN CRITICAL SECTION - { - // Reinitialize the global membrk_t object's memory pools. 
- bli_membrk_reinit_pools( cntx, &global_membrk ); - } - // END CRITICAL SECTION - -#ifdef BLIS_ENABLE_PTHREADS - pthread_mutex_unlock( &memsys_mutex ); -#endif -} - void bli_memsys_finalize( void ) { #ifdef BLIS_ENABLE_OPENMP diff --git a/frame/base/bli_memsys.h b/frame/base/bli_memsys.h index 2ae16bb74..9be57b80a 100644 --- a/frame/base/bli_memsys.h +++ b/frame/base/bli_memsys.h @@ -43,7 +43,6 @@ membrk_t* bli_memsys_global_membrk( void ); // ----------------------------------------------------------------------------- void bli_memsys_init( void ); -void bli_memsys_reinit( cntx_t* cntx ); void bli_memsys_finalize( void ); diff --git a/frame/base/bli_pool.c b/frame/base/bli_pool.c index aa234e073..878424b70 100644 --- a/frame/base/bli_pool.c +++ b/frame/base/bli_pool.c @@ -90,6 +90,9 @@ void bli_pool_finalize( pool_t* pool ) // Free the block_ptrs array. bli_free_intl( block_ptrs ); + // This explicit clearing of the pool_t struct is not strictly + // necessary and so it has been commented out. +#if 0 // Clear the contents of the pool_t struct. bli_pool_set_block_ptrs( NULL, pool ); bli_pool_set_block_ptrs_len( 0, pool ); @@ -97,6 +100,7 @@ void bli_pool_finalize( pool_t* pool ) bli_pool_set_top_index( 0, pool ); bli_pool_set_block_size( 0, pool ); bli_pool_set_align_size( 0, pool ); +#endif } void bli_pool_reinit( dim_t num_blocks_new, @@ -116,42 +120,24 @@ void bli_pool_reinit( dim_t num_blocks_new, bli_pool_init( num_blocks_new, block_size_new, align_size_new, pool ); } -void bli_pool_reinit_if( dim_t num_blocks_new, - siz_t block_size_new, - siz_t align_size_new, - pool_t* pool ) +void bli_pool_checkout_block( siz_t req_size, pblk_t* block, pool_t* pool ) { - const dim_t num_blocks = bli_pool_num_blocks( pool ); - const dim_t block_size = bli_pool_block_size( pool ); - const dim_t align_size = bli_pool_align_size( pool ); - - // Reinitialize the pool, but only if one or more of new pool - // parameters would require it. 
Otherwise, if only the number - // of blocks has increased, we can skip a full reinit and just - // grow the pool. - if ( block_size_new > block_size || - align_size_new != align_size ) + pblk_t* block_ptrs; + dim_t top_index; + + if ( bli_pool_block_size( pool ) < req_size ) { - // Reinitialize the pool with the new parameters, in particular, - // the new block size. + const dim_t num_blocks_new = bli_pool_num_blocks( pool ); + const siz_t align_size_new = bli_pool_align_size( pool ); + + // If the requested block size is larger than what the pool + // was initialized with, reinitialize the pool to contain blocks + // of the requested size. bli_pool_reinit( num_blocks_new, - block_size_new, + req_size, align_size_new, pool ); } - else if ( num_blocks_new > num_blocks ) - { - const dim_t num_blocks_add = num_blocks_new - - num_blocks; - - bli_pool_grow( num_blocks_add, pool ); - } -} - -void bli_pool_checkout_block( pblk_t* block, pool_t* pool ) -{ - pblk_t* block_ptrs; - dim_t top_index; // If the pool is exhausted, add a block. 
if ( bli_pool_is_exhausted( pool ) ) diff --git a/frame/base/bli_pool.h b/frame/base/bli_pool.h index 65b08eaa9..28be71afa 100644 --- a/frame/base/bli_pool.h +++ b/frame/base/bli_pool.h @@ -173,12 +173,8 @@ void bli_pool_reinit( dim_t num_blocks_new, siz_t block_size_new, siz_t align_size_new, pool_t* pool ); -void bli_pool_reinit_if( dim_t num_blocks_new, - siz_t block_size_new, - siz_t align_size_new, - pool_t* pool ); -void bli_pool_checkout_block( pblk_t* block, pool_t* pool ); +void bli_pool_checkout_block( siz_t req_size, pblk_t* block, pool_t* pool ); void bli_pool_checkin_block( pblk_t* block, pool_t* pool ); void bli_pool_grow( dim_t num_blocks_add, pool_t* pool ); diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 7cef992c3..6d1d8ea65 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -1206,7 +1206,6 @@ typedef enum // Memory allocator errors BLIS_INVALID_PACKBUF = (-120), - BLIS_REQUESTED_CONTIG_BLOCK_TOO_BIG = (-121), BLIS_EXHAUSTED_CONTIG_MEMORY_POOL = (-122), BLIS_INSUFFICIENT_STACK_BUF_SIZE = (-123), BLIS_ALIGNMENT_NOT_POWER_OF_TWO = (-124), diff --git a/testsuite/src/test_libblis.c b/testsuite/src/test_libblis.c index 41877dc31..3aa261b87 100644 --- a/testsuite/src/test_libblis.c +++ b/testsuite/src/test_libblis.c @@ -629,6 +629,9 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params ) libblis_test_fprintf_c( os, "Max stack buffer size (bytes) %d\n", ( int )bli_info_get_stack_buf_max_size() ); libblis_test_fprintf_c( os, "Page size (bytes) %d\n", ( int )bli_info_get_page_size() ); libblis_test_fprintf_c( os, "\n" ); + libblis_test_fprintf_c( os, "memory pools for pack buffers\n" ); + libblis_test_fprintf_c( os, " enabled? 
%d\n", ( int )bli_info_get_enable_packbuf_pools() ); + libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "memory alignment (bytes) \n" ); libblis_test_fprintf_c( os, " stack address %d\n", ( int )bli_info_get_stack_buf_align_size() ); libblis_test_fprintf_c( os, " obj_t address %d\n", ( int )bli_info_get_heap_addr_align_size() );