Skip to content

Commit

Permalink
Added option to disable pack buffer memory pools.
Browse files Browse the repository at this point in the history
Details:
- Added a new configure option, --[en|dis]able-packbuf-pools, which will
  enable or disable the use of internal memory pools for managing buffers
  used for packing. When disabled, the function specified by the cpp
  macro BLIS_MALLOC_POOL is called whenever a packing buffer is needed
  (and BLIS_FREE_POOL is called when the buffer is ready to be released,
  usually at the end of a loop). When enabled, which was the status quo
  prior to this commit, a memory pool data structure is created and
  managed to provide threads with packing buffers. The memory pool
  minimizes calls to bli_malloc_pool() (i.e., the wrapper that calls
  BLIS_MALLOC_POOL), but does so through a somewhat more complex
  mechanism that may incur additional overhead in some (but not all)
  situations. The new option defaults to --enable-packbuf-pools.
- Removed the reinitialization of the memory pools from the level-3
  front-ends and replaced it with automatic reinitialization within the
  pool API's implementation. This required an extra argument to
  bli_pool_checkout_block() in the form of a requested size, but hides
  the complexity entirely from BLIS. And since bli_pool_checkout_block()
  is only ever called within a critical section, this change fixes a
  potential race condition in which threads using contexts with different
  cache blocksizes--most likely a heterogeneous environment--can check
  out pool blocks that are too small for the submatrices it wishes to
  pack. Thanks to Nisanth Padinharepatt for reporting this potential
  issue.
- Removed several functions in light of the relocation of pool reinit,
  including bli_membrk_reinit_pools(), bli_memsys_reinit(),
  bli_pool_reinit_if(), and bli_check_requested_block_size_for_pool().
- Updated the testsuite to print whether the memory pools are enabled or
  disabled.
  • Loading branch information
fgvanzee committed Dec 22, 2017
1 parent 107801a commit 9804adf
Show file tree
Hide file tree
Showing 25 changed files with 82 additions and 174 deletions.
4 changes: 4 additions & 0 deletions build/bli_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@
#define BLIS_ENABLE_PTHREADS
#endif

#if @enable_packbuf_pools@
#define BLIS_ENABLE_PACKBUF_POOLS
#endif

#if @int_type_size@ == 64
#define BLIS_INT_TYPE_SIZE 64
#elif @int_type_size@ == 32
Expand Down
30 changes: 30 additions & 0 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,21 @@ print_usage()
echo " --disable-threading is specified, threading will be"
echo " disabled. The default is 'no'."
echo " "
echo " --disable-packbuf-pools, --enable-packbuf-pools"
echo " "
echo " Disable (enabled by default) use of internal memory"
echo " pools for managing packing buffers. When disabled,"
echo " the function specified by BLIS_MALLOC_POOL is called"
echo " on-demand, whenever a packing buffer is needed, and"
echo " the buffer is released via the function specified by"
echo " BLIS_FREE_POOL() when the loop in which it was"
echo " allocated terminates. When enabled, the memory pools"
echo " minimize calls to both BLIS_MALLOC_POOL() and"
echo " BLIS_FREE_POOL(), especially in a multithreaded"
echo " environment, but does so through a mechanism that may"
echo " incur additional overhead in some (but not all)"
echo " situations."
echo " "
echo " -q, --quiet Suppress informational output. By default, configure"
echo " is verbose. (NOTE: -q is not yet implemented)"
echo " "
Expand Down Expand Up @@ -502,6 +517,7 @@ main()
enable_verbose='no'
enable_static='yes'
enable_shared='no'
enable_packbuf_pools='yes'
int_type_size=0
blas2blis_int_type_size=32
enable_blas2blis='yes'
Expand Down Expand Up @@ -582,6 +598,12 @@ main()
disable-threading)
threading_model='no'
;;
enable-packbuf-pools)
enable_packbuf_pools='yes'
;;
disable-packbuf-pools)
enable_packbuf_pools='no'
;;
int-size=*)
int_type_size=${OPTARG#*=}
;;
Expand Down Expand Up @@ -1029,6 +1051,13 @@ main()
fi

# Convert 'yes' and 'no' flags to booleans.
if [ "x${enable_packbuf_pools}" = "xyes" ]; then
echo "${script_name}: internal memory pools for packing buffers are enabled."
enable_packbuf_pools_01=1
else
echo "${script_name}: internal memory pools for packing buffers are disabled."
enable_packbuf_pools_01=0
fi
if [ "x${enable_blas2blis}" = "xyes" ]; then
echo "${script_name}: the BLAS compatibility layer is enabled."
enable_blas2blis_01=1
Expand Down Expand Up @@ -1135,6 +1164,7 @@ main()
| perl -pe "s/\@kernel_list_defines\@/${kernel_list_defines}/g" \
| sed -e "s/@enable_openmp@/${enable_openmp_01}/g" \
| sed -e "s/@enable_pthreads@/${enable_pthreads_01}/g" \
| sed -e "s/@enable_packbuf_pools@/${enable_packbuf_pools_01}/g" \
| sed -e "s/@int_type_size@/${int_type_size}/g" \
| sed -e "s/@blas2blis_int_type_size@/${blas2blis_int_type_size}/g" \
| sed -e "s/@enable_blas2blis@/${enable_blas2blis_01}/g" \
Expand Down
4 changes: 0 additions & 4 deletions frame/3/gemm/bli_gemm_front.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,6 @@ void bli_gemm_front
return;
}

// Reinitialize the memory allocator to accommodate the blocksizes
// in the current context.
bli_memsys_reinit( cntx );

// Alias A, B, and C in case we need to apply transformations.
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( *b, b_local );
Expand Down
4 changes: 0 additions & 4 deletions frame/3/hemm/bli_hemm_front.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,6 @@ void bli_hemm_front
return;
}

// Reinitialize the memory allocator to accommodate the blocksizes
// in the current context.
bli_memsys_reinit( cntx );

// Alias A, B, and C in case we need to apply transformations.
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( *b, b_local );
Expand Down
4 changes: 0 additions & 4 deletions frame/3/her2k/bli_her2k_front.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,6 @@ void bli_her2k_front
return;
}

// Reinitialize the memory allocator to accommodate the blocksizes
// in the current context.
bli_memsys_reinit( cntx );

// Alias A, B, and C in case we need to apply transformations.
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( *b, b_local );
Expand Down
4 changes: 0 additions & 4 deletions frame/3/herk/bli_herk_front.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,6 @@ void bli_herk_front
return;
}

// Reinitialize the memory allocator to accommodate the blocksizes
// in the current context.
bli_memsys_reinit( cntx );

// Alias A and C in case we need to apply transformations.
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( *c, c_local );
Expand Down
4 changes: 0 additions & 4 deletions frame/3/symm/bli_symm_front.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,6 @@ void bli_symm_front
return;
}

// Reinitialize the memory allocator to accommodate the blocksizes
// in the current context.
bli_memsys_reinit( cntx );

// Alias A, B, and C in case we need to apply transformations.
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( *b, b_local );
Expand Down
4 changes: 0 additions & 4 deletions frame/3/syr2k/bli_syr2k_front.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,6 @@ void bli_syr2k_front
return;
}

// Reinitialize the memory allocator to accommodate the blocksizes
// in the current context.
bli_memsys_reinit( cntx );

// Alias A, B, and C in case we need to apply transformations.
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( *b, b_local );
Expand Down
4 changes: 0 additions & 4 deletions frame/3/syrk/bli_syrk_front.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,6 @@ void bli_syrk_front
return;
}

// Reinitialize the memory allocator to accommodate the blocksizes
// in the current context.
bli_memsys_reinit( cntx );

// Alias A and C in case we need to apply transformations.
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( *c, c_local );
Expand Down
4 changes: 0 additions & 4 deletions frame/3/trmm/bli_trmm_front.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,6 @@ void bli_trmm_front
return;
}

// Reinitialize the memory allocator to accommodate the blocksizes
// in the current context.
bli_memsys_reinit( cntx );

// Alias A and B so we can tweak the objects if necessary.
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( *b, b_local );
Expand Down
4 changes: 0 additions & 4 deletions frame/3/trmm3/bli_trmm3_front.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,6 @@ void bli_trmm3_front
return;
}

// Reinitialize the memory allocator to accommodate the blocksizes
// in the current context.
bli_memsys_reinit( cntx );

// Alias A, B, and C so we can tweak the objects if necessary.
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( *b, b_local );
Expand Down
4 changes: 0 additions & 4 deletions frame/3/trsm/bli_trsm_front.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,6 @@ void bli_trsm_front
return;
}

// Reinitialize the memory allocator to accommodate the blocksizes
// in the current context.
bli_memsys_reinit( cntx );

// Alias A and B so we can tweak the objects if necessary.
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( *b, b_local );
Expand Down
10 changes: 0 additions & 10 deletions frame/base/bli_check.c
Original file line number Diff line number Diff line change
Expand Up @@ -757,16 +757,6 @@ err_t bli_check_valid_packbuf( packbuf_t buf_type )
return e_val;
}

err_t bli_check_requested_block_size_for_pool( siz_t req_size, pool_t* pool )
{
err_t e_val = BLIS_SUCCESS;

if ( bli_pool_block_size( pool ) < req_size )
e_val = BLIS_REQUESTED_CONTIG_BLOCK_TOO_BIG;

return e_val;
}

err_t bli_check_if_exhausted_pool( pool_t* pool )
{
err_t e_val = BLIS_SUCCESS;
Expand Down
1 change: 0 additions & 1 deletion frame/base/bli_check.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ err_t bli_check_packv_schema_on_unpack( obj_t* a );
err_t bli_check_object_buffer( obj_t* a );

err_t bli_check_valid_packbuf( packbuf_t buf_type );
err_t bli_check_requested_block_size_for_pool( siz_t req_size, pool_t* pool );
err_t bli_check_if_exhausted_pool( pool_t* pool );
err_t bli_check_sufficient_stack_buf_size( num_t dt, cntx_t* cntx );
err_t bli_check_alignment_is_power_of_two( size_t align_size );
Expand Down
2 changes: 0 additions & 2 deletions frame/base/bli_error.c
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,6 @@ void bli_error_init_msgs( void )

sprintf( bli_error_string_for_code(BLIS_INVALID_PACKBUF),
"Invalid packbuf_t value." );
sprintf( bli_error_string_for_code(BLIS_REQUESTED_CONTIG_BLOCK_TOO_BIG ),
"Attempted to allocate contiguous memory block that is too big for implementation." );
sprintf( bli_error_string_for_code(BLIS_EXHAUSTED_CONTIG_MEMORY_POOL),
"Attempted to allocate more memory from contiguous pool than is available." );
sprintf( bli_error_string_for_code(BLIS_INSUFFICIENT_STACK_BUF_SIZE),
Expand Down
8 changes: 8 additions & 0 deletions frame/base/bli_info.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,14 @@ gint_t bli_info_get_enable_cblas( void )
#endif
}
gint_t bli_info_get_blas2blis_int_type_size( void ) { return BLIS_BLAS2BLIS_INT_TYPE_SIZE; }
gint_t bli_info_get_enable_packbuf_pools( void )
{
#ifdef BLIS_ENABLE_PACKBUF_POOLS
return 1;
#else
return 0;
#endif
}



Expand Down
1 change: 1 addition & 0 deletions frame/base/bli_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ gint_t bli_info_get_enable_stay_auto_init( void );
gint_t bli_info_get_enable_blas2blis( void );
gint_t bli_info_get_enable_cblas( void );
gint_t bli_info_get_blas2blis_int_type_size( void );
gint_t bli_info_get_enable_packbuf_pools( void );


// -- Kernel implementation-related --------------------------------------------
Expand Down
77 changes: 19 additions & 58 deletions frame/base/bli_membrk.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ void bli_membrk_init
)
{
bli_mutex_init( bli_membrk_mutex( membrk ) );
#ifdef BLIS_ENABLE_PACKBUF_POOLS
bli_membrk_init_pools( cntx, membrk );
#endif
bli_membrk_set_malloc_fp( bli_malloc_pool, membrk );
bli_membrk_set_free_fp( bli_free_pool, membrk );
}
Expand All @@ -53,7 +55,10 @@ void bli_membrk_finalize
)
{
bli_membrk_set_malloc_fp( NULL, membrk );
bli_membrk_set_free_fp( NULL, membrk );
#ifdef BLIS_ENABLE_PACKBUF_POOLS
bli_membrk_finalize_pools( membrk );
#endif
bli_mutex_finalize( bli_membrk_mutex( membrk ) );
}

Expand All @@ -70,6 +75,13 @@ void bli_membrk_acquire_m
dim_t pi;
siz_t block_size;

// If the internal memory pools for pack buffers are disabled, we
// spoof the buffer type as BLIS_BUFFER_FOR_GEN_USE to induce the
// immediate usage of bli_membrk_malloc().
#ifndef BLIS_ENABLE_PACKBUF_POOLS
buf_type = BLIS_BUFFER_FOR_GEN_USE;
#endif

// Make sure the API is initialized.
//assert( membrk ); //??

Expand All @@ -86,11 +98,12 @@ void bli_membrk_acquire_m
// - the buffer type (a packbuf_t value),
// - the size of the requested region,
// - the membrk_t from which the mem_t entry was acquired.
// NOTE: We do not initialize the pool field since this block did not
// NOTE: We initialize the pool field to NULL since this block did not
// come from a memory pool.
bli_mem_set_buffer( buf_sys, mem );
bli_mem_set_buf_sys( buf_sys, mem );
bli_mem_set_buf_type( buf_type, mem );
bli_mem_set_pool( NULL, mem );
bli_mem_set_size( req_size, mem );
bli_mem_set_membrk( membrk, mem );
}
Expand All @@ -105,37 +118,28 @@ void bli_membrk_acquire_m
pi = bli_packbuf_index( buf_type );
pool = bli_membrk_pool( pi, membrk );

// Unconditionally perform error checking on the memory pool.
{
err_t e_val;

// Make sure that the requested matrix size fits inside of a block
// of the corresponding pool. If it does not, the pool was somehow
// initialized improperly.
e_val = bli_check_requested_block_size_for_pool( req_size, pool );
bli_check_error_code( e_val );
}

// Extract the address of the pblk_t struct within the mem_t.
pblk = bli_mem_pblk( mem );

// BEGIN CRITICAL SECTION
bli_membrk_lock( membrk );
{

// Checkout a block from the pool. If the pool is exhausted,
// Checkout a block from the pool. If the pool's blocks are too
// small, it will be reinitialized with blocks large enough to
// accommodate the requested block size. If the pool is exhausted,
// either because it is still empty or because all blocks have
// been checked out already, additional blocks will be allocated
// automatically, as-needed. Note that the addresses are stored
// directly into the mem_t struct since pblk is the address of
// the struct's pblk_t field.
bli_pool_checkout_block( pblk, pool );
bli_pool_checkout_block( req_size, pblk, pool );

// Query the size of the blocks in the pool so we can store it in
// the mem_t object. At this point, it is guaranteed to be at
// least as large as req_size. (NOTE: We must perform the query
// within the critical section to ensure that the pool hasn't
// changed, as unlikely as that would be.)
// changed.)
block_size = bli_pool_block_size( pool );

}
Expand Down Expand Up @@ -329,49 +333,6 @@ void bli_membrk_init_pools
bli_pool_init( num_blocks_c, block_size_c, align_size, pool_c );
}

void bli_membrk_reinit_pools
(
cntx_t* cntx,
membrk_t* membrk
)
{
// Map each of the packbuf_t values to an index starting at zero.
const dim_t index_a = bli_packbuf_index( BLIS_BUFFER_FOR_A_BLOCK );
const dim_t index_b = bli_packbuf_index( BLIS_BUFFER_FOR_B_PANEL );
const dim_t index_c = bli_packbuf_index( BLIS_BUFFER_FOR_C_PANEL );

const siz_t align_size = BLIS_POOL_ADDR_ALIGN_SIZE;

// Alias the pool addresses to convenient identifiers.
pool_t* pool_a = bli_membrk_pool( index_a, membrk );
pool_t* pool_b = bli_membrk_pool( index_b, membrk );
pool_t* pool_c = bli_membrk_pool( index_c, membrk );

// Query the number of blocks currently allocated in each pool.
const dim_t num_blocks_a = bli_pool_num_blocks( pool_a );
const dim_t num_blocks_b = bli_pool_num_blocks( pool_b );
const dim_t num_blocks_c = bli_pool_num_blocks( pool_c );

siz_t block_size_a_new = 0;
siz_t block_size_b_new = 0;
siz_t block_size_c_new = 0;

// Determine the context-implied block size needed for each pool.
bli_membrk_compute_pool_block_sizes( &block_size_a_new,
&block_size_b_new,
&block_size_c_new,
cntx );

// Reinitialize the pool, but only if one of the parameters has
// changed in such a way that reinitialization would be required.
// In this case, the align_size is constant, as is num_blocks, so
// what this actually boils down to is that reinitialization of a
// pool occurs only if the block size for that pool has increased.
bli_pool_reinit_if( num_blocks_a, block_size_a_new, align_size, pool_a );
bli_pool_reinit_if( num_blocks_b, block_size_b_new, align_size, pool_b );
bli_pool_reinit_if( num_blocks_c, block_size_c_new, align_size, pool_c );
}

void bli_membrk_finalize_pools
(
membrk_t* membrk
Expand Down
5 changes: 0 additions & 5 deletions frame/base/bli_membrk.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,11 +140,6 @@ void bli_membrk_init_pools
cntx_t* cntx,
membrk_t* membrk
);
void bli_membrk_reinit_pools
(
cntx_t* cntx,
membrk_t* membrk
);
void bli_membrk_finalize_pools
(
membrk_t* membrk
Expand Down
Loading

0 comments on commit 9804adf

Please sign in to comment.