Skip to content

Commit

Permalink
Merge pull request #250 from timmoon10/blas1-optimization-dev
Browse files (browse the repository at this point in the history)
Parallelizing copy and zero routines with OpenMP.
  • Loading branch information
poulson committed Nov 29, 2017
2 parents 50e24bb + 0da3f6a commit 10efdea
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 9 deletions.
26 changes: 19 additions & 7 deletions include/El/blas_like/level1/Copy.hpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -24,17 +24,29 @@ void Copy( const Matrix<T>& A, Matrix<T>& B )
B.Resize( height, width );
const Int ldA = A.LDim();
const Int ldB = B.LDim();
const T* ABuf = A.LockedBuffer();
T* BBuf = B.Buffer();
const T* EL_RESTRICT ABuf = A.LockedBuffer();
T* EL_RESTRICT BBuf = B.Buffer();

// Copy all entries if memory is contiguous. Otherwise copy each
// column.
if( ldA == height && ldB == height ) {
if( ldA == height && ldB == height )
{
#ifdef _OPENMP
// Manually copy entries with OpenMP loop
// Note: We attempt to map entries of B to NUMA domains in a
// pattern convenient for future OpenMP loops.
EL_PARALLEL_FOR
for( Int i=0; i<height*width; ++i )
{
BBuf[i] = ABuf[i];
}
#else
MemCopy( BBuf, ABuf, height*width );
#endif
}
else {
else
{
EL_PARALLEL_FOR
for( Int j=0; j<width; ++j ) {
for( Int j=0; j<width; ++j )
{
MemCopy(&BBuf[j*ldB], &ABuf[j*ldA], height);
}
}
Expand Down
13 changes: 11 additions & 2 deletions include/El/blas_like/level1/Zero.hpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -20,11 +20,20 @@ void Zero( Matrix<T>& A )
const Int ALDim = A.LDim();
T* ABuf = A.Buffer();

// Zero out all entries if memory is contiguous. Otherwise zero
// out each column.
if( ALDim == height )
{
#ifdef _OPENMP
// Manually set entries with OpenMP loop
// Note: We attempt to map entries of A to NUMA domains in a
// pattern convenient for future OpenMP loops.
EL_PARALLEL_FOR
for( Int i=0; i<height*width; ++i )
{
ABuf[i] = T(0);
}
#else
MemZero( ABuf, height*width );
#endif
}
else
{
Expand Down

0 comments on commit 10efdea

Please sign in to comment.