Skip to content

Commit

Permalink
[OpenMP 4.5] Remove superfluous map clauses
Browse files Browse the repository at this point in the history
Fixes #41.
OpenMP 4.5 does not require the map clauses on the target region
if the data has been previously defined using unstructured data
enter/exit clauses. Removing these clauses works fine with the
Clang compiler, however we noticed issues with the Cray compiler.
The issue is that the Cray compiler does not block the target
region "kernel calls" and so the timing is incorrect. This was
not noticed before due to the presence of the map clauses.
For now, we have had to add an update from clause of a scalar
value to ensure that the kernel blocks. It is hoped that we can
remove this in due course. But in the vein of showing how the
models work we want to keep the minimum required correct code
(which is without the map clause) but need the code to also
work correctly.
  • Loading branch information
tomdeakin committed Feb 7, 2018
1 parent cfb8901 commit 73f1220
Showing 1 changed file with 30 additions and 5 deletions.
35 changes: 30 additions & 5 deletions OMPStream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ void OMPStream<T>::init_arrays(T initA, T initB, T initC)
T *a = this->a;
T *b = this->b;
T *c = this->c;
#pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size])
#pragma omp target teams distribute parallel for simd
#else
#pragma omp parallel for
#endif
Expand All @@ -68,6 +68,11 @@ void OMPStream<T>::init_arrays(T initA, T initB, T initC)
b[i] = initB;
c[i] = initC;
}
#if defined(OMP_TARGET_GPU) && defined(_CRAYC)
// If using the Cray compiler, the kernels do not block, so this update forces
// a small copy to ensure blocking so that timing is correct
#pragma omp target update from(a[0:0])
#endif
}

template <class T>
Expand Down Expand Up @@ -97,14 +102,19 @@ void OMPStream<T>::copy()
unsigned int array_size = this->array_size;
T *a = this->a;
T *c = this->c;
#pragma omp target teams distribute parallel for simd map(to: a[0:array_size], c[0:array_size])
#pragma omp target teams distribute parallel for simd
#else
#pragma omp parallel for
#endif
for (int i = 0; i < array_size; i++)
{
c[i] = a[i];
}
#if defined(OMP_TARGET_GPU) && defined(_CRAYC)
// If using the Cray compiler, the kernels do not block, so this update forces
// a small copy to ensure blocking so that timing is correct
#pragma omp target update from(a[0:0])
#endif
}

template <class T>
Expand All @@ -116,14 +126,19 @@ void OMPStream<T>::mul()
unsigned int array_size = this->array_size;
T *b = this->b;
T *c = this->c;
#pragma omp target teams distribute parallel for simd map(to: b[0:array_size], c[0:array_size])
#pragma omp target teams distribute parallel for simd
#else
#pragma omp parallel for
#endif
for (int i = 0; i < array_size; i++)
{
b[i] = scalar * c[i];
}
#if defined(OMP_TARGET_GPU) && defined(_CRAYC)
// If using the Cray compiler, the kernels do not block, so this update forces
// a small copy to ensure blocking so that timing is correct
#pragma omp target update from(c[0:0])
#endif
}

template <class T>
Expand All @@ -134,14 +149,19 @@ void OMPStream<T>::add()
T *a = this->a;
T *b = this->b;
T *c = this->c;
#pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size])
#pragma omp target teams distribute parallel for simd
#else
#pragma omp parallel for
#endif
for (int i = 0; i < array_size; i++)
{
c[i] = a[i] + b[i];
}
#if defined(OMP_TARGET_GPU) && defined(_CRAYC)
// If using the Cray compiler, the kernels do not block, so this update forces
// a small copy to ensure blocking so that timing is correct
#pragma omp target update from(a[0:0])
#endif
}

template <class T>
Expand All @@ -154,14 +174,19 @@ void OMPStream<T>::triad()
T *a = this->a;
T *b = this->b;
T *c = this->c;
#pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size])
#pragma omp target teams distribute parallel for simd
#else
#pragma omp parallel for
#endif
for (int i = 0; i < array_size; i++)
{
a[i] = b[i] + scalar * c[i];
}
#if defined(OMP_TARGET_GPU) && defined(_CRAYC)
// If using the Cray compiler, the kernels do not block, so this update forces
// a small copy to ensure blocking so that timing is correct
#pragma omp target update from(a[0:0])
#endif
}

template <class T>
Expand Down

0 comments on commit 73f1220

Please sign in to comment.