Skip to content

Commit

Permalink
MIC: move reductions from VectorHelper to Detail
Browse files Browse the repository at this point in the history
Signed-off-by: Matthias Kretz <kretz@kde.org>
  • Loading branch information
mattkretz committed Feb 24, 2016
1 parent 720f0e0 commit 53a778c
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 26 deletions.
76 changes: 76 additions & 0 deletions mic/detail.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/* This file is part of the Vc library. {{{
Copyright © 2016 Matthias Kretz <kretz@kde.org>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the names of contributing organizations nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
}}}*/

#ifndef VC_MIC_DETAIL_H_
#define VC_MIC_DETAIL_H_

#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace Detail
{
// horizontal add{{{1
// Each overload reduces all lanes of `a` to a single scalar sum. The unnamed
// second parameter is never read; it only selects the overload (and thereby the
// return type), which matters because every integer entry type shares __m512i.
Vc_INTRINSIC float add(__m512 a, float)
{
    return _mm512_reduce_add_ps(a);
}
Vc_INTRINSIC double add(__m512d a, double)
{
    return _mm512_reduce_add_pd(a);
}
Vc_INTRINSIC int add(__m512i a, int)
{
    return _mm512_reduce_add_epi32(a);
}
// The remaining integer overloads run the same 32-bit reduction and convert the
// result to the requested entry type on return.
Vc_INTRINSIC uint add(__m512i a, uint)
{
    return static_cast<uint>(_mm512_reduce_add_epi32(a));
}
Vc_INTRINSIC short add(__m512i a, short)
{
    return static_cast<short>(_mm512_reduce_add_epi32(a));
}
Vc_INTRINSIC ushort add(__m512i a, ushort)
{
    return static_cast<ushort>(_mm512_reduce_add_epi32(a));
}

// horizontal mul{{{1
// Each overload reduces all lanes of `a` to a single scalar product. The unnamed
// second parameter is never read; it only selects the overload (and thereby the
// return type), which matters because every integer entry type shares __m512i.
Vc_INTRINSIC float mul(__m512 a, float)
{
    return _mm512_reduce_mul_ps(a);
}
Vc_INTRINSIC double mul(__m512d a, double)
{
    return _mm512_reduce_mul_pd(a);
}
Vc_INTRINSIC int mul(__m512i a, int)
{
    return _mm512_reduce_mul_epi32(a);
}
// The remaining integer overloads run the same 32-bit reduction and convert the
// result to the requested entry type on return.
Vc_INTRINSIC uint mul(__m512i a, uint)
{
    return static_cast<uint>(_mm512_reduce_mul_epi32(a));
}
Vc_INTRINSIC short mul(__m512i a, short)
{
    return static_cast<short>(_mm512_reduce_mul_epi32(a));
}
Vc_INTRINSIC ushort mul(__m512i a, ushort)
{
    return static_cast<ushort>(_mm512_reduce_mul_epi32(a));
}

// horizontal min{{{1
// Each overload reduces all lanes of `a` to the smallest lane value. The unnamed
// second parameter is never read; it only selects the overload (and thereby the
// return type), which matters because every integer entry type shares __m512i.
Vc_INTRINSIC float min(__m512 a, float) { return _mm512_reduce_min_ps(a); }
Vc_INTRINSIC double min(__m512d a, double) { return _mm512_reduce_min_pd(a); }
Vc_INTRINSIC int min(__m512i a, int) { return _mm512_reduce_min_epi32(a); }
// Unsigned entries need the unsigned comparison: with the signed (epi32)
// reduction every lane >= 2^31 would compare as negative and be picked as the
// minimum ahead of genuinely smaller values.
Vc_INTRINSIC uint min(__m512i a, uint) { return _mm512_reduce_min_epu32(a); }
Vc_INTRINSIC short min(__m512i a, short) { return _mm512_reduce_min_epi32(a); }
// NOTE(review): the signed 32-bit reduction is only correct for ushort if the
// entries are stored zero-extended in 32-bit lanes - confirm against the
// load/convert code.
Vc_INTRINSIC ushort min(__m512i a, ushort) { return _mm512_reduce_min_epi32(a); }

// horizontal max{{{1
// Each overload reduces all lanes of `a` to the largest lane value. The unnamed
// second parameter is never read; it only selects the overload (and thereby the
// return type), which matters because every integer entry type shares __m512i.
Vc_INTRINSIC float max(__m512 a, float) { return _mm512_reduce_max_ps(a); }
Vc_INTRINSIC double max(__m512d a, double) { return _mm512_reduce_max_pd(a); }
Vc_INTRINSIC int max(__m512i a, int) { return _mm512_reduce_max_epi32(a); }
// Unsigned entries need the unsigned comparison: with the signed (epi32)
// reduction every lane >= 2^31 would compare as negative and could never be
// reported as the maximum.
Vc_INTRINSIC uint max(__m512i a, uint) { return _mm512_reduce_max_epu32(a); }
Vc_INTRINSIC short max(__m512i a, short) { return _mm512_reduce_max_epi32(a); }
// NOTE(review): the signed 32-bit reduction is only correct for ushort if the
// entries are stored zero-extended in 32-bit lanes - confirm against the
// load/convert code.
Vc_INTRINSIC ushort max(__m512i a, ushort) { return _mm512_reduce_max_epi32(a); }

//}}}1
} // namespace Detail
} // namespace Vc

#endif // VC_MIC_DETAIL_H_

// vim: foldmethod=marker
9 changes: 5 additions & 4 deletions mic/vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "writemaskedvector.h"
#include "sorthelper.h"
#include "../common/where.h"
#include "detail.h"
#include "macros.h"

#ifdef isfinite
Expand Down Expand Up @@ -253,10 +254,10 @@ class Vector<T, VectorAbi::Mic> : public MIC::StoreMixin<MIC::Vector<T>, T>
return MIC::WriteMaskedVector<T>(this, k);
}

inline EntryType min() const { return HT::reduce_min(d.v()); }
inline EntryType max() const { return HT::reduce_max(d.v()); }
inline EntryType product() const { return HT::reduce_mul(d.v()); }
inline EntryType sum() const { return HT::reduce_add(d.v()); }
// Horizontal reductions over the whole vector. Each one forwards the raw
// register (d.v()) to the Detail overload set; the value-initialized EntryType
// argument is a tag that picks the overload for this vector's entry type.
Vc_ALWAYS_INLINE EntryType min() const
{
    return Detail::min(d.v(), EntryType());
}
Vc_ALWAYS_INLINE EntryType max() const
{
    return Detail::max(d.v(), EntryType());
}
Vc_ALWAYS_INLINE EntryType product() const
{
    return Detail::mul(d.v(), EntryType());
}
Vc_ALWAYS_INLINE EntryType sum() const
{
    return Detail::add(d.v(), EntryType());
}
Vc_ALWAYS_INLINE_L Vector partialSum() const Vc_ALWAYS_INLINE_R;
inline EntryType min(MaskArgument m) const;
inline EntryType max(MaskArgument m) const;
Expand Down
22 changes: 0 additions & 22 deletions mic/vectorhelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,8 @@ template<> struct VectorHelper<double> {
typedef double EntryType;
typedef __m512d VectorType;
#define Vc_SUFFIX pd
static Vc_INTRINSIC VectorType zero() { return Vc_CAT2(_mm512_setzero_, Vc_SUFFIX)(); }
static Vc_INTRINSIC VectorType set(EntryType x) { return Vc_CAT2(_mm512_set_1to8_, Vc_SUFFIX)(x); }

static Vc_INTRINSIC EntryType reduce_max(const VectorType &a) { return _mm512_reduce_max_pd(a); }
static Vc_INTRINSIC EntryType reduce_min(const VectorType &a) { return _mm512_reduce_min_pd(a); }
static Vc_INTRINSIC EntryType reduce_mul(const VectorType &a) { return _mm512_reduce_mul_pd(a); }
static Vc_INTRINSIC EntryType reduce_add(const VectorType &a) { return _mm512_reduce_add_pd(a); }

static Vc_INTRINSIC VectorType abs(VectorType a) {
const __m512i absMask = _mm512_set_4to16_epi32(0xffffffff, 0x7fffffff, 0xffffffff, 0x7fffffff);
return mic_cast<VectorType>(_mm512_and_epi32(mic_cast<__m512i>(a), absMask));
Expand All @@ -101,14 +95,8 @@ template<> struct VectorHelper<float> {
typedef float EntryType;
typedef __m512 VectorType;
#define Vc_SUFFIX ps
static Vc_INTRINSIC VectorType zero() { return Vc_CAT2(_mm512_setzero_, Vc_SUFFIX)(); }
static Vc_INTRINSIC VectorType set(EntryType x) { return Vc_CAT2(_mm512_set_1to16_, Vc_SUFFIX)(x); }

static Vc_INTRINSIC EntryType reduce_max(const VectorType &a) { return _mm512_reduce_max_ps(a); }
static Vc_INTRINSIC EntryType reduce_min(const VectorType &a) { return _mm512_reduce_min_ps(a); }
static Vc_INTRINSIC EntryType reduce_mul(const VectorType &a) { return _mm512_reduce_mul_ps(a); }
static Vc_INTRINSIC EntryType reduce_add(const VectorType &a) { return _mm512_reduce_add_ps(a); }

static Vc_INTRINSIC VectorType abs(VectorType a) {
const __m512i absMask = _mm512_set_1to16_epi32(0x7fffffff);
return mic_cast<VectorType>(_mm512_and_epi32(mic_cast<__m512i>(a), absMask));
Expand All @@ -133,11 +121,6 @@ template<> struct VectorHelper<int> {
#define Vc_SUFFIX epi32
static Vc_INTRINSIC VectorType set(EntryType x) { return Vc_CAT2(_mm512_set_1to16_, Vc_SUFFIX)(x); }

static Vc_INTRINSIC EntryType reduce_max(const VectorType &a) { return _mm512_reduce_max_epi32(a); }
static Vc_INTRINSIC EntryType reduce_min(const VectorType &a) { return _mm512_reduce_min_epi32(a); }
static Vc_INTRINSIC EntryType reduce_mul(const VectorType &a) { return _mm512_reduce_mul_epi32(a); }
static Vc_INTRINSIC EntryType reduce_add(const VectorType &a) { return _mm512_reduce_add_epi32(a); }

static Vc_INTRINSIC VectorType abs(VectorType a) {
VectorType zero = mic_cast<VectorType>(_mm512_setzero());
const VectorType minusOne = _mm512_set_1to16_epi32( -1 );
Expand All @@ -154,11 +137,6 @@ template<> struct VectorHelper<unsigned int> {
typedef unsigned int EntryType;
typedef __m512i VectorType;
#define Vc_SUFFIX epu32
static Vc_INTRINSIC EntryType reduce_max(const VectorType &a) { return _mm512_reduce_max_epi32(a); }
static Vc_INTRINSIC EntryType reduce_min(const VectorType &a) { return _mm512_reduce_min_epi32(a); }
static Vc_INTRINSIC EntryType reduce_mul(const VectorType &a) { return _mm512_reduce_mul_epi32(a); }
static Vc_INTRINSIC EntryType reduce_add(const VectorType &a) { return _mm512_reduce_add_epi32(a); }

Vc_OP(max) Vc_OP(min)
#undef Vc_SUFFIX
#define Vc_SUFFIX epi32
Expand Down

0 comments on commit 53a778c

Please sign in to comment.