Commit

Shyrma multiply bp (#6008)
* refine the multiply ff implementation and implement multiply bp (a gradient sketch follows this list)

* testing multiply_bp

* minor changes in pragma

* use broadcast_apply instead of NDArray::operator*() method

* use block comment

* make use of tile method in multiply_bp op

* get rid of using temporary arrays
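As context for the multiply bp work above: for z = x * y, the chain rule gives dLdx = dLdz * y and dLdy = dLdz * x, applied elementwise when the shapes match; when broadcasting is involved, each gradient is additionally summed over the broadcast axes (the role of the evalBroadcastBackwardAxis rewrite below) and the tile method expands the smaller input. A minimal sketch of the same-shape case, with a hypothetical function name, not the libnd4j op itself:

```cpp
#include <cstddef>
#include <vector>

// Hypothetical sketch of the multiply backward pass for same-shape inputs;
// not the libnd4j implementation. Given z = x * y and the upstream gradient
// dLdz, the chain rule gives dLdx = dLdz * y and dLdy = dLdz * x.
void multiplyBpSameShape(const std::vector<float>& x,
                         const std::vector<float>& y,
                         const std::vector<float>& dLdz,
                         std::vector<float>& dLdx,
                         std::vector<float>& dLdy) {
    dLdx.resize(x.size());
    dLdy.resize(y.size());
    for (std::size_t i = 0; i < x.size(); ++i) {
        dLdx[i] = dLdz[i] * y[i];   // d(x*y)/dx = y
        dLdy[i] = dLdz[i] * x[i];   // d(x*y)/dy = x
    }
}
```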
Yurii authored and raver119 committed Aug 1, 2018
1 parent 603a6e0 commit d01f7dd
Showing 32 changed files with 402 additions and 136 deletions.
9 changes: 6 additions & 3 deletions libnd4j/blas/NDArray.h
@@ -698,7 +698,7 @@ namespace nd4j {
void tilei(const std::vector<Nd4jLong>& repeats);

/**
-     * returns new array which is created by by repeating of this array the number of times given by reps
+     * returns new array which is created by repeating of this array the number of times given by reps
* repeats - contains numbers of repetitions
*/
NDArray<T> tile(const std::vector<Nd4jLong>& repeats) const;
@@ -974,7 +974,7 @@ namespace nd4j {
* change an array by repeating it the number of times in order to acquire new shape equal to the input shape
*
* shape - contains new shape to broadcast array to
-     * target - optional argument, if target != nullptr the resulting array will be placed it target, in opposite case tile operation is done in place
+     * target - optional argument, if target != nullptr the resulting array will be placed in target, in opposite case tile operation is done in place
*/
void tileToShape(const std::vector<Nd4jLong>& shape, NDArray<T>* target = nullptr);
void tileToShape(const std::initializer_list<Nd4jLong>& shape, NDArray<T>* target = nullptr);
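For intuition on the tileToShape contract documented above, the repeats can be derived directly from the two shapes, aligning them on the right, treating missing leading dimensions as 1, and assuming each target dimension is an integer multiple of the corresponding source dimension. A standalone sketch under those assumptions (repeatsForShape is a hypothetical helper, not part of NDArray):

```cpp
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <vector>

// Hypothetical helper: derive the per-axis repeat counts that tile a
// source shape up to a target shape.
std::vector<std::int64_t> repeatsForShape(const std::vector<std::int64_t>& src,
                                          const std::vector<std::int64_t>& target) {
    if (src.size() > target.size())
        throw std::invalid_argument("source rank exceeds target rank");
    const std::size_t diff = target.size() - src.size();
    std::vector<std::int64_t> repeats(target.size());
    for (std::size_t i = 0; i < target.size(); ++i) {
        const std::int64_t s = (i < diff) ? 1 : src[i - diff];  // implicit leading 1s
        if (s == 0 || target[i] % s != 0)
            throw std::invalid_argument("target dim is not a multiple of source dim");
        repeats[i] = target[i] / s;
    }
    return repeats;
}
// e.g. repeatsForShape({1, 3}, {2, 2, 3}) yields {2, 2, 1}
```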
@@ -1002,6 +1002,9 @@ namespace nd4j {

ResultSet<T>* allExamples()const ;

+    template <typename OpName>
+    void saveResultOfBroadcast(const NDArray<T>& x, const NDArray<T>& y, const bool checkThisShape = false);

/**
* default destructor
*/
@@ -1025,7 +1028,7 @@ namespace nd4j {
/**
* returns the value of "dim" dimension
*/
-    Nd4jLong sizeAt(int dim) const;
+    Nd4jLong sizeAt(const int dim) const;

/**
* returns order of array
6 changes: 3 additions & 3 deletions libnd4j/blas/cpu/NDArray.cpp
@@ -2064,7 +2064,7 @@ void NDArray<T>::tile(NDArray<T>& target) const {

//////////////////////////////////////////////////////////////////////////
template<typename T>
-Nd4jLong NDArray<T>::sizeAt(int dim) const {
+Nd4jLong NDArray<T>::sizeAt(const int dim) const {
if (dim >= this->rankOf() || dim < -this->rankOf())
throw std::runtime_error("Bad size index requested");

@@ -2390,7 +2390,7 @@ void NDArray<T>::applyTrueBroadcast(const NDArray<T>* other, NDArray<T>* target,
delete[] newShapeInfo;
}

-    // check whether min array have to be tiled
+    // check whether min array has to be tiled
if(!max->isSameShape(target)) {
// evaluate repeating dimensions for tile operation
std::vector<Nd4jLong> repeatMax(max->rankOf());
@@ -2401,7 +2401,7 @@ void NDArray<T>::applyTrueBroadcast(const NDArray<T>* other, NDArray<T>* target,
else
target->assign(max);

-    // check whether min array have to be tiled
+    // check whether min array has to be tiled
std::vector<Nd4jLong> repeatMin(min->rankOf());
int product = 1;
for(int i = min->rankOf(); i >=1 ; --i) {
2 changes: 1 addition & 1 deletion libnd4j/include/helpers/ShapeUtils.h
@@ -95,7 +95,7 @@ namespace nd4j {
// evaluate shapeInfo for diagonal array which is made using input arr elements as diagonal
static Nd4jLong* evalDiagShapeInfo(const Nd4jLong* shapeInfo, nd4j::memory::Workspace* workspace);

-    static std::vector<int> evalBroadcastBackwardAxis(Nd4jLong *operand, Nd4jLong *result);
+    static std::vector<int> evalBroadcastBackwardAxis(const Nd4jLong *operand, const Nd4jLong *result);

// utility to calculate matrix product shape with given source shapes and additional params
// returns ShapeList pointer with result shape
47 changes: 10 additions & 37 deletions libnd4j/include/helpers/impl/ShapeUtils.cpp
@@ -423,7 +423,7 @@ bool ShapeUtils<T>::evalBroadcastShapeInfo(const NDArray<T> &max, const NDArray<
}

template <typename T>
-bool ShapeUtils<T>::evalBroadcastShapeInfo(Nd4jLong *max, Nd4jLong*min, const bool evalMinMax, Nd4jLong*& resultShapeInfo, nd4j::memory::Workspace* workspace) {
+bool ShapeUtils<T>::evalBroadcastShapeInfo(Nd4jLong *max, Nd4jLong *min, const bool evalMinMax, Nd4jLong*& resultShapeInfo, nd4j::memory::Workspace* workspace) {

if ((shape::rank(max) == 0 && shape::isScalar(min))) {
// X is the driver here
@@ -699,44 +699,17 @@ Nd4jLong* ShapeUtils<T>::evalDiagShapeInfo(const Nd4jLong* shapeInfoConst, nd4j:
}

template<typename T>
-std::vector<int> ShapeUtils<T>::evalBroadcastBackwardAxis(Nd4jLong *operand, Nd4jLong *result) {
-    const int xRank = shape::rank(operand);
-    const int zRank = shape::rank(result);
+std::vector<int> ShapeUtils<T>::evalBroadcastBackwardAxis(const Nd4jLong *operandShapeInfo, const Nd4jLong *resultShapeInfo) {

+    // rRank >= oRank always !!
+    const int oRank = shape::rank(operandShapeInfo);
+    const int rRank = shape::rank(resultShapeInfo);
+    const int diff  = rRank - oRank;
    std::vector<int> axis;

-    auto xShape = shape::shapeOf(operand);
-    auto zShape = shape::shapeOf(result);
-
-    int minRank = nd4j::math::nd4j_min<int>(xRank, zRank);
-    int maxRank = nd4j::math::nd4j_max<int>(xRank, zRank);
-
-    if (xRank == zRank) {
-        for (int e = -1; e >= -minRank; e--) {
-            int o = shape::sizeAt(operand, e);
-            int r = shape::sizeAt(result, e);
-
-            if (o != r)
-                axis.emplace_back(e + maxRank);
-        }
-    } else if (xRank < zRank) {
-        for (int e = -1; e > -minRank; e--) {
-            int o = shape::sizeAt(operand, e);
-            int r = shape::sizeAt(result, e);
-
-            if (o != r)
-                axis.emplace_back(e + maxRank);
-        }
-
-        // adding inner dimensions
-        for (int e = 0; e < zRank - xRank; e++)
-            axis.emplace_back(e);
-    } else {
-        // this isn't possible
-    }
-
-    // FIXME: eventually we'd like to get rid of sort
-    if (axis.size() > 1)
-        std::sort(axis.begin(), axis.end());
+    for(int i = 0; i < rRank; ++i)
+        if(i < diff || shape::sizeAt(operandShapeInfo, i - diff) != shape::sizeAt(resultShapeInfo, i))
+            axis.push_back(i);

    return axis;
}
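The rewritten loop above marks every result dimension that the operand either does not cover at all (i < diff) or covers with a different extent; those are exactly the axes over which a broadcast gradient must be summed. A standalone replica over plain dimension vectors, with a worked example (the name broadcastBackwardAxis is illustrative, not libnd4j API):

```cpp
#include <vector>

// Standalone replica of the axis computation above, using plain
// dimension vectors instead of shape-info buffers.
std::vector<int> broadcastBackwardAxis(const std::vector<int>& operandShape,
                                       const std::vector<int>& resultShape) {
    const int oRank = static_cast<int>(operandShape.size());
    const int rRank = static_cast<int>(resultShape.size());   // rRank >= oRank
    const int diff  = rRank - oRank;
    std::vector<int> axis;
    for (int i = 0; i < rRank; ++i)
        if (i < diff || operandShape[i - diff] != resultShape[i])
            axis.push_back(i);
    return axis;
}
// e.g. broadcastBackwardAxis({3, 1, 5}, {2, 3, 4, 5}) yields {0, 2}:
// axis 0 is missing from the operand and axis 2 was broadcast from 1.
```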
8 changes: 4 additions & 4 deletions libnd4j/include/helpers/shape.h
@@ -107,7 +107,7 @@ namespace shape {

ND4J_EXPORT _CUDA_HD bool equalsStrict(Nd4jLong *shapeA, Nd4jLong *shapeB);

-    ND4J_EXPORT _CUDA_HD int sizeAt(Nd4jLong *shape, int dim);
+    ND4J_EXPORT _CUDA_HD int sizeAt(const Nd4jLong *shape, const int dim);

template <typename T>
ND4J_EXPORT _CUDA_HD void fill(T* buffer, T value, Nd4jLong length);
@@ -438,7 +438,7 @@ namespace shape {
* Returns the rank portion of
* an information buffer
*/
-    ND4J_EXPORT _CUDA_HD int rank( Nd4jLong *buffer);
+    ND4J_EXPORT _CUDA_HD int rank(const Nd4jLong *buffer);

/**
* Converts a raw int buffer of the layout:
@@ -2594,7 +2594,7 @@ template <typename T>
* Returns the rank portion of
* an information buffer
*/
-INLINEDEF _CUDA_HD int rank( Nd4jLong *buffer) {
+INLINEDEF _CUDA_HD int rank(const Nd4jLong *buffer) {
return static_cast<int>(buffer[0]);
}

@@ -2977,7 +2977,7 @@ template <typename T>
return true;
}

-INLINEDEF _CUDA_HD int sizeAt(Nd4jLong *shape, int dim) {
+INLINEDEF _CUDA_HD int sizeAt(const Nd4jLong *shape, const int dim) {
if (dim >= 0)
return shape[1+dim];
else
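As the rank() definition above shows, a shape-info buffer stores the rank at index 0 with the dimensions immediately following, which is why sizeAt reads shape[1 + dim] for a non-negative dim. The negative branch is cut off by the page view; assuming the usual convention that negative dims count back from the last axis, a minimal sketch (sizeAtSketch is a hypothetical name, not the library function):

```cpp
#include <cstdint>

// Hypothetical sketch of sizeAt over the shape-info layout
// [rank, dim0, dim1, ...]: rank lives at index 0, the dimensions
// immediately after it.
int sizeAtSketch(const std::int64_t* shapeInfo, int dim) {
    const int rank = static_cast<int>(shapeInfo[0]);
    if (dim >= 0)
        return static_cast<int>(shapeInfo[1 + dim]);
    // assumption: negative dims count from the end, e.g. -1 is the last axis
    return static_cast<int>(shapeInfo[1 + rank + dim]);
}
```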
@@ -15,7 +15,7 @@
******************************************************************************/

//
-// @author Yurii Shyrma, created on 24.07.2018
+// @author Yurii Shyrma (iuriish@yahoo.com), created on 24.07.2018
//


@@ -31,7 +31,7 @@ namespace nd4j {
auto y = INPUT_VARIABLE(1);
auto z = OUTPUT_VARIABLE(0);

-    auto tZ = BroadcastHelper<T>::template broadcast_apply<simdOps::Add<T>>(x, y, z);
+    auto tZ = BroadcastHelper<T>::template broadcastApply<simdOps::Add<T>>(x, y, z);
if (tZ == nullptr)
return ND4J_STATUS_KERNEL_FAILURE;
else if (tZ != z)
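Every op touched by this rename follows the same contract around broadcastApply, visible in the hunk above and repeated below: nullptr signals kernel failure, a return equal to z means the result was written in place, and any other pointer is a freshly allocated output the op must adopt. A hedged sketch of that calling pattern; the array type and the adoption step are stand-ins, not libnd4j API:

```cpp
// Hypothetical illustration of the three-way broadcastApply contract.
template <typename Array>
int dispatchBroadcastOp(Array* x, Array* y, Array* z,
                        Array* (*broadcastApply)(Array*, Array*, Array*)) {
    Array* tZ = broadcastApply(x, y, z);
    if (tZ == nullptr)
        return 1;   // corresponds to ND4J_STATUS_KERNEL_FAILURE above
    if (tZ != z) {
        // broadcasting allocated a new output; the truncated else-if
        // branches in these hunks adopt it as the op's result (assumption)
    }
    return 0;       // success
}
```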
@@ -31,7 +31,7 @@ namespace nd4j {
NDArray<T> *y = INPUT_VARIABLE(1);
NDArray<T> *z = OUTPUT_VARIABLE(0);

-    auto tZ = BroadcastHelper<T>::template broadcast_apply<simdOps::Copy<T>>(x, y, z);
+    auto tZ = BroadcastHelper<T>::template broadcastApply<simdOps::Copy<T>>(x, y, z);
if (tZ == nullptr)
return ND4J_STATUS_KERNEL_FAILURE;
else if (tZ != z) {
@@ -30,7 +30,7 @@ namespace nd4j {
auto y = INPUT_VARIABLE(1);
auto z = OUTPUT_VARIABLE(0);

-    auto tZ = BroadcastHelper<T>::template broadcast_apply<simdOps::And<T>>(x, y, z);
+    auto tZ = BroadcastHelper<T>::template broadcastApply<simdOps::And<T>>(x, y, z);
if (tZ == nullptr)
return ND4J_STATUS_KERNEL_FAILURE;
else if (tZ != z)
@@ -30,7 +30,7 @@ namespace nd4j {
auto y = INPUT_VARIABLE(1);
auto z = OUTPUT_VARIABLE(0);

-    auto tZ = BroadcastHelper<T>::template broadcast_apply<simdOps::Or<T>>(x, y, z);
+    auto tZ = BroadcastHelper<T>::template broadcastApply<simdOps::Or<T>>(x, y, z);
if (tZ == nullptr)
return ND4J_STATUS_KERNEL_FAILURE;
else if (tZ != z)
@@ -31,7 +31,7 @@ namespace nd4j {
NDArray<T> *y = INPUT_VARIABLE(1);
NDArray<T> *z = this->getZ(block);

-    auto tZ = BroadcastHelper<T>::template broadcast_apply<simdOps::Divide<T>>(x, y, z);
+    auto tZ = BroadcastHelper<T>::template broadcastApply<simdOps::Divide<T>>(x, y, z);
if (tZ == nullptr)
return ND4J_STATUS_KERNEL_FAILURE;
else if (tZ != z) {
@@ -27,7 +27,7 @@ namespace nd4j {
NDArray<T> *y = INPUT_VARIABLE(1);
NDArray<T> *z = OUTPUT_VARIABLE(0);

-    auto tZ = BroadcastHelper<T>::template broadcast_apply<simdOps::EqualTo<T>>(x, y, z);
+    auto tZ = BroadcastHelper<T>::template broadcastApply<simdOps::EqualTo<T>>(x, y, z);
if (tZ == nullptr)
return ND4J_STATUS_KERNEL_FAILURE;
else if (tZ != z) {
@@ -31,7 +31,7 @@ namespace nd4j {
NDArray<T> *y = INPUT_VARIABLE(1);
NDArray<T> *z = OUTPUT_VARIABLE(0);

-    auto tZ = BroadcastHelper<T>::template broadcast_apply<simdOps::FloorDiv<T>>(x, y, z);
+    auto tZ = BroadcastHelper<T>::template broadcastApply<simdOps::FloorDiv<T>>(x, y, z);
if (tZ == nullptr)
return ND4J_STATUS_KERNEL_FAILURE;
else if (tZ != z) {
@@ -31,7 +31,7 @@ namespace nd4j {
NDArray<T> *y = INPUT_VARIABLE(1);
NDArray<T> *z = this->getZ(block);

-    auto tZ = BroadcastHelper<T>::template broadcast_apply<simdOps::FloorMod<T>>(x, y, z);
+    auto tZ = BroadcastHelper<T>::template broadcastApply<simdOps::FloorMod<T>>(x, y, z);
if (tZ == nullptr)
return ND4J_STATUS_KERNEL_FAILURE;
else if (tZ != z) {
@@ -28,7 +28,7 @@ namespace nd4j {
NDArray<T> *y = INPUT_VARIABLE(1);
NDArray<T> *z = OUTPUT_VARIABLE(0);

-    auto tZ = BroadcastHelper<T>::template broadcast_apply<simdOps::GreaterThan<T>>(x, y, z);
+    auto tZ = BroadcastHelper<T>::template broadcastApply<simdOps::GreaterThan<T>>(x, y, z);
if (tZ == nullptr)
return ND4J_STATUS_KERNEL_FAILURE;
else if (tZ != z) {
@@ -27,7 +27,7 @@ namespace nd4j {
NDArray<T> *y = INPUT_VARIABLE(1);
NDArray<T> *z = OUTPUT_VARIABLE(0);

-    auto tZ = BroadcastHelper<T>::template broadcast_apply<simdOps::GreaterThanOrEqual<T>>(x, y, z);
+    auto tZ = BroadcastHelper<T>::template broadcastApply<simdOps::GreaterThanOrEqual<T>>(x, y, z);
if (tZ == nullptr)
return ND4J_STATUS_KERNEL_FAILURE;
else if (tZ != z) {
@@ -27,7 +27,7 @@ namespace nd4j {
NDArray<T> *y = INPUT_VARIABLE(1);
NDArray<T> *z = OUTPUT_VARIABLE(0);

-    auto tZ = BroadcastHelper<T>::template broadcast_apply<simdOps::LessThan<T>>(x, y, z);
+    auto tZ = BroadcastHelper<T>::template broadcastApply<simdOps::LessThan<T>>(x, y, z);
if (tZ == nullptr)
return ND4J_STATUS_KERNEL_FAILURE;
else if (tZ != z) {
@@ -27,7 +27,7 @@ namespace nd4j {
NDArray<T> *y = INPUT_VARIABLE(1);
NDArray<T> *z = OUTPUT_VARIABLE(0);

-    auto tZ = BroadcastHelper<T>::template broadcast_apply<simdOps::LessThanOrEqual<T>>(x, y, z);
+    auto tZ = BroadcastHelper<T>::template broadcastApply<simdOps::LessThanOrEqual<T>>(x, y, z);
if (tZ == nullptr)
return ND4J_STATUS_KERNEL_FAILURE;
else if (tZ != z) {
@@ -32,7 +32,7 @@ namespace nd4j {

auto z = OUTPUT_VARIABLE(0);

-    auto tZ = BroadcastHelper<T>::template broadcast_apply<simdOps::Max<T>>(x, y, z);
+    auto tZ = BroadcastHelper<T>::template broadcastApply<simdOps::Max<T>>(x, y, z);
if (tZ == nullptr)
return ND4J_STATUS_KERNEL_FAILURE;
else if (tZ != z) {
@@ -32,7 +32,7 @@ namespace nd4j {

auto z = OUTPUT_VARIABLE(0);

-    auto tZ = BroadcastHelper<T>::template broadcast_apply<simdOps::Min<T>>(x, y, z);
+    auto tZ = BroadcastHelper<T>::template broadcastApply<simdOps::Min<T>>(x, y, z);
if (tZ == nullptr)
return ND4J_STATUS_KERNEL_FAILURE;
else if (tZ != z) {
@@ -31,7 +31,7 @@ namespace nd4j {
NDArray<T> *y = INPUT_VARIABLE(1);
NDArray<T> *z = this->getZ(block);

-    auto tZ = BroadcastHelper<T>::template broadcast_apply<simdOps::Mod<T>>(x, y, z);
+    auto tZ = BroadcastHelper<T>::template broadcastApply<simdOps::Mod<T>>(x, y, z);
if (tZ == nullptr)
return ND4J_STATUS_KERNEL_FAILURE;
else if (tZ != z) {