Shyrma pad (#6443)
* rewrite pad op

* add more tests for pad op

* testing and fixing bugs in new pad op

* start to write batchnorm_new

* finish implementation of batchnorm_new and testing it
Yurii authored and raver119 committed Sep 14, 2018
1 parent 0a80076 commit 27b7268
Showing 8 changed files with 733 additions and 32 deletions.
20 changes: 10 additions & 10 deletions libnd4j/include/helpers/shape.h
@@ -1362,12 +1362,12 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) {
}


if (shape::isVector(shape, rank)) {
for (int i = 0; i < 2; i++)
stride[i] = 1;
return stride;
// if (shape::isVector(shape, rank)) {
// for (int i = 0; i < 2; i++)
// stride[i] = 1;
// return stride;

}
// }

int st = startNum;
for (int j = rank - 1; j >= 0; j--) {
@@ -1384,12 +1384,12 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) {
return ret;
}

if (shape::isVector(shape, rank)) {
for (int i = 0; i < 2; i++)
ret[i] = 1;
return ret;
// if (shape::isVector(shape, rank)) {
// for (int i = 0; i < 2; i++)
// ret[i] = 1;
// return ret;

}
// }

int st = startNum;
for (int j = rank - 1; j >= 0; j--) {
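For reference, the loop above computes standard c-order (row-major) strides. A minimal standalone sketch of that computation, using plain std::vector and an illustrative function name rather than the library's types:

// Minimal sketch of c-order stride computation (illustrative, not the library's helper).
#include <vector>

std::vector<long long> cOrderStrides(const std::vector<long long>& shape, long long startNum = 1) {
    const int rank = static_cast<int>(shape.size());
    std::vector<long long> stride(rank);
    long long st = startNum;
    for (int j = rank - 1; j >= 0; --j) {   // innermost dimension gets the smallest stride
        stride[j] = st;
        st *= shape[j];
    }
    return stride;
}

For shape {2, 3, 4} this yields strides {12, 4, 1}; for a vector shape {1, n} it yields {n, 1}, unlike the all-ones strides of the branch commented out above.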
94 changes: 90 additions & 4 deletions libnd4j/include/ops/declarable/generic/nn/batchnorm.cpp
@@ -76,12 +76,9 @@ CUSTOM_OP_IMPL(batchnorm, 3, 1, false, 1, 2) {
else
output->assign(inputMinusMean * sigmaInvGam);

STORE_RESULT(*output);

return Status::OK();
}

//////////////////////////////////////////////////////////////////////////
DECLARE_SHAPE_FN(batchnorm) {

std::vector<const NDArray<T>*> inArrs(block.width());
@@ -96,6 +93,95 @@ DECLARE_SHAPE_FN(batchnorm) {
return SHAPELIST(outShapeInfo);
}

//////////////////////////////////////////////////////////////////////////
CUSTOM_OP_IMPL(batchnorm_new, 3, 1, false, 1, 2) {

NDArray<T>* input = INPUT_VARIABLE(0);
NDArray<T>* mean = INPUT_VARIABLE(1);
NDArray<T>* variance = INPUT_VARIABLE(2);
NDArray<T>* gamma = nullptr;
NDArray<T>* beta = nullptr;

NDArray<T>* output = OUTPUT_VARIABLE(0);

const bool applyScale = (bool)INT_ARG(0);
const bool applyOffset = (bool)INT_ARG(1);
const T epsilon = T_ARG(0);

if(applyScale)
gamma = INPUT_VARIABLE(3);
if(applyOffset)
beta = INPUT_VARIABLE(3 + static_cast<int>(applyScale));

const int numOfIntArgs = block.getIArguments()->size();
const int inRank = input->rankOf();

// get axes args to normalize input array over
std::vector<int> axes;
if(numOfIntArgs > 2)
for(int i = 2; i < numOfIntArgs; ++i)
axes.push_back(INT_ARG(i));
else
axes.push_back(inRank-1); // default dimension to reduce along is last dimension

const int numOfAxes = axes.size();
REQUIRE_TRUE(numOfAxes <= inRank, 0, "BATCHNORM_NEW op: too large number of input axes to normalize over, expected number should be less than or equal to rank of input array, but got %i and %i correspondingly !", numOfAxes, inRank);

// get, for example, something like {1, inDim1, 1, inDim3, 1} if axes = {1, 3}
std::vector<Nd4jLong> expShapeWithUnities(inRank, 1);
for(int i = 0; i < numOfAxes; ++i)
expShapeWithUnities[axes[i]] = input->sizeAt(axes[i]);

// evaluate expected shape for mean, variance and gamma. These 3 arrays should have identical shapes
// for example if input shape is {2,3,4,5,6} and axes = {1,3}, then expected shape would be {1,3,1,5,1}, and if axes = {3}, then expected shape would be {5}
std::vector<Nd4jLong> expShape = numOfAxes == 1 ? std::vector<Nd4jLong>(1, input->sizeAt(axes[0])) : expShapeWithUnities;
std::string expShapeStr = ShapeUtils<T>::shapeAsString(expShape);

REQUIRE_TRUE(ShapeUtils<T>::shapeAsString(mean) == expShapeStr, 0, "BATCHNORM_NEW op: wrong shape of mean array, expected is %s, but got %s instead !", expShapeStr.c_str(), ShapeUtils<T>::shapeAsString(mean).c_str());
REQUIRE_TRUE(ShapeUtils<T>::shapeAsString(variance) == expShapeStr, 0, "BATCHNORM_NEW op: wrong shape of variance array, expected is %s, but got %s instead !", expShapeStr.c_str(), ShapeUtils<T>::shapeAsString(variance).c_str());
if(gamma)
REQUIRE_TRUE(ShapeUtils<T>::shapeAsString(variance) == expShapeStr, 0, "BATCHNORM_NEW op: wrong shape of gamma array, expected is %s, but got %s instead !", expShapeStr.c_str(), ShapeUtils<T>::shapeAsString(gamma).c_str());
if(beta)
REQUIRE_TRUE(ShapeUtils<T>::shapeAsString(beta) == expShapeStr, 0, "BATCHNORM_NEW op: wrong shape of beta array, expected is %s, but got %s instead !", expShapeStr.c_str(), ShapeUtils<T>::shapeAsString(beta).c_str());

// normalized output = gamma * ((input - mean) / sqrt(variance + epsilon)) + beta

if(numOfAxes == 1 && inRank > 1) {
mean = mean->reshape(mean->ordering(), expShapeWithUnities);
variance = variance->reshape(variance->ordering(), expShapeWithUnities);
if(gamma)
gamma = gamma->reshape(gamma->ordering(), expShapeWithUnities);
if(beta)
beta = beta->reshape(beta->ordering(), expShapeWithUnities);
}

NDArray<T> sigmaInvGam = (*variance + epsilon).template transform<simdOps::RSqrt<T>>();
if(applyScale)
sigmaInvGam *= *gamma;

if (applyOffset)
output->assign((*input - *mean) * sigmaInvGam + *beta);
else
output->assign((*input - *mean) * sigmaInvGam);

if(numOfAxes == 1 && inRank > 1) {
delete mean;
delete variance;
delete gamma;
delete beta;
}

return Status::OK();
}

DECLARE_SHAPE_FN(batchnorm_new) {

Nd4jLong* outShapeInfo = nullptr;

COPY_SHAPE(inputShape->at(0), outShapeInfo); // output shape is identical to input shape

return SHAPELIST(outShapeInfo);
}
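As the comment in batchnorm_new notes, the per-element formula is output = gamma * ((input - mean) / sqrt(variance + epsilon)) + beta, broadcast over the non-normalized dimensions; for input shape {2,3,4,5,6} and axes = {1,3}, mean/variance/gamma/beta are expected to have shape {1,3,1,5,1} (or {5} when only the single axis 3 is given). A minimal scalar sketch of the formula, independent of the NDArray machinery (the function name is illustrative only):

#include <cmath>
#include <cstdio>

// Per-element batch normalization: gamma * ((x - mean) / sqrt(variance + epsilon)) + beta
double batchnormElement(double x, double mean, double variance,
                        double gamma, double beta, double epsilon) {
    const double sigmaInv = 1.0 / std::sqrt(variance + epsilon);   // the RSqrt step
    return gamma * (x - mean) * sigmaInv + beta;
}

int main() {
    // x = 2, mean = 1, variance = 4, gamma = 0.5, beta = 1, epsilon = 0  ->  0.5 * (2 - 1) / 2 + 1 = 1.25
    std::printf("%f\n", batchnormElement(2.0, 1.0, 4.0, 0.5, 1.0, 0.0));
    return 0;
}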

//////////////////////////////////////////////////////////////////////////
CUSTOM_OP_IMPL(batchnorm_bp, 4, 3, false, 1, 2) {
@@ -195,7 +281,7 @@ CUSTOM_OP_IMPL(batchnorm_bp, 4, 3, false, 1, 2) {
return Status::OK();
}

//////////////////////////////////////////////////////////////////////////

DECLARE_SHAPE_FN(batchnorm_bp) {

const bool applyScale = (bool)INT_ARG(0);
19 changes: 12 additions & 7 deletions libnd4j/include/ops/declarable/generic/transforms/pad.cpp
@@ -34,7 +34,8 @@ CUSTOM_OP_IMPL(pad, 2, 1, false, 0, 1) {

NDArray<T>* input = INPUT_VARIABLE(0);
NDArray<T>* paddings = INPUT_VARIABLE(1);
NDArray<T>* output = OUTPUT_VARIABLE(0);
NDArray<T>* output = OUTPUT_VARIABLE(0);

std::vector<int>* argI = block.getIArguments();

const int rank = input->rankOf();
@@ -44,26 +45,30 @@ CUSTOM_OP_IMPL(pad, 2, 1, false, 0, 1) {
std::string currentPaddingsShape = ShapeUtils<T>::shapeAsString(paddings);
REQUIRE_TRUE(expectedPaddingsShape == currentPaddingsShape, 0, "PAD op: wrong shape of paddings array, expected is %s, but got %s instead !", expectedPaddingsShape.c_str(), currentPaddingsShape.c_str());
T padValue = T(0.f);

// in case of REFLECT and SYMMETRIC modes paddings must obey additional shape requirements
// REFLECT case
if (argI->at(0) == 0) { // CONSTANT mode
if (INT_ARG(0) == 0) { // CONSTANT mode
if (!block.getTArguments()->empty())
padValue = T_ARG(0);
}
else if(argI->at(0) == 1)
else if(INT_ARG(0) == 1) { // REFLECT mode
for(int dim=0; dim < rank; ++dim)
REQUIRE_TRUE((*paddings)(dim,0) <= (input->shapeOf()[dim]-1) && (*paddings)(dim,1) <= (input->shapeOf()[dim]-1), 0, "PAD op: wrong content of paddings array for REFLECT mode !");
// SYMMETRIC case
if(argI->at(0) == 2)
}
if(INT_ARG(0) == 2) { // SYMMETRIC mode
for(int dim=0; dim < rank; ++dim)
REQUIRE_TRUE((*paddings)(dim,0) <= input->shapeOf()[dim] && (*paddings)(dim,1) <= input->shapeOf()[dim], 0, "PAD op: wrong content of paddings array for SYMMETRIC mode !");
}

// CONSTANT->0, REFLECT->1, SYMMETRIC->2
REQUIRE_TRUE(!(argI->at(0) < 0 || argI->at(0) > 2), 0, "PAD op: unknown padding mode, there are only three possible legal values -> 0,1,2, but got %i instead !", argI->at(0));
REQUIRE_TRUE(INT_ARG(0) >= 0 && INT_ARG(0) <= 2, 0, "PAD op: unknown padding mode, there are only three possible legal values -> 0,1,2, but got %i instead !", INT_ARG(0));

std::vector<int> dimensions(input->rankOf());
std::iota(dimensions.begin(), dimensions.end(), 0); // fill with 0, 1, ... rank-1

helpers::recursiveLoopForPad<T>(argI->at(0), *input, *paddings, *output, dimensions, 0, 0, 0, padValue);
// helpers::recursiveLoopForPad<T>(INT_ARG(0), *input, *paddings, *output, dimensions, 0, 0, 0, padValue);
helpers::pad(INT_ARG(0), *input, *paddings, *output, padValue);

return Status::OK();
}
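To illustrate the three modes validated above (0 = CONSTANT, 1 = REFLECT, 2 = SYMMETRIC) and why REFLECT tolerates paddings of at most dim-1 while SYMMETRIC tolerates dim, here is a minimal 1-D sketch on std::vector; the function is made up for illustration and is not the library's implementation:

#include <vector>

// 1-D padding sketch: mode 0 = CONSTANT, 1 = REFLECT (mirror excluding the edge),
// 2 = SYMMETRIC (mirror including the edge).
std::vector<float> pad1D(const std::vector<float>& in, int left, int right,
                         int mode, float padValue = 0.f) {
    const int n = static_cast<int>(in.size());
    std::vector<float> out;
    for (int k = left - 1; k >= 0; --k) {                       // left side
        if (mode == 0)      out.push_back(padValue);
        else if (mode == 1) out.push_back(in[k + 1]);           // needs left <= n - 1
        else                out.push_back(in[k]);               // needs left <= n
    }
    out.insert(out.end(), in.begin(), in.end());                // middle: the input itself
    for (int k = 0; k < right; ++k) {                           // right side
        if (mode == 0)      out.push_back(padValue);
        else if (mode == 1) out.push_back(in[n - 2 - k]);       // needs right <= n - 1
        else                out.push_back(in[n - 1 - k]);       // needs right <= n
    }
    return out;
}

// For in = {1, 2, 3} and left = right = 2:
//   CONSTANT  -> {0, 0, 1, 2, 3, 0, 0}
//   REFLECT   -> {3, 2, 1, 2, 3, 2, 1}
//   SYMMETRIC -> {2, 1, 1, 2, 3, 3, 2}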
5 changes: 4 additions & 1 deletion libnd4j/include/ops/declarable/headers/nn.h
@@ -107,6 +107,9 @@ namespace nd4j {
#if NOT_EXCLUDED(OP_batchnorm)
DECLARE_CUSTOM_OP(batchnorm, 3, 1, false, 1, 2);
#endif
#if NOT_EXCLUDED(OP_batchnorm)
DECLARE_CUSTOM_OP(batchnorm_new, 3, 1, false, 1, 2);
#endif

/**
* back prop in batch normalization
@@ -163,7 +166,7 @@ namespace nd4j {
* scale: 1D input array of scale factors, shape [iD]
* offset: 1D input array of offsets (shifts), shape [iD]
* mean: 1D input array of population mean used for inference, shape [iD], this array is required only if isTraining = false
* variance: 1D input array of population mean used for inference, shape [iD], this array is required only if isTraining = false
* variance: 1D input array of population variance used for inference, shape [iD], this array is required only if isTraining = false
*
* T input arguments:
* 0: epsilon, it is optional argument, default value is 0.001, this is small number to be added to the variance of x
150 changes: 144 additions & 6 deletions libnd4j/include/ops/declarable/helpers/cpu/transforms.cpp
@@ -183,14 +183,148 @@ void randomShuffle(NDArray<T>& input, NDArray<T>& output, nd4j::random::RandomBu

}

//////////////////////////////////////////////////////////////////////////
template<typename T>
void pad(const int mode, const NDArray<T>& input, const NDArray<T>& paddings, NDArray<T>& output, const T padValue ) {

const int rank = output.rankOf();
std::vector<int> dimsToExclude(rank);
std::iota(dimsToExclude.begin(), dimsToExclude.end(), 0); // fill with 0, 1, ... rank-1

Nd4jLong numLeft = paddings(rank-1,0);
Nd4jLong numRight = paddings(rank-1,1);
Nd4jLong inDimSize = input.sizeAt(rank-1);
Nd4jLong outDimSize = output.sizeAt(rank-1);

std::vector<std::vector<Nd4jLong>> outIdx = { std::vector<Nd4jLong>(2*rank), {numLeft, numLeft + inDimSize}, {0, numLeft}, {numLeft + inDimSize, outDimSize} };

for(int i = 0; i < rank-1; ++i) {
outIdx[0][2*i] = paddings(i, 0);
outIdx[0][2*i + 1] = outIdx[0][2*i] + input.sizeAt(i);
}
outIdx[0][2*rank-1] = outIdx[0][2*rank-2] = 0;

// ***** populate innermost sub-arrays firstly ***** //
dimsToExclude.pop_back();

Nd4jLong startL = mode == 1 ? 1 : 0; // REFLECT or SYMMETRIC
Nd4jLong startR = mode == 1 ? inDimSize-2 : inDimSize-1; // REFLECT or SYMMETRIC

Nd4jLong numOfSubArrs = ShapeUtils<T>::getNumOfSubArrs(input.getShapeInfo(), dimsToExclude);

NDArray<T> outSubArr0 = output(outIdx[0], true);

#pragma omp parallel for schedule(guided)
for(Nd4jLong j = 0; j < numOfSubArrs; ++j) {

NDArray<T> outSubArr1 = outSubArr0(j, dimsToExclude);
NDArray<T> inSubArr = input(j, dimsToExclude);
NDArray<T> outSubArrMid = outSubArr1(outIdx[1]);

outSubArrMid.assign(inSubArr); // assign middle

if(mode == 0) { // CONSTANT
if(numLeft != 0) {
NDArray<T> temp = outSubArr1(outIdx[2]);
temp = padValue; // assign left
}
if(numRight != 0) {
NDArray<T> temp = outSubArr1(outIdx[3]);
temp = padValue; // assign right
}
}
else { // REFLECT or SYMMETRIC

for(Nd4jLong k = numLeft-1, e = startL; k >= 0; --k, ++e) // fill left side
outSubArr1(k) = inSubArr(e);

for(Nd4jLong k = numLeft + inDimSize, e = startR; k < outDimSize; ++k, --e) // fill right side
outSubArr1(k) = inSubArr(e);
}
}

// ***** fill rest of outer sub-arrays ***** //
std::vector<Nd4jLong> outIdxInner(2,0);
std::vector<Nd4jLong> outIdxOuter(2,0);

for(int i = rank - 2; i >= 0; --i) {

dimsToExclude.pop_back();

outIdxInner.push_back(0), outIdxInner.push_back(0);
outIdxOuter.push_back(0), outIdxOuter.push_back(0);

Nd4jLong numLeft = paddings(i,0);
Nd4jLong numRight = paddings(i,1);

if(numLeft == 0 && numRight == 0)
continue;

Nd4jLong inDimSize = input.sizeAt(i);
Nd4jLong outDimSize = output.sizeAt(i);

if(mode == 0) {
outIdxOuter[0] = 0; outIdxOuter[1] = numLeft;
outIdxInner[0] = numLeft + inDimSize; outIdxInner[1] = outDimSize;
}

startL = mode == 1 ? numLeft+1 : numLeft; // REFLECT or SYMMETRIC
startR = mode == 1 ? numLeft+inDimSize-2 : numLeft+inDimSize-1; // REFLECT or SYMMETRIC

numOfSubArrs = ShapeUtils<T>::getNumOfSubArrs(output.getShapeInfo(), dimsToExclude);

#pragma omp parallel for schedule(guided) firstprivate(outIdxOuter, outIdxInner)
for(Nd4jLong j = 0; j < numOfSubArrs; ++j) {

NDArray<T> outSubArr = output(j, dimsToExclude);

if(mode == 0) { // CONSTANT

if(numLeft != 0) {
NDArray<T> temp = outSubArr(outIdxOuter);
temp = padValue; // assign left
}

if(numRight != 0) {
NDArray<T> temp = outSubArr(outIdxInner);
temp = padValue; // assign right
}
}
else { // REFLECT or SYMMETRIC

for(Nd4jLong k = numLeft-1, e = startL; k >= 0; --k, ++e) { // fill left side
outIdxOuter[0] = k;
outIdxOuter[1] = k+1;
outIdxInner[0] = e;
outIdxInner[1] = e+1;
NDArray<T> outSubArrInner = outSubArr(outIdxInner);
NDArray<T> outSubArrOuter = outSubArr(outIdxOuter);
outSubArrOuter.assign(outSubArrInner);
}

for(Nd4jLong k = numLeft + inDimSize, e = startR; k < outDimSize; ++k, --e) { // fill right side
outIdxOuter[0] = k;
outIdxOuter[1] = k+1;
outIdxInner[0] = e;
outIdxInner[1] = e+1;
NDArray<T> outSubArrInner = outSubArr(outIdxInner);
NDArray<T> outSubArrOuter = outSubArr(outIdxOuter);
outSubArrOuter.assign(outSubArrInner);
}
}
}
}
}
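The helper above fills the innermost dimension of every sub-array first, and then fills the outer dimensions by copying whole, already-populated slices of the output. A hand-rolled 2-D sketch of that two-phase order for SYMMETRIC mode (plain std::vector, illustrative only, not the library's code):

#include <cstdio>
#include <vector>

int main() {
    const std::vector<std::vector<float>> in = {{1, 2, 3}, {4, 5, 6}};
    const int padTop = 1, padBottom = 1, padLeft = 2, padRight = 2;
    const int inRows = 2, inCols = 3;
    const int outRows = inRows + padTop + padBottom;              // 4
    const int outCols = inCols + padLeft + padRight;              // 7

    std::vector<std::vector<float>> out(outRows, std::vector<float>(outCols, 0.f));

    // Phase 1: fill the "middle" rows, padding the innermost dimension of each one.
    for (int r = 0; r < inRows; ++r) {
        for (int c = 0; c < inCols; ++c)
            out[padTop + r][padLeft + c] = in[r][c];                          // middle
        for (int p = 0; p < padLeft; ++p)
            out[padTop + r][padLeft - 1 - p] = in[r][p];                      // SYMMETRIC left
        for (int p = 0; p < padRight; ++p)
            out[padTop + r][padLeft + inCols + p] = in[r][inCols - 1 - p];    // SYMMETRIC right
    }

    // Phase 2: fill the outer rows by copying rows of the output that are already complete.
    for (int p = 0; p < padTop; ++p)
        out[padTop - 1 - p] = out[padTop + p];                                // fill top rows
    for (int p = 0; p < padBottom; ++p)
        out[padTop + inRows + p] = out[padTop + inRows - 1 - p];              // fill bottom rows

    for (const auto& row : out) {
        for (float v : row) std::printf("%g ", v);
        std::printf("\n");
    }
    // Expected output:
    //   2 1 1 2 3 3 2
    //   2 1 1 2 3 3 2
    //   5 4 4 5 6 6 5
    //   5 4 4 5 6 6 5
    return 0;
}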



////////////////////////////////////////////////////////////////////////
// initial values of inIdx, outIdx, dim must be equal to zero
/*// initial values of inIdx, outIdx, dim must be equal to zero
template<typename T>
void recursiveLoopForPad(const int mode, NDArray<T>& input, const NDArray<T>& paddings, NDArray<T>& output, std::vector<int> dimensions, int dim, int inIdx, int outIdx, T padValue ) {
int leftOffset;
// dimensions are array of input dimensions, it is sorted by increasing order
// dimensions are array of input dimensions, it is sorted in increasing order
// every time at the beginning we erase first element from it (not good idea to use vector for this purpose, but luckily it is small enough)
// then we use this array for tads building, every time while recursion the number of built tads becomes bigger
dimensions.erase(dimensions.begin());
@@ -322,7 +456,7 @@ void recursiveLoopForPad(const int mode, NDArray<T>& input, const NDArray<T>& pa
break;
}
}

*/

////////////////////////////////////////////////////////////////////////
template<typename T>
@@ -1011,9 +1145,13 @@ template void randomShuffle<float>(NDArray<float>& input, NDArray<float>& output
template void randomShuffle<float16>(NDArray<float16>& input, NDArray<float16>& output, nd4j::random::RandomBuffer& rng, const bool isInplace);
template void randomShuffle<double>(NDArray<double>& input, NDArray<double>& output, nd4j::random::RandomBuffer& rng, const bool isInplace);

template void recursiveLoopForPad<float>(const int mode, NDArray<float>& input, const NDArray<float>& paddings, NDArray<float>& output, std::vector<int> dimensions, int dim, int inIdx, int outIdx, float padValue);
template void recursiveLoopForPad<float16>(const int mode, NDArray<float16>& input, const NDArray<float16>& paddings, NDArray<float16>& output, std::vector<int> dimensions, int dim, int inIdx, int outIdx, float16 padValue);
template void recursiveLoopForPad<double>(const int mode, NDArray<double>& input, const NDArray<double>& paddings, NDArray<double>& output, std::vector<int> dimensions, int dim, int inIdx, int outIdx, double padValue);
// template void recursiveLoopForPad<float>(const int mode, NDArray<float>& input, const NDArray<float>& paddings, NDArray<float>& output, std::vector<int> dimensions, int dim, int inIdx, int outIdx, float padValue);
// template void recursiveLoopForPad<float16>(const int mode, NDArray<float16>& input, const NDArray<float16>& paddings, NDArray<float16>& output, std::vector<int> dimensions, int dim, int inIdx, int outIdx, float16 padValue);
// template void recursiveLoopForPad<double>(const int mode, NDArray<double>& input, const NDArray<double>& paddings, NDArray<double>& output, std::vector<int> dimensions, int dim, int inIdx, int outIdx, double padValue);

template void pad<float16>(const int mode, const NDArray<float16>& input, const NDArray<float16>& paddings, NDArray<float16>& output, const float16 padValue);
template void pad<float>(const int mode, const NDArray<float>& input, const NDArray<float>& paddings, NDArray<float>& output, const float padValue);
template void pad<double>(const int mode, const NDArray<double>& input, const NDArray<double>& paddings, NDArray<double>& output, const double padValue);

template void invertPermutation<float>(const NDArray<float>& input, NDArray<float>& output);
template void invertPermutation<float16>(const NDArray<float16>& input, NDArray<float16>& output);
