gradient functions for losses #6844

Merged
merged 11 commits into from Dec 13, 2018
2 changes: 1 addition & 1 deletion libnd4j/blas/NDArray.h
@@ -172,7 +172,7 @@ namespace nd4j {


/**
* constructor creates new NDArray using shape information from "shapeInfo", set all elements in new array to be zeros, if copyStrides is true then use stride values from "shapeInfo", else calculate strides independently
* constructor creates new NDArray using shape information from "shapeInfo", set all elements in new array to zeros, if copyStrides is true then use stride values from "shapeInfo", else calculate strides independently
*/
NDArray(Nd4jLong* shapeInfo, const bool copyStrides = false, nd4j::memory::Workspace* workspace = nullptr, const bool isShapeAlloc = false);
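
For context, a minimal usage sketch of this constructor. The 8-element rank-2 shapeInfo layout ({rank, shape..., strides..., extra field, ews, order}) is inferred from the old DECLARE_SHAPE_FN code further down in this diff and is an assumption, as is the include path; only the constructor signature itself comes from this header.

#include <NDArray.h>

// Illustration only (not part of this PR): construct a zero-initialized 2x3
// c-ordered NDArray from a hand-built shapeInfo buffer (layout assumed, see above).
void ndarrayFromShapeInfoSketch() {
    Nd4jLong shapeInfo[] = {2,  2, 3,  3, 1,  0, 1, 99};  // 2x3, strides {3,1}, 'c' order (99)
    // copyStrides = true  -> reuse the strides stored in shapeInfo
    // copyStrides = false -> recompute strides from the shape
    nd4j::NDArray arr(shapeInfo, /*copyStrides=*/true);   // all elements set to zero
}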

1 change: 0 additions & 1 deletion libnd4j/include/helpers/ShapeUtils.h
@@ -119,7 +119,6 @@ namespace nd4j {
* possible cases: dot product (xRank=yRank=1), matrix-vector product (xRank=2, yRank=1), vector-matrix product (xRank=1, yRank=2), matrix-matrix product (xRank=yRank and rank >=2)
*/
static std::vector<Nd4jLong> evalShapeForMatmul(const Nd4jLong* xShapeInfo, const Nd4jLong* yShapeInfo, const bool transX, const bool transY);
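
To make the four cases listed above concrete, here is a small standalone illustration; expectedMatmulShape is a hypothetical helper written for this note (not the library routine) and handles only the rank-2 case directly, with the vector and dot-product cases noted in the comments.

#include <vector>
#include <cstdint>

using Nd4jLong = int64_t;   // assumption: matches the library typedef

// Hypothetical helper (illustration only): result shape of a rank-2 matmul
// with optional transposes.
std::vector<Nd4jLong> expectedMatmulShape(const std::vector<Nd4jLong>& x,
                                          const std::vector<Nd4jLong>& y,
                                          bool transX, bool transY) {
    return { transX ? x[1] : x[0], transY ? y[0] : y[1] };
}
// matrix-matrix: {3,4} x {4,5}            -> {3,5}
//                {4,3} x {4,5}, transX    -> {3,5}
// matrix-vector: {3,4} x {4}              -> {3}
// vector-matrix: {3}   x {3,5}            -> {5}
// dot product  : {4}   . {4}              -> scalar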


/**
* evaluate number of sub-arrays along dimensions stored in dimsToExclude
31 changes: 31 additions & 0 deletions libnd4j/include/ops/declarable/generic/helpers/ScatterHelper.h
@@ -198,6 +198,37 @@ static FORCEINLINE void scatterND(pairwise::Ops op, const NDArray& indices, cons
}


////////////////////////////////////////////////////////////////////////
static FORCEINLINE void scatterForLoss(const NDArray& indices, const NDArray& updates, NDArray& output, const bool calcGrad) {

// requirements for arrays
// shapes of updates and output must be the same
// shape of indices should be the same as updates shape with last dimension excluded
// for example if updates is {a,b,c} then indices should be {a,b}

const Nd4jLong indicesLen = indices.lengthOf();

std::vector<int> dimsToExclude = ShapeUtils::evalDimsToExclude(updates.rankOf(), {-1});

if(!calcGrad) {
#pragma omp parallel for schedule(guided)
for(Nd4jLong i = 0; i < indicesLen; ++i) {

auto subArr = updates(i, dimsToExclude);
output.p(i, subArr.e(indices.e<Nd4jLong>(i)));
}
}
else {
#pragma omp parallel for schedule(guided)
for(Nd4jLong i = 0; i < indicesLen; ++i) {

auto subArr = updates(i, dimsToExclude);
auto ind = indices.e<Nd4jLong>(i);
subArr.p(ind, subArr.e(ind) - 1.);
}
}
}
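
For readers skimming the diff, the following standalone sketch (plain std::vector, hypothetical names, no OpenMP) re-implements the per-row logic of scatterForLoss: in the forward pass the value at the labelled class index of each row of updates is gathered into output, and in the gradient pass 1 is subtracted in place at that index, the familiar softmax-cross-entropy gradient pattern.

#include <cstdint>
#include <vector>

// Standalone illustration of scatterForLoss (hypothetical re-implementation):
// updates is rows x cols in row-major layout, indices holds one class index per row.
void scatterForLossSketch(const std::vector<int64_t>& indices,
                          std::vector<double>& updates, std::size_t cols,
                          std::vector<double>& output, bool calcGrad) {
    for (std::size_t i = 0; i < indices.size(); ++i) {
        double* row = &updates[i * cols];
        const int64_t ind = indices[i];
        if (!calcGrad)
            output[i] = row[ind];   // forward: gather the value of the labelled class
        else
            row[ind] -= 1.;         // gradient: subtract 1 at the labelled class
    }
}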

};


@@ -20,9 +20,7 @@

#include <op_boilerplate.h>
#if NOT_EXCLUDED(OP_absolute_difference_loss)

#include <ops/declarable/CustomOperations.h>
#include <ops/declarable/helpers/losses.h>

namespace nd4j {
namespace ops {
325 changes: 253 additions & 72 deletions libnd4j/include/ops/declarable/generic/loss/cosineDistance.cpp

Large diffs are not rendered by default.

8 changes: 5 additions & 3 deletions libnd4j/include/ops/declarable/generic/loss/logLoss.cpp
@@ -29,9 +29,11 @@ namespace ops {

//////////////////////////////////////////////////////////////////////////
CUSTOM_OP_IMPL(log_loss, 3, 1, false, 1, 1) {

auto predictions = INPUT_VARIABLE(0);
auto weights = INPUT_VARIABLE(1);
auto labels = INPUT_VARIABLE(2);

auto output = OUTPUT_VARIABLE(0);

int reductionMode = INT_ARG(0); // 0 - "none"; 1 - "weighted_sum"; 2 - "weighted_mean"; 3 - "weighted_sum_by_nonzero_weights"
@@ -42,12 +44,12 @@ CUSTOM_OP_IMPL(log_loss, 3, 1, false, 1, 1) {
REQUIRE_TRUE(labels->isSameShape(predictions), 0, "LOG_LOSS OP: labels and predictions arrays must have the same shapes, but got %s and %s correspondingly !", ShapeUtils::shapeAsString(labels).c_str(), ShapeUtils::shapeAsString(predictions).c_str());
// weights array can be single scalar or has the same rank as labels, and must be broadcastable to labels
REQUIRE_TRUE(weights->isScalar() || weights->rankOf() == labels->rankOf(), 0, "LOG_LOSS OP: weights array should be scalar or have the same rank as labels array, but got %i and %i correspondingly!", weights->rankOf(), labels->rankOf());
// check whether broadcast operation is possible for weights array
REQUIRE_TRUE(weights->isScalar() || ShapeUtils::areShapesBroadcastable(*weights, *labels), 0, "LOG_LOSS OP: shapes of weights and labels arrays should be broadcastable, but got weights = %s and labels = %s instead!", ShapeUtils::shapeAsString(weights).c_str(), ShapeUtils::shapeAsString(labels).c_str());
// only 4 possible reduction modes exist
REQUIRE_TRUE(reductionMode==0 || reductionMode==1 || reductionMode==2 || reductionMode==3, 0, "LOG_LOSS_GRAD OP: reduction mode value is not acceptable, possible values are 0, 1, 2, 3, but got %i instead!", reductionMode);

// perform weights broadcasting/tile to labels if needed
// perform weights broadcasting/tile to predictions if needed
auto weightsBroad = weights;
if(!weights->isScalar() && !weights->isSameShape(predictions))
weightsBroad = new NDArray(weights->tileToShape(predictions->getShapeInfo()));
Expand All @@ -69,7 +71,7 @@ CUSTOM_OP_IMPL(log_loss, 3, 1, false, 1, 1) {
case 2: { // 2 - "weighted_mean", output is scalar and equal to sum of all elements of E array divided by sum of all elements of weightsBroad array
NDArray sum;
if (weights->isScalar())
sum = (*weights) * E.lengthOf();
sum = *weights * E.lengthOf();
else
sum = weightsBroad->reduceNumber(reduce::Sum);
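
For reference, a minimal standalone sketch of reduction modes 1-3 named above, applied to an element-wise loss array E and weights that have already been broadcast to the same length; the names and signature are illustrative, and the conventional element-wise log-loss formula appears only as a comment because the actual computation of E sits in the part of this file not included in the excerpt. Mode 0 ("none") keeps the weighted element-wise array un-reduced and is therefore omitted.

#include <algorithm>
#include <numeric>
#include <vector>

// Illustration only: reduce an element-wise loss E with per-element weights.
// Conventionally, for log_loss: E[i] = -l[i]*log(p[i]+eps) - (1-l[i])*log(1-p[i]+eps).
double reduceLossSketch(const std::vector<double>& E,
                        const std::vector<double>& weights, int reductionMode) {
    double sum = 0.;
    for (std::size_t i = 0; i < E.size(); ++i)
        sum += E[i] * weights[i];                       // weighted element-wise losses

    switch (reductionMode) {
        case 2: {                                       // "weighted_mean"
            const double wSum = std::accumulate(weights.begin(), weights.end(), 0.);
            return wSum != 0. ? sum / wSum : 0.;
        }
        case 3: {                                       // "weighted_sum_by_nonzero_weights"
            const auto nonZero = std::count_if(weights.begin(), weights.end(),
                                               [](double w) { return w != 0.; });
            return nonZero != 0 ? sum / nonZero : 0.;
        }
        default:
            return sum;                                 // 1 - "weighted_sum"
    }
}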

111 changes: 47 additions & 64 deletions libnd4j/include/ops/declarable/generic/loss/meanPairWsSqErr.cpp
@@ -24,7 +24,6 @@
#include <ops/declarable/CustomOperations.h>
#include <numeric>
#include <iostream>
#include <ops/declarable/helpers/losses.h>

namespace nd4j {
namespace ops {
@@ -40,99 +39,83 @@ CUSTOM_OP_IMPL(mean_pairwssqerr_loss, 3, 1, false, 0, 0) {
// input validation
REQUIRE_TRUE(labels->isSameShape(predictions), 0, "MEAN_PAIRWSSQERR_LOSS OP: labels and predictions arrays must have the same shapes, but got %s and %s correspondingly !", ShapeUtils::shapeAsString(labels).c_str(), ShapeUtils::shapeAsString(predictions).c_str());
// weights array can be single scalar or has the same rank as labels, and must be broadcastable to labels
REQUIRE_TRUE(!(!weights->isScalar() && weights->rankOf() != labels->rankOf()), 0, "MEAN_PAIRWSSQERR_LOSS OP: weights array must have the same rank as labels array, but got %i and %i correspondingly!", weights->rankOf(), labels->rankOf());
// check whether broadcast operation is possible for weights array
if(!weights->isScalar())
for (int i = 0; i < weights->rankOf(); ++i)
REQUIRE_TRUE(!(weights->shapeOf()[i] != labels->shapeOf()[i] && weights->shapeOf()[i] != 1), 0, "MEAN_PAIRWSSQERR_LOSS OP: shape of weights array %s is not broadcastable to labels array shape %s !", ShapeUtils::shapeAsString(weights).c_str(), ShapeUtils::shapeAsString(labels).c_str());

// perform weights broadcasting/tile to labels if needed
REQUIRE_TRUE(weights->isScalar() || weights->rankOf() == labels->rankOf(), 0, "MEAN_PAIRWSSQERR_LOSS OP: weights array should be scalar or have the same rank as labels array, but got %i and %i correspondingly!", weights->rankOf(), labels->rankOf());
// check whether broadcast operation is possible for weights array
REQUIRE_TRUE(weights->isScalar() || ShapeUtils::areShapesBroadcastable(*weights, *labels), 0, "MEAN_PAIRWSSQERR_LOSS OP: shapes of weights and labels arrays should be broadcastable, but got weights = %s and labels = %s instead!", ShapeUtils::shapeAsString(weights).c_str(), ShapeUtils::shapeAsString(labels).c_str());

if(labels->rankOf() == 1) { // if labels and predictions have rank 1, all data entries are 0-tensors (scalars), so the result is always zero
*output = 0.;
return Status::OK();
}

// perform weights broadcasting/tile to predictions if needed
auto weightsBroad = weights;
if(!weights->isScalar() && !weights->isSameShape(predictions)) {
// evaluate repeat dimensions for tile operation
std::vector<Nd4jLong> reps;
for(int i = 0; i < labels->rankOf(); ++i)
reps.emplace_back(labels->shapeOf()[i] / weights->shapeOf()[i]);
weightsBroad = new NDArray(weights->tile(reps));
}
if(!weights->isScalar() && !weights->isSameShape(predictions))
weightsBroad = new NDArray(weights->tileToShape(predictions->getShapeInfo()));

auto diffs = *predictions - *labels;
std::vector<int> reductionIdx(diffs.rankOf()-1);
std::iota(reductionIdx.begin(), reductionIdx.end(), 1);
auto sumSqrsDiffPerBatch = (diffs*diffs).reduceAlongDims(reduce::Sum, reductionIdx, true);
NDArray diffs = *predictions - *labels;

std::vector<int> reductionIdx = ShapeUtils::evalDimsToExclude(diffs.rankOf(), {0});
NDArray sumSqrsDiffPerBatch = (diffs*diffs).reduceAlongDims(reduce::Sum, reductionIdx, true);

NDArray numOfNonZeroWeights(sumSqrsDiffPerBatch.getShapeInfo(), block.getWorkspace());
NDArray numOfNonZeroWeights(sumSqrsDiffPerBatch.getShapeInfo(), nd4j::DataType::INT64, false, block.getWorkspace());
if(weights->isScalar()) {
if((*weights).e<double>(0) != 0.)
numOfNonZeroWeights.assign((labels->lengthOf()/labels->sizeAt(0)));
}
else {
Nd4jLong sizeAtRestDims = weightsBroad->lengthOf()/weightsBroad->sizeAt(0);
helpers::reduceZeroCountWeights(weightsBroad, sizeAtRestDims, numOfNonZeroWeights);
/*
for(int i = 0; i < numOfNonZeroWeights.lengthOf(); ++i)
for(int j = 0; j < sizeAtRestDims; ++j)
if((*weightsBroad)(i*sizeAtRestDims + j) != (T)0.)
++numOfNonZeroWeights(i);
*/
//throw std::runtime_error("Not implemented yet");
}
else
numOfNonZeroWeights.assign(weightsBroad->reduceAlongDims(reduce::CountNonZero, reductionIdx));

NDArray numOfNonZeroWeightsMinusOne = numOfNonZeroWeights - 1;

sumSqrsDiffPerBatch.applyPairwiseTransform(pairwise::SafeDivide, numOfNonZeroWeights, nullptr);
sumSqrsDiffPerBatch.applyPairwiseTransform(pairwise::SafeDivide, numOfNonZeroWeightsMinusOne, nullptr);

auto sumDiff = diffs.reduceAlongDims(reduce::Sum, reductionIdx, true);
auto nonZerosSquared = numOfNonZeroWeights*numOfNonZeroWeights;

auto nonZerosSquared = numOfNonZeroWeights*numOfNonZeroWeightsMinusOne;
(sumDiff*sumDiff).applyPairwiseTransform(pairwise::SafeDivide, &nonZerosSquared, &sumDiff, nullptr);

auto weightedLosses = (sumSqrsDiffPerBatch - sumDiff) * 2.;

// multiply weightedLosses on weights
weightedLosses *= (*weights);

if(numOfNonZeroWeights.reduceNumber(reduce::Sum).e<float>(0) == 0.f)
(*output) = 0.f;
else
(*output) = weightedLosses.reduceNumber(reduce::Sum);

auto E = (sumSqrsDiffPerBatch - sumDiff) * 2.;

STORE_RESULT(*output);
// multiply E on weights
E *= *weights;

if(numOfNonZeroWeights.reduceNumber(reduce::Sum).e<double>(0) == 0.)
*output = 0.;
else
*output = E.reduceNumber(reduce::Sum);

if(weightsBroad != weights)
delete weightsBroad;

return Status::OK();
}
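
To make the algebra above easier to check: with per-element differences d_i = predictions_i - labels_i over the n non-zero-weight entries of a sample, the per-sample value computed by the new code is 2*(sum(d_i^2)/(n-1) - (sum d_i)^2/(n*(n-1))), which equals the mean of (d_i - d_j)^2 over all ordered pairs i != j. The sketch below (hypothetical helper names, unweighted case) compares the closed form with a brute-force pairwise sum.

#include <vector>

// Closed form used above (illustration only; assumes every weight is non-zero,
// so n is simply the number of elements in the sample).
double meanPairwiseSqErrSketch(const std::vector<double>& pred,
                               const std::vector<double>& lab) {
    const double n = static_cast<double>(pred.size());
    double sumD = 0., sumD2 = 0.;
    for (std::size_t i = 0; i < pred.size(); ++i) {
        const double d = pred[i] - lab[i];
        sumD  += d;
        sumD2 += d * d;
    }
    return 2. * (sumD2 / (n - 1.) - sumD * sumD / (n * (n - 1.)));
}

// Brute-force reference: mean of (d_i - d_j)^2 over all ordered pairs i != j.
double meanPairwiseSqErrBruteForce(const std::vector<double>& pred,
                                   const std::vector<double>& lab) {
    const std::size_t n = pred.size();
    double acc = 0.;
    for (std::size_t i = 0; i < n; ++i)
        for (std::size_t j = 0; j < n; ++j)
            if (i != j) {
                const double dij = (pred[i] - lab[i]) - (pred[j] - lab[j]);
                acc += dij * dij;
            }
    return acc / static_cast<double>(n * (n - 1));
}
// e.g. pred = {1., 2., 4.}, lab = {0., 0., 0.}: both return 14/3 (about 4.667).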

DECLARE_TYPES(mean_pairwssqerr_loss) {
getOpDescriptor()
->setAllowedInputTypes(nd4j::DataType::ANY)
->setAllowedOutputTypes({ALL_FLOATS});
}

//////////////////////////////////////////////////////////////////////////
DECLARE_TYPES(mean_pairwssqerr_loss) {

getOpDescriptor()->setAllowedInputTypes(nd4j::DataType::ANY)->setAllowedOutputTypes({ALL_FLOATS});
}

//////////////////////////////////////////////////////////////////////////
DECLARE_SHAPE_FN(mean_pairwssqerr_loss) {

auto predictionsShapeInfo = inputShape->at(0);
auto weightsShapeInfo = inputShape->at(1);
auto labelsShapeInfo = inputShape->at(2);

// labels and predictions must have the same shapes
REQUIRE_TRUE(shape::shapeEquals(labelsShapeInfo, predictionsShapeInfo), 0, "MEAN_PAIRWSSQERR_LOSS OP: labels and predictions arrays must have the same shapes, but got %s and %s correspondingly !", ShapeUtils::shapeAsString(labelsShapeInfo).c_str(), ShapeUtils::shapeAsString(predictionsShapeInfo).c_str());

Nd4jLong* outShapeInfo = nullptr;
// output is scalar
ALLOCATE(outShapeInfo, block.getWorkspace(), shape::shapeInfoLength(2) /*rank=2*/, Nd4jLong);
outShapeInfo[0] = 2;
outShapeInfo[1] = outShapeInfo[2] = outShapeInfo[3] = outShapeInfo[4] = 1;
outShapeInfo[5] = 0;
outShapeInfo[6] = 1;
outShapeInfo[7] = 99;
ArrayOptions::setDataType(outShapeInfo, ArrayOptions::dataType(predictionsShapeInfo));
REQUIRE_TRUE(shape::shapeEquals(labelsShapeInfo, predictionsShapeInfo), 0, "MEAN_PAIRWSSQERR_LOSS OP: labels and predictions arrays must have the same shapes, but got %s and %s correspondingly !", ShapeUtils::shapeAsString(labelsShapeInfo).c_str(), ShapeUtils::shapeAsString(predictionsShapeInfo).c_str());
// weights array can be single scalar or has the same rank as labels, and must be broadcastable to labels
REQUIRE_TRUE(shape::isScalar(weightsShapeInfo) || shape::rank(weightsShapeInfo) == shape::rank(labelsShapeInfo), 0, "MEAN_PAIRWSSQERR_LOSS OP: weights array should be scalar or have the same rank as labels array, but got %i and %i correspondingly!", shape::rank(weightsShapeInfo), shape::rank(labelsShapeInfo));
// check whether broadcast operation is possible for weights array
REQUIRE_TRUE(shape::isScalar(weightsShapeInfo) || ShapeUtils::areShapesBroadcastable(weightsShapeInfo, labelsShapeInfo), 0, "MEAN_PAIRWSSQERR_LOSS OP: shapes of weights and labels arrays should be broadcastable, but got weights = %s and labels = %s instead!", ShapeUtils::shapeAsString(weightsShapeInfo).c_str(), ShapeUtils::shapeAsString(labelsShapeInfo).c_str());

DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(predictionsShapeInfo));
Nd4jLong* outShapeInfo = ShapeBuilders::createScalarShapeInfo(outType, block.getWorkspace());

return SHAPELIST(outShapeInfo);

}

// INT_ARG(0) - reduction mode


