Skip to content

Commit

Permalink
Merge pull request #4 from alibaba/master
Browse files Browse the repository at this point in the history
merge MNN
  • Loading branch information
jokerz0624 committed Mar 3, 2021
2 parents 097b0c8 + 6bc0a0c commit 6ac98b5
Show file tree
Hide file tree
Showing 15 changed files with 349 additions and 118 deletions.
36 changes: 36 additions & 0 deletions 3rd_party/OpenCLHeaders/CL/cl2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2485,6 +2485,42 @@ class Platform : public detail::Wrapper<cl_platform_id>
return detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
}

// More than one OpenCL platform is present: prefer the discrete GPU.
if (n > 1) {
    // On x86_64 machines with several platforms, promote the NVIDIA
    // platform (assumed to be the discrete card) to ids[0] so it becomes
    // the default selection.
    //const char* integrate_gpu = "Intel";
    const char* discrete_gpu = "NVIDIA";
    for (cl_uint i = 0; i < n; ++i) {
        // Query the length of the platform name (includes NUL terminator).
        size_t platform_name_length = 0;
        err = clGetPlatformInfo(ids[i], CL_PLATFORM_NAME, 0, 0, &platform_name_length);
        if (err != CL_SUCCESS) {
            return detail::errHandler(err, __GET_PLATFORM_INFO_ERR);
        }
        // Fetch the platform name into an RAII buffer; no manual
        // delete[] needed on any of the exit paths below.
        std::vector<char> platform_name(platform_name_length);
        err = clGetPlatformInfo(ids[i], CL_PLATFORM_NAME, platform_name_length, platform_name.data(), 0);
        if (err != CL_SUCCESS) {
            return detail::errHandler(err, __GET_PLATFORM_INFO_ERR);
        }
        // If the NVIDIA platform is found, move it to the front (unless it
        // already is the front) and stop scanning.
        if (strstr(platform_name.data(), discrete_gpu)) {
            if (i != 0) {
                cl_platform_id tmp = ids[0];
                ids[0] = ids[i];
                ids[i] = tmp;
            }
            break;
        }
    }
}

if (platforms) {
platforms->resize(ids.size());

Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ option(MNN_USE_SSE "Use SSE optimization for x86 if possible" ON)
option(MNN_BUILD_CODEGEN "Build with codegen" OFF)
option(MNN_ENABLE_COVERAGE "Build with coverage enable" OFF)

IF(NOT MNN_BUILD_SHARED_LIBS)
IF(NOT MNN_BUILD_SHARED_LIBS AND MNN_SEP_BUILD)
message(WARNING "Close MNN_SEP_BUILD for static library")
SET(MNN_SEP_BUILD OFF CACHE BOOL "<docstring>" FORCE)
ENDIF()
Expand Down
2 changes: 1 addition & 1 deletion project/android/testCommon.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
#!/bin/bash
adb shell "cd /data/local/tmp/MNN&&export LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH&& $1 $2 $3 $4 $5 $6 $7"
adb shell "cd /data/local/tmp/MNN&&export LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH&& $1 $2 $3 $4 $5 $6 $7 $8 $9"
37 changes: 28 additions & 9 deletions schema/current/CaffeOp_generated.h
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,7 @@ struct Convolution2DCommonT : public flatbuffers::NativeTable {
bool relu6;
std::vector<int32_t> pads;
std::vector<int32_t> outPads;
bool hasOutputShape;
Convolution2DCommonT()
: padX(0),
padY(0),
Expand All @@ -450,7 +451,8 @@ struct Convolution2DCommonT : public flatbuffers::NativeTable {
outputCount(0),
inputCount(0),
relu(false),
relu6(false) {
relu6(false),
hasOutputShape(false) {
}
};

Expand All @@ -475,7 +477,8 @@ struct Convolution2DCommon FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
VT_RELU = 28,
VT_RELU6 = 30,
VT_PADS = 32,
VT_OUTPADS = 34
VT_OUTPADS = 34,
VT_HASOUTPUTSHAPE = 36
};
int32_t padX() const {
return GetField<int32_t>(VT_PADX, 0);
Expand Down Expand Up @@ -525,6 +528,9 @@ struct Convolution2DCommon FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
const flatbuffers::Vector<int32_t> *outPads() const {
return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPADS);
}
bool hasOutputShape() const {
return GetField<uint8_t>(VT_HASOUTPUTSHAPE, 0) != 0;
}
bool Verify(flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) &&
VerifyField<int32_t>(verifier, VT_PADX) &&
Expand All @@ -545,6 +551,7 @@ struct Convolution2DCommon FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
verifier.VerifyVector(pads()) &&
VerifyOffset(verifier, VT_OUTPADS) &&
verifier.VerifyVector(outPads()) &&
VerifyField<uint8_t>(verifier, VT_HASOUTPUTSHAPE) &&
verifier.EndTable();
}
Convolution2DCommonT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
Expand Down Expand Up @@ -603,6 +610,9 @@ struct Convolution2DCommonBuilder {
void add_outPads(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outPads) {
fbb_.AddOffset(Convolution2DCommon::VT_OUTPADS, outPads);
}
void add_hasOutputShape(bool hasOutputShape) {
fbb_.AddElement<uint8_t>(Convolution2DCommon::VT_HASOUTPUTSHAPE, static_cast<uint8_t>(hasOutputShape), 0);
}
explicit Convolution2DCommonBuilder(flatbuffers::FlatBufferBuilder &_fbb)
: fbb_(_fbb) {
start_ = fbb_.StartTable();
Expand Down Expand Up @@ -632,7 +642,8 @@ inline flatbuffers::Offset<Convolution2DCommon> CreateConvolution2DCommon(
bool relu = false,
bool relu6 = false,
flatbuffers::Offset<flatbuffers::Vector<int32_t>> pads = 0,
flatbuffers::Offset<flatbuffers::Vector<int32_t>> outPads = 0) {
flatbuffers::Offset<flatbuffers::Vector<int32_t>> outPads = 0,
bool hasOutputShape = false) {
Convolution2DCommonBuilder builder_(_fbb);
builder_.add_outPads(outPads);
builder_.add_pads(pads);
Expand All @@ -647,6 +658,7 @@ inline flatbuffers::Offset<Convolution2DCommon> CreateConvolution2DCommon(
builder_.add_kernelX(kernelX);
builder_.add_padY(padY);
builder_.add_padX(padX);
builder_.add_hasOutputShape(hasOutputShape);
builder_.add_relu6(relu6);
builder_.add_relu(relu);
builder_.add_padMode(padMode);
Expand All @@ -670,7 +682,8 @@ inline flatbuffers::Offset<Convolution2DCommon> CreateConvolution2DCommonDirect(
bool relu = false,
bool relu6 = false,
const std::vector<int32_t> *pads = nullptr,
const std::vector<int32_t> *outPads = nullptr) {
const std::vector<int32_t> *outPads = nullptr,
bool hasOutputShape = false) {
auto pads__ = pads ? _fbb.CreateVector<int32_t>(*pads) : 0;
auto outPads__ = outPads ? _fbb.CreateVector<int32_t>(*outPads) : 0;
return MNN::CreateConvolution2DCommon(
Expand All @@ -690,7 +703,8 @@ inline flatbuffers::Offset<Convolution2DCommon> CreateConvolution2DCommonDirect(
relu,
relu6,
pads__,
outPads__);
outPads__,
hasOutputShape);
}

flatbuffers::Offset<Convolution2DCommon> CreateConvolution2DCommon(flatbuffers::FlatBufferBuilder &_fbb, const Convolution2DCommonT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
Expand Down Expand Up @@ -4505,6 +4519,7 @@ inline void Convolution2DCommon::UnPackTo(Convolution2DCommonT *_o, const flatbu
{ auto _e = relu6(); _o->relu6 = _e; };
{ auto _e = pads(); if (_e) { _o->pads.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->pads[_i] = _e->Get(_i); } } };
{ auto _e = outPads(); if (_e) { _o->outPads.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->outPads[_i] = _e->Get(_i); } } };
{ auto _e = hasOutputShape(); _o->hasOutputShape = _e; };
}

inline flatbuffers::Offset<Convolution2DCommon> Convolution2DCommon::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Convolution2DCommonT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
Expand All @@ -4531,6 +4546,7 @@ inline flatbuffers::Offset<Convolution2DCommon> CreateConvolution2DCommon(flatbu
auto _relu6 = _o->relu6;
auto _pads = _o->pads.size() ? _fbb.CreateVector(_o->pads) : 0;
auto _outPads = _o->outPads.size() ? _fbb.CreateVector(_o->outPads) : 0;
auto _hasOutputShape = _o->hasOutputShape;
return MNN::CreateConvolution2DCommon(
_fbb,
_padX,
Expand All @@ -4548,7 +4564,8 @@ inline flatbuffers::Offset<Convolution2DCommon> CreateConvolution2DCommon(flatbu
_relu,
_relu6,
_pads,
_outPads);
_outPads,
_hasOutputShape);
}

inline Convolution3DCommonT *Convolution3DCommon::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
Expand Down Expand Up @@ -5950,7 +5967,8 @@ inline const flatbuffers::TypeTable *Convolution2DCommonTypeTable() {
{ flatbuffers::ET_BOOL, 0, -1 },
{ flatbuffers::ET_BOOL, 0, -1 },
{ flatbuffers::ET_INT, 1, -1 },
{ flatbuffers::ET_INT, 1, -1 }
{ flatbuffers::ET_INT, 1, -1 },
{ flatbuffers::ET_BOOL, 0, -1 }
};
static const flatbuffers::TypeFunction type_refs[] = {
PadModeTypeTable
Expand All @@ -5971,10 +5989,11 @@ inline const flatbuffers::TypeTable *Convolution2DCommonTypeTable() {
"relu",
"relu6",
"pads",
"outPads"
"outPads",
"hasOutputShape"
};
static const flatbuffers::TypeTable tt = {
flatbuffers::ST_TABLE, 16, type_codes, type_refs, nullptr, names
flatbuffers::ST_TABLE, 17, type_codes, type_refs, nullptr, names
};
return &tt;
}
Expand Down
1 change: 1 addition & 0 deletions schema/default/CaffeOp.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ table Convolution2DCommon {
relu6:bool=false;
pads:[int];
outPads:[int];
hasOutputShape:bool = false;
}

table Convolution3DCommon {
Expand Down
60 changes: 49 additions & 11 deletions source/backend/arm82/Arm82Relu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
// Copyright © 2018, Alibaba Group Holding Limited
//
#ifdef __aarch64__
#include <limits>

#include "backend/arm82/Arm82Relu.hpp"
#include "MNN_generated.h"
#include "backend/arm82/Arm82Backend.hpp"
Expand All @@ -20,7 +22,7 @@

namespace MNN {

static void _MNNArm82ReluWithChannel(FLOAT16 *dst, const FLOAT16 *src, const FLOAT16 *slope, size_t length) {
static void _MNNArm82PReluWithChannel(FLOAT16 *dst, const FLOAT16 *src, const FLOAT16 *slope, size_t length) {
#ifdef MNN_USE_NEON
float16x8_t value_0 = vmovq_n_f16(0);
float16x8_t slopeV = vld1q_f16(slope);
Expand Down Expand Up @@ -69,7 +71,30 @@ static void _MNNArm82LeakyReluWithChannel(FLOAT16 *dst, const FLOAT16 *src, cons
dst[index] = src[index];
}
}
#endif
}
}

// Elementwise ReLU (dst = max(src, 0)) over `length` channel-unit groups of
// FLOAT16 values. `src` and `dst` each hold length * ARMV82_CHANNEL_UNIT
// elements; each element is read before it is written, so src == dst
// (in-place) is safe.
static void _MNNArm82ReluWithChannel(FLOAT16 *dst, const FLOAT16 *src, size_t length) {
#ifdef MNN_USE_NEON
    float16x8_t value_0 = vmovq_n_f16(0);
#endif

    // Use size_t for the index to match `length` exactly: avoids a
    // signed/unsigned comparison and overflow on very large planes.
    for (size_t i = 0; i < length; ++i) {
#ifdef MNN_USE_NEON
        // Vector path: one 8-lane fp16 register per channel unit.
        float16x8_t value = vld1q_f16(src + i * ARMV82_CHANNEL_UNIT);
        float16x8_t lessThanZero = vcleq_f16(value, value_0);

        // Select 0 where value <= 0, otherwise keep the input value.
        vst1q_f16(dst + i * ARMV82_CHANNEL_UNIT, vbslq_f16(lessThanZero, value_0, value));
#else
        // Scalar fallback when NEON fp16 intrinsics are unavailable.
        for (int j = 0; j < ARMV82_CHANNEL_UNIT; ++j) {
            size_t index = i * ARMV82_CHANNEL_UNIT + j;
            if (src[index] < 0) {
                dst[index] = 0;
            } else {
                dst[index] = src[index];
            }
        }
#endif
    }
}
Expand All @@ -93,17 +118,30 @@ ErrorCode Arm82Relu::onExecute(const std::vector<Tensor *> &inputs, const std::v

const auto src = input->host<FLOAT16>();
auto dst = output->host<FLOAT16>();
FLOAT16 slopeHalf = half_float::half(mSlope);

mThreadNumbers = static_cast<Arm82Backend *>(backend())->numberThread();
MNN_CONCURRENCY_BEGIN(tId, mThreadNumbers)
for (int b = (int)tId; b < batchAndChannel; b += mThreadNumbers) {
_MNNArm82LeakyReluWithChannel(dst + b * plane * ARMV82_CHANNEL_UNIT,
src + b * plane * ARMV82_CHANNEL_UNIT,
slopeHalf,
plane);
if (abs(mSlope) < std::numeric_limits<float>::epsilon()) {
// relu
mThreadNumbers = static_cast<Arm82Backend *>(backend())->numberThread();
MNN_CONCURRENCY_BEGIN(tId, mThreadNumbers)
for (int b = (int)tId; b < batchAndChannel; b += mThreadNumbers) {
_MNNArm82ReluWithChannel(dst + b * plane * ARMV82_CHANNEL_UNIT,
src + b * plane * ARMV82_CHANNEL_UNIT,
plane);
}
MNN_CONCURRENCY_END();
} else {
// leakyrelu
FLOAT16 slopeHalf = half_float::half(mSlope);
mThreadNumbers = static_cast<Arm82Backend *>(backend())->numberThread();
MNN_CONCURRENCY_BEGIN(tId, mThreadNumbers)
for (int b = (int)tId; b < batchAndChannel; b += mThreadNumbers) {
_MNNArm82LeakyReluWithChannel(dst + b * plane * ARMV82_CHANNEL_UNIT,
src + b * plane * ARMV82_CHANNEL_UNIT,
slopeHalf,
plane);
}
MNN_CONCURRENCY_END();
}
MNN_CONCURRENCY_END();

return NO_ERROR;
}
Expand Down Expand Up @@ -144,7 +182,7 @@ ErrorCode Arm82PRelu::onExecute(const std::vector<Tensor *> &inputs, const std::
MNN_CONCURRENCY_BEGIN(tId, mThreadNumbers)
for (int b = tId; b < batchAndChannel; ++b) {
auto curChannel = b % channelDivUnit;
_MNNArm82ReluWithChannel(dstPtr + b * plane * ARMV82_CHANNEL_UNIT, srcPtr + b * plane * ARMV82_CHANNEL_UNIT,
_MNNArm82PReluWithChannel(dstPtr + b * plane * ARMV82_CHANNEL_UNIT, srcPtr + b * plane * ARMV82_CHANNEL_UNIT,
slopePtr + curChannel * ARMV82_CHANNEL_UNIT, plane);
}
MNN_CONCURRENCY_END();
Expand Down
1 change: 1 addition & 0 deletions source/backend/vulkan/execution/VulkanConvolutionImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ class VulkanConvolutionIm2Col : public VulkanBasicExecution {
mIm2Col->bind(cmdBuffer->get(), mIm2ColSet[index]->get());
vkCmdDispatch(cmdBuffer->get(), UP_DIV(totalNumberInput, VulkanConvolutionCommon::gImage2ColLocal),
1, 1);
cmdBuffer->barrierImageIfNeeded(colImage, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
}
mMultilers[index]->compute(cmdBuffer);
if (true) {
Expand Down
17 changes: 13 additions & 4 deletions source/geometry/GeometryConv2D.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,11 +296,20 @@ class GeometryConvTranspose2D : public GeometryConv2D {
}
virtual bool onCompute(const Op* op, const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
Context& context, CommandBuffer& res) const override {
if (inputs.size() == 1) {
// Origin convolution with format converter
return GeometryConvUtils::computeSingle(op, inputs, outputs, context, res);
if (op->main_as_Convolution2D()->common()->hasOutputShape()) {
const std::vector<Tensor*> newInputs(inputs.begin(), inputs.end() - 1);
if (newInputs.size() == 1) {
// Origin convolution with format converter
return GeometryConvUtils::computeSingle(op, newInputs, outputs, context, res);
}
return computeGEMM_Col2Im(op, newInputs, outputs, context, res);
} else {
if (inputs.size() == 1) {
// Origin convolution with format converter
return GeometryConvUtils::computeSingle(op, inputs, outputs, context, res);
}
return computeGEMM_Col2Im(op, inputs, outputs, context, res);
}
return computeGEMM_Col2Im(op, inputs, outputs, context, res);
}
};
static void _create() {
Expand Down
4 changes: 4 additions & 0 deletions source/shape/ShapeConvolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ class ConvolutionSizeComputer : public SizeComputer {
int output_height = 1;

auto input = inputs[0];
if (input->dimensions() <= 1) {
// Convolution is not valid for dimension <= 1
return false;
}
// For Tensorflow Group Convolution, the inputCount is the size of filter's input count
if (layer->inputCount() > 0 && input->channel() % layer->inputCount() != 0 && OpType_Convolution == op->type()) {
MNN_ERROR("Error for compute convolution shape, need channel = %d, input channel = %d\n", layer->inputCount(), input->channel());
Expand Down
21 changes: 18 additions & 3 deletions source/shape/ShapeDeconvolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,18 @@ class DeconvolutionSizeComputer : public SizeComputer {
auto layer = op->main_as_Convolution2D()->common();

auto inputTensor = inputs[0];
int outputHeight = 0, outputWidth = 0;
if (layer->hasOutputShape()) {
MNN_ASSERT(inputs.size() >= 2);
auto outputShape = inputs.back();
if (outputShape->length(0) > 2) {
outputHeight = outputShape->host<int>()[1];
outputWidth = outputShape->host<int>()[2];
} else {
outputHeight = outputShape->host<int>()[0];
outputWidth = outputShape->host<int>()[1];
}
}

int input_width = inputTensor->width();
int input_height = inputTensor->height();
Expand All @@ -29,9 +41,12 @@ class DeconvolutionSizeComputer : public SizeComputer {
int dW = layer->dilateX();
int output_width;
int output_height;
auto format = TensorUtils::getDescribe(inputs[0])->dimensionFormat;
auto format = TensorUtils::getDescribe(inputTensor)->dimensionFormat;

if (layer->padMode() == PadMode_SAME) { // Tensorflow support
if (outputHeight > 0 && outputWidth > 0) {
output_width = outputWidth;
output_height = outputHeight;
} else if (layer->padMode() == PadMode_SAME) { // Tensorflow support
output_width = input_width * sW;
output_height = input_height * sH;
} else {
Expand All @@ -50,7 +65,7 @@ class DeconvolutionSizeComputer : public SizeComputer {
}

auto& outputBuffer = outputs[0]->buffer();
outputBuffer.type = inputs[0]->getType();
outputBuffer.type = inputTensor->getType();
outputBuffer.dimensions = inputTensor->buffer().dimensions;
outputBuffer.dim[0].extent = inputTensor->buffer().dim[0].extent;
if (MNN_DATA_FORMAT_NHWC == format) {
Expand Down

0 comments on commit 6ac98b5

Please sign in to comment.