This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

TVM bridge support to JIT NDArray Function by TVM #9880

Merged — merged 6 commits on Feb 27, 2018

Changes from all commits
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -234,6 +234,7 @@ include_directories("include")
include_directories("mshadow")
include_directories("3rdparty/cub")
include_directories("nnvm/include")
include_directories("nnvm/tvm/include")
include_directories("dmlc-core/include")
include_directories("dlpack/include")

@@ -696,4 +697,3 @@ endif()
set(LINT_DIRS "include src plugin cpp-package tests")
set(EXCLUDE_PATH "src/operator/contrib/ctc_include")
add_custom_target(mxnet_lint COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DPYTHON_EXECUTABLE=${PYTHON_EXECUTABLE} -DLINT_DIRS=${LINT_DIRS} -DPROJECT_SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR} -DPROJECT_NAME=mxnet -DEXCLUDE_PATH=${EXCLUDE_PATH} -P ${CMAKE_CURRENT_SOURCE_DIR}/dmlc-core/cmake/lint.cmake)

10 changes: 5 additions & 5 deletions Jenkinsfile
@@ -37,12 +37,12 @@ def init_git() {
deleteDir()
retry(5) {
try {
// Make sure to wait long enough for the api.github.com request quota. Important: Don't increase the amount of
// retries as this will increase the amount of requests and worsen the throttling
timeout(time: 15, unit: 'MINUTES') {
checkout scm
sh 'git submodule update --init'
sh 'git clean -d -f'
sh 'git submodule update --init --recursive'
sh 'git clean -d -f'
}
} catch (exc) {
deleteDir()
@@ -60,8 +60,8 @@ def init_git_win() {
// retries as this will increase the amount of requests and worsen the throttling
timeout(time: 15, unit: 'MINUTES') {
checkout scm
bat 'git submodule update --init'
bat 'git clean -d -f'
bat 'git submodule update --init --recursive'
bat 'git clean -d -f'
}
} catch (exc) {
deleteDir()
4 changes: 2 additions & 2 deletions Makefile
@@ -91,7 +91,7 @@ ifeq ($(DEBUG), 1)
else
CFLAGS += -O3 -DNDEBUG=1
endif
CFLAGS += -I$(ROOTDIR)/mshadow/ -I$(ROOTDIR)/dmlc-core/include -fPIC -I$(NNVM_PATH)/include -I$(DLPACK_PATH)/include -Iinclude $(MSHADOW_CFLAGS)
CFLAGS += -I$(ROOTDIR)/mshadow/ -I$(ROOTDIR)/dmlc-core/include -fPIC -I$(NNVM_PATH)/include -I$(DLPACK_PATH)/include -I$(NNVM_PATH)/tvm/include -Iinclude $(MSHADOW_CFLAGS)
LDFLAGS = -pthread $(MSHADOW_LDFLAGS) $(DMLC_LDFLAGS)
ifeq ($(DEBUG), 1)
NVCCFLAGS += -std=c++11 -Xcompiler -D_FORCE_INLINES -g -G -O0 -ccbin $(CXX) $(MSHADOW_NVCCFLAGS)
@@ -356,7 +356,7 @@ ifeq ($(USE_CUDA), 1)
LDFLAGS += -lcuda -lnvrtc
CFLAGS += -DMXNET_ENABLE_CUDA_RTC=1
endif
# Make sure to add stubs as fallback in order to be able to build
# without full CUDA install (especially if run without nvidia-docker)
LDFLAGS += -L/usr/local/cuda/lib64/stubs
SCALA_PKG_PROFILE := $(SCALA_PKG_PROFILE)-gpu
10 changes: 10 additions & 0 deletions include/mxnet/tensor_blob.h
@@ -36,8 +36,18 @@
#include <utility>
#include <algorithm>
#include "./base.h"

namespace mxnet {

// Redefine DLPack enumeration to be backward compatible.
constexpr const int kCPU = kDLCPU;
constexpr const int kGPU = kDLGPU;
// Extension type code for MXNet NDArray under the TVM function convention.
// NNVM currently reserves type codes 16 to 19 from TVM;
// 16, 17, and 18 are already used by the NNVM compiler,
// so we pick code 19 for MXNet NDArray.
constexpr const int kTVMNDArrayTypeCode = 19;

/* Forward declaration for friend declaration in TBlob */
class NDArray;

2 changes: 1 addition & 1 deletion nnvm
Submodule nnvm updated 73 files
+7 −0 .gitignore
+1 −0 CMakeLists.txt
+2 −2 Makefile
+1 −1 dmlc-core
+6 −1 docs/api/python/frontend.rst
+26 −1 docs/top.rst
+11 −10 include/nnvm/c_api.h
+33 −0 include/nnvm/compiler/util.h
+2 −1 include/nnvm/node.h
+2 −1 include/nnvm/op.h
+1 −1 include/nnvm/symbolic.h
+33 −0 include/nnvm/top/nn.h
+103 −11 include/nnvm/top/tensor.h
+3 −0 make/config.mk
+2 −2 python/nnvm/_base.py
+7 −4 python/nnvm/compiler/build_module.py
+6 −0 python/nnvm/compiler/graph_attr.py
+57 −0 python/nnvm/compiler/graph_util.py
+1 −0 python/nnvm/frontend/__init__.py
+35 −0 python/nnvm/frontend/common.py
+1 −40 python/nnvm/frontend/coreml.py
+498 −0 python/nnvm/frontend/keras.py
+7 −0 python/nnvm/frontend/mxnet.py
+119 −10 python/nnvm/frontend/onnx.py
+4 −15 python/nnvm/testing/resnet.py
+3 −10 python/nnvm/testing/vgg.py
+16 −0 python/nnvm/top/attr_dict.py
+31 −72 python/nnvm/top/nn.py
+0 −3 python/nnvm/top/reduction.py
+1 −1 python/nnvm/top/registry.py
+0 −35 python/nnvm/top/tensor.py
+1 −42 python/nnvm/top/transform.py
+89 −27 src/compiler/compile_engine.cc
+7 −4 src/compiler/compile_engine.h
+1 −1 src/compiler/fold_scale_axis.cc
+9 −3 src/compiler/graph_fuse.cc
+1 −1 src/compiler/graph_hash.cc
+1 −0 src/compiler/packed_func_ext.cc
+9 −0 src/core/symbolic.cc
+8 −3 src/pass/gradient.cc
+63 −0 src/top/elemwise_op_common.h
+38 −3 src/top/nn/convolution.cc
+221 −3 src/top/nn/nn.cc
+21 −7 src/top/nn/nn_common.h
+77 −0 src/top/nn/pooling.cc
+54 −0 src/top/nn/upsampling.cc
+96 −2 src/top/op_common.h
+22 −0 src/top/tensor/broadcast.cc
+515 −19 src/top/tensor/elemwise.cc
+138 −0 src/top/tensor/matrix_op.cc
+101 −14 src/top/tensor/reduce.cc
+188 −5 src/top/tensor/transform.cc
+3 −0 tests/ci_build/Dockerfile.gpu
+1 −0 tests/ci_build/install/ubuntu_install_keras.sh
+1 −1 tests/cpp/tuple_test.cc
+56 −0 tests/python/compiler/test_nhwc_layout.py
+2 −2 tests/python/compiler/test_rpc_exec.py
+179 −112 tests/python/compiler/test_top_level1.py
+20 −0 tests/python/compiler/test_top_level2.py
+159 −0 tests/python/frontend/keras/test_forward.py
+2 −1 tests/python/frontend/onnx/model_zoo/__init__.py
+8 −4 tests/python/frontend/onnx/test_forward.py
+19 −0 tests/python/unittest/test_graph.py
+136 −0 tests/python/unittest/test_graph_gradient.py
+16 −0 tests/python/unittest/test_infer_shape.py
+3 −0 tests/scripts/task_frontend_test.sh
+193 −0 tutorials/define_and_compile_model.py
+235 −0 tutorials/deploy_model_on_mali_gpu.py
+9 −12 tutorials/deploy_model_on_rasp.py
+4 −2 tutorials/from_coreml.py
+114 −0 tutorials/from_keras.py
+218 −0 tutorials/using_external_lib.py
+1 −1 tvm
7 changes: 7 additions & 0 deletions python/mxnet/ndarray/ndarray.py
@@ -174,8 +174,15 @@ class NDArray(NDArrayBase):
__slots__ = []
# make numpy functions return NDArray instead of numpy object array
__array_priority__ = 1000.0
# Extension type code for TVM functions.
# See the C++ definition (kTVMNDArrayTypeCode) in include/mxnet/tensor_blob.h
_tvm_tcode = 19
# pylint: disable= no-member, undefined-variable

@property
def _tvm_handle(self):
return self.handle.value
Member: what's this for?

Member Author: This is a handle exposed for TVM's PackedFunc convention interface, to allow calls with arbitrary positional arguments without adding a new C API. Specifically, the wrapped function is a TVM PackedFunc that recognizes NDArray as an extension object and passes the address of the NDArray handle correctly through the arguments.

It is later received here: https://github.com/apache/incubator-mxnet/pull/9880/files#diff-3aa2a3c799e125e086769bc1d5f6490aR74
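For context, a minimal sketch of the TVM-side half of this protocol, assuming a TVM build of the same era that ships tvm.ndarray.register_extension: any class exposing a class-level _tvm_tcode and a per-instance _tvm_handle can be registered as an extension object.

import tvm
import mxnet as mx

# NDArray already defines _tvm_tcode (class-level type code 19) and
# _tvm_handle (the raw address of the underlying C++ NDArray), which is
# all the extension-object protocol requires. Registration tells TVM's
# PackedFunc machinery to accept NDArray arguments and pack the handle
# address into TVMValue.v_handle, tagged with the NDArray type code.
tvm.ndarray.register_extension(mx.nd.NDArray)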


def __repr__(self):
"""Returns a string representation of the array."""
shape_info = 'x'.join(['%d' % x for x in self.shape])
180 changes: 180 additions & 0 deletions src/nnvm/tvm_bridge.cc
@@ -0,0 +1,180 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file tvm_bridge.cc
* \brief Bridge to run TVM's PackedFunc in MXNet's async engine.
*
* This bridge is mainly used to expose MXNet's async engine push to
* TVM. It only uses the TVM runtime in header-only mode, which means
* there is no link dependency.
*
* Support for TVM is optional even though this code is always
* compiled and built with the project. We choose this strategy
* because we do not yet want LLVM (which TVM uses) as a dependency.
* Instead we expose a hook to TVM and let users opt into this
* feature when they have TVM installed.
*
* We do require TVM and MXNet to be built with the same C++ ABI
* for std::function.
*/
#define TVM_RUNTIME_HEADER_ONLY 1
Contributor: should you undefine it at the end?

Member Author: Since it is in a .cc file, this is not necessary.

#include <tvm/runtime/packed_func.h>
#include <mxnet/c_api.h>
#include <mxnet/ndarray.h>
#include <mxnet/engine.h>

#include <memory>

namespace mxnet {

using tvm::runtime::PackedFunc;
using tvm::runtime::TVMArgs;
using tvm::runtime::TVMRetValue;

/*!
* \brief Async functor object that wraps a TVM PackedFunc
* together with its call arguments.
*/
class TVMFunctor {
public:
// constructor
explicit TVMFunctor(PackedFunc func, PackedFunc fset_stream)
: func_(func), fset_stream_(fset_stream) {}

void Init(const TVMArgs& args,
const std::vector<int>& const_loc,
std::vector<Engine::VarHandle>* const_vars,
std::vector<Engine::VarHandle>* mutate_vars) {
values_.clear();
type_codes_.clear();
values_.insert(values_.end(), args.values, args.values + args.size());
type_codes_.insert(
type_codes_.end(), args.type_codes, args.type_codes + args.size());

size_t const_loc_ptr = 0;
for (int i = 0; i < args.size(); ++i) {
if (args.type_codes[i] == kTVMNDArrayTypeCode) {
const NDArray& nd =
static_cast<NDArray*>(args.values[i].v_handle)[0];
// We cannot set the DLTensor value yet; it is filled in during Run().
type_codes_[i] = kArrayHandle;
array_data_.push_back(nd);
array_loc_.push_back(i);
// Check whether the argument is read or mutated;
// by default, assume we mutate the array.
if (const_loc_ptr < const_loc.size() &&
i == const_loc[const_loc_ptr]) {
const_vars->push_back(nd.var());
Member: is this called a lot in performance-sensitive areas? should we do a reserve()?

Member: (for all vectors here)

Member Author: we don't know the size of the vectors beforehand

Member: ok

++const_loc_ptr;
} else {
mutate_vars->push_back(nd.var());
}
} else {
CHECK_LT(args.type_codes[i], kTVMType)
<< "Only allow POD type in mxnet async call";
}
}
}

Context ctx() {
return array_data_[0].ctx();
}

void Run(const RunContext& rctx) {
// setup DLTensor
for (size_t i = 0; i < array_loc_.size(); ++i) {
values_[array_loc_[i]].v_handle =
const_cast<DLTensor*>(&(array_data_[i].data().dltensor()));
}
// run the packed function
TVMRetValue rv;
TVMArgs args(&values_[0], &type_codes_[0], values_.size());
if (ctx().dev_type == Context::kGPU) {
#if MXNET_USE_CUDA
// pass stream via last argument.
void* strm = static_cast<void*>(rctx.get_stream<gpu>()->stream_);
int dev_type = kDLGPU;
fset_stream_(dev_type, rctx.ctx.dev_id, strm);
func_.CallPacked(args, &rv);
fset_stream_(dev_type, rctx.ctx.dev_id, nullptr);
#else
LOG(FATAL) << "Please compile with CUDA enabled for cuda features";
#endif
} else {
func_.CallPacked(args, &rv);
}
}

private:
/*! \brief The function */
PackedFunc func_;
/*! \brief Set stream */
PackedFunc fset_stream_;
/*! \brief Values field */
std::vector<TVMValue> values_;
/*! \brief type code field */
std::vector<int> type_codes_;
/*! \brief arrays field */
std::vector<NDArray> array_data_;
/*! \brief position of array in arguments */
std::vector<int> array_loc_;
};


// Wrap a TVM function into a function that invokes MXNet's engine.
// It does two things: pushes the call through the engine properly, and
// sets up the NDArray-to-DLTensor conversion during invocation.
void WrapAsyncCall(TVMArgs wrap_args, TVMRetValue* wrap_rv) {
PackedFunc f = wrap_args[0];
PackedFunc fset_stream = wrap_args[1];
int num_const = wrap_args[2];

// sorted position of constant arguments
std::vector<int> const_loc;
for (int i = 0; i < num_const; ++i) {
const_loc.push_back(wrap_args[i + 3].operator int());
Member: reserve?

Member Author: This is not on the critical path (function construction rather than execution).

}
std::sort(const_loc.begin(), const_loc.end());
// The wrapped function that is actually called by the user.
auto wrapped = [f, fset_stream, const_loc](TVMArgs args, TVMRetValue* rv) {
std::shared_ptr<TVMFunctor> func =
std::make_shared<TVMFunctor>(f, fset_stream);
std::vector<Engine::VarHandle> const_vars, mutate_vars;
func->Init(args, const_loc, &const_vars, &mutate_vars);
Engine *engine = Engine::Get();
engine->DeduplicateVarHandle(&const_vars, &mutate_vars);
engine->PushSync([func](RunContext ctx) {
func->Run(ctx);
}, func->ctx(), const_vars, mutate_vars);
};
*wrap_rv = PackedFunc(wrapped);
}

} // namespace mxnet

// C callback that can be used by TVM to extract
// the WrapAsyncCall function.
extern "C" MXNET_DLL int MXTVMBridge(TVMFunctionHandle pregister) {
Contributor: Is this a C API? Should it be put in the c_api folder?

Member Author: This is queried by TVM, so it is not a publicly facing C API. I feel it is better to keep it here, but we can move it to the c_api folder.

using tvm::runtime::PackedFunc;
const PackedFunc& fregister =
*static_cast<PackedFunc*>(pregister);
fregister("WrapAsyncCall", PackedFunc(mxnet::WrapAsyncCall));
return 0;
}
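For an end-to-end picture of how WrapAsyncCall is consumed, here is a minimal sketch in the spirit of this PR's test, assuming a TVM build of this era that ships tvm.contrib.mxnet (the TVM-side counterpart that queries MXTVMBridge):

import mxnet as mx
import tvm
from tvm.contrib import mxnet as mxnet_bridge

# Build a trivial TVM kernel: C = A + B over a 1-D buffer.
n = tvm.var("n")
A = tvm.placeholder((n,), name="A")
B = tvm.placeholder((n,), name="B")
C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")
s = tvm.create_schedule(C.op)
fadd = tvm.build(s, [A, B, C], target="llvm")

# Wrap the compiled PackedFunc so it runs through MXNet's async engine.
# const_loc=[0, 1] marks the first two NDArray arguments as read-only
# engine vars; unannotated NDArray arguments are treated as mutated.
mx_fadd = mxnet_bridge.to_mxnet_func(fadd, const_loc=[0, 1])

a = mx.nd.ones((10,))
b = mx.nd.ones((10,))
c = mx.nd.zeros((10,))
mx_fadd(a, b, c)    # pushed asynchronously onto MXNet's engine
print(c.asnumpy())  # synchronizes; prints an array of 2.0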
6 changes: 6 additions & 0 deletions tests/ci_build/Dockerfile.gpu
@@ -12,3 +12,9 @@ COPY install/ubuntu_install_r.sh /install/
RUN /install/ubuntu_install_r.sh
COPY install/ubuntu_install_perl.sh /install/
RUN /install/ubuntu_install_perl.sh

COPY install/ubuntu_install_llvm.sh /install/
RUN /install/ubuntu_install_llvm.sh

COPY install/ubuntu_install_tvm.sh /install/
RUN /install/ubuntu_install_tvm.sh
28 changes: 28 additions & 0 deletions tests/ci_build/install/ubuntu_install_llvm.sh
@@ -0,0 +1,28 @@
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.



echo deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-5.0 main\
>> /etc/apt/sources.list.d/llvm.list
echo deb-src http://apt.llvm.org/xenial/ llvm-toolchain-xenial-5.0 main\
>> /etc/apt/sources.list.d/llvm.list

wget -O - http://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
apt-get update && apt-get install -y --force-yes llvm-5.0
44 changes: 44 additions & 0 deletions tests/ci_build/install/ubuntu_install_tvm.sh
@@ -0,0 +1,44 @@
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Build and install TVM
cd /tmp
git clone https://github.com/dmlc/tvm/ --recursive
Contributor: Are you aware that the result of this script is cached indefinitely? In that case, it would be better to specify a stable version instead of master, as otherwise environments may differ across slaves.

Member Author: I am aware of that; changed to use a fixed tag.

cd tvm

# This is a stable commit that supports the MXNet TVM bridge.
# We pin it because bridge support has only just been merged into
# master and there is not yet a version tag.
git checkout 30eaf463e34d7c301357c31a010945d11df16537

cp make/config.mk .
echo USE_CUDA=1 >> config.mk
echo LLVM_CONFIG=llvm-config-5.0 >> config.mk
echo USE_RPC=1 >> config.mk
echo USE_GRAPH_RUNTIME=1 >> config.mk
echo CUDA_PATH=/usr/local/cuda >> config.mk
make -j`nproc`

cd python
python setup.py install
cd -

cd topi/python
python setup.py install
cd -