Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use RTLD_DEEPBIND for TF+ZenDNN #111

Merged
merged 4 commits into from
Jan 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 5 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ endif()
project(
amdinfer
VERSION ${ver_major}.${ver_minor}.${ver_patch}
LANGUAGES CXX
LANGUAGES C CXX
DESCRIPTION "AMDinfer inference library and server"
)

Expand All @@ -54,12 +54,10 @@ if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_CXX_LINKER_WRAPPER_FLAG "-Wl,")
add_link_options("LINKER:--as-needed")
if(NOT SKBUILD)
# For manylinux builds, the Python extension is intentionally not linked
# against Python at compile time so we need to allow undefined symbols
add_link_options("LINKER:--no-undefined")
add_link_options("LINKER:--no-allow-shlib-undefined")
endif()
# Linker options like --no-undefined and --no-allow-shlib-undefined cannot be
# used here: for manylinux builds, the Python extension is intentionally not
# linked against Python at compile time, so undefined symbols must be allowed.
# The workers are not fully linked either.
set(CMAKE_CONFIGURATION_TYPES "Debug;Release;Coverage"
CACHE STRING "Available build-types: Debug, Release and Coverage"
)
Expand Down
8 changes: 4 additions & 4 deletions docker/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,8 +496,8 @@ def build_tfzendnn():
&& mkdir -p ${COPY_DIR}/usr/include/tfzendnn/ \\
&& mkdir -p ${COPY_DIR}/usr/lib \\
# copy and list files that are copied
&& cp -rv include/* ${COPY_DIR}/usr/include/tfzendnn | cut -d"'" -f 4 > ${MANIFESTS_DIR}/tfzendnn.txt \\
&& cp -rv lib/*.so* ${COPY_DIR}/usr/lib | cut -d"'" -f 4 >> ${MANIFESTS_DIR}/tfzendnn.txt"""
&& cp -rv include/* ${COPY_DIR}/usr/include/tfzendnn | cut -d"'" -f 2 | sed 's/include/\/usr\/include\/tfzendnn/' > ${MANIFESTS_DIR}/tfzendnn.txt \\
&& cp -rv lib/*.so* ${COPY_DIR}/usr/lib | cut -d"'" -f 2 | sed 's/lib/\/usr\/lib/' >> ${MANIFESTS_DIR}/tfzendnn.txt"""
)


Expand All @@ -514,8 +514,8 @@ def build_ptzendnn():
&& mkdir -p ${COPY_DIR}/usr/include/ptzendnn/ \\
&& mkdir -p ${COPY_DIR}/usr/lib \\
# copy and list files that are copied
&& cp -rv include/* ${COPY_DIR}/usr/include/ptzendnn | cut -d"'" -f 4 > ${MANIFESTS_DIR}/ptzendnn.txt \\
&& cp -rv lib/*.so* ${COPY_DIR}/usr/lib | cut -d"'" -f 4 >> ${MANIFESTS_DIR}/ptzendnn.txt
&& cp -rv include/* ${COPY_DIR}/usr/include/ptzendnn | cut -d"'" -f 2 | sed 's/include/\/usr\/include\/ptzendnn/' > ${MANIFESTS_DIR}/ptzendnn.txt \\
&& cp -rv lib/*.so* ${COPY_DIR}/usr/lib | cut -d"'" -f 2 | sed 's/lib/\/usr\/lib/' >> ${MANIFESTS_DIR}/ptzendnn.txt

# build jemalloc 5.3.0. Build uses autoconf implicitly
RUN VERSION=5.3.0 \\
Expand Down
19 changes: 19 additions & 0 deletions docker/get_dynamic_dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,19 @@ add_migraphx_deps() {
done
}

add_tfzendnn_deps() {
  # These files are opened with dlopen in the tfzendnn worker, so they don't
  # show up with ldd. Each one is passed to get_dependencies explicitly.
  other_files=(
    /lib/libiomp5.so
    /lib/libtensorflow_cc.so
  )

  # Quote the expansions so that every array element reaches get_dependencies
  # as exactly one argument, without word splitting or glob expansion.
  for file in "${other_files[@]}"; do
    get_dependencies "$file"
  done
}

add_other_bins() {
# any other binary dependencies needed

Expand Down Expand Up @@ -234,6 +247,7 @@ main() {
COPY=""
VITIS=""
MIGRAPHX=""
TFZENDNN=""

# Parse Options
while true; do
Expand All @@ -248,6 +262,7 @@ main() {
"-c" | "--copy" ) COPY=$2 ; shift 2 ;;
"--vitis" ) VITIS=$2 ; shift 2 ;;
"--migraphx" ) MIGRAPHX=$2; shift 2 ;;
"--tfzendnn" ) TFZENDNN=$2; shift 2 ;;
*) break;;
esac
done
Expand All @@ -265,6 +280,10 @@ main() {
add_migraphx_deps
fi

if [[ $TFZENDNN == "yes" ]]; then
add_tfzendnn_deps
fi

add_other_bins

remove_duplicates
Expand Down
6 changes: 4 additions & 2 deletions docker/template.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,7 @@ ARG MANIFESTS_DIR
ARG AMDINFER_ROOT
ARG ENABLE_VITIS
ARG ENABLE_MIGRAPHX
ARG ENABLE_TFZENDNN

COPY . $AMDINFER_ROOT

Expand All @@ -615,8 +616,9 @@ RUN ldconfig \
&& ./amdinfer install --get-manifest > ${MANIFESTS_DIR}/amdinfer.txt \
# get all the runtime shared library dependencies for the server
&& cd ${AMDINFER_ROOT} \
&& ./docker/get_dynamic_dependencies.sh --vitis ${ENABLE_VITIS} > ${MANIFESTS_DIR}/prod.txt \
&& ./docker/get_dynamic_dependencies.sh --copy ${COPY_DIR} --vitis ${ENABLE_VITIS}
# --migraphx is not passed since it's installed from Debian packages below
&& ./docker/get_dynamic_dependencies.sh --vitis ${ENABLE_VITIS} --tfzendnn ${ENABLE_TFZENDNN} > ${MANIFESTS_DIR}/prod.txt \
&& ./docker/get_dynamic_dependencies.sh --copy ${COPY_DIR} --vitis ${ENABLE_VITIS} --tfzendnn ${ENABLE_TFZENDNN}

FROM ${BASE_IMAGE} AS vitis_installer_prod_yes

Expand Down
42 changes: 9 additions & 33 deletions examples/resnet50/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,25 @@
list(APPEND examples)

if(${AMDINFER_ENABLE_VITIS})
list(APPEND examples vitis)
if(${AMDINFER_ENABLE_REST})
list(APPEND examples vitis)
endif()
endif()

if(${AMDINFER_ENABLE_PTZENDNN})
list(APPEND examples ptzendnn)
endif()

if(${AMDINFER_ENABLE_TFZENDNN})
list(APPEND examples tfzendnn)
if(${AMDINFER_ENABLE_GRPC})
list(APPEND examples tfzendnn)
endif()
endif()

if(${AMDINFER_ENABLE_MIGRAPHX})
list(APPEND examples migraphx)
if(${AMDINFER_ENABLE_REST})
list(APPEND examples migraphx)
endif()
endif()

foreach(example ${examples})
Expand All @@ -37,33 +43,3 @@ foreach(example ${examples})
${target} PRIVATE opencv_imgcodecs opencv_imgproc opencv_core
)
endforeach()

# if(${AMDINFER_ENABLE_TFZENDNN})
# add_executable(tf_zendnn_client tf_zendnn_client.cpp)

# target_link_libraries(
# tf_zendnn_client PRIVATE amdinfer util opencv_imgcodecs opencv_imgproc
# opencv_core
# )
# target_include_directories(tf_zendnn_client PRIVATE ${AMDINFER_INCLUDE_DIRS})
# endif()

# if(${AMDINFER_ENABLE_PTZENDNN})
# add_executable(pt_zendnn_client pt_zendnn_client.cpp)

# target_link_libraries(
# pt_zendnn_client PRIVATE amdinfer util opencv_imgcodecs opencv_imgproc
# opencv_core
# )
# target_include_directories(pt_zendnn_client PRIVATE ${AMDINFER_INCLUDE_DIRS})
# endif()

# if(${AMDINFER_ENABLE_MIGRAPHX})
# add_executable(migraphx_client migraphx_client.cpp)

# target_link_libraries(
# migraphx_client PRIVATE amdinfer util opencv_imgcodecs opencv_imgproc
# opencv_core migraphx::c
# )
# target_include_directories(migraphx_client PRIVATE ${AMDINFER_INCLUDE_DIRS})
# endif()
2 changes: 1 addition & 1 deletion examples/resnet50/tfzendnn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ std::string load(const amdinfer::Client* client, const Args& args) {
// a particular backend. This guard checks to make sure the server does
// support the requested backend. If you already know it's supported, you can
// skip this check.
if (!serverHasExtension(client, "ptzendnn")) {
if (!serverHasExtension(client, "tfzendnn")) {
std::cerr
<< "TF+ZenDNN is not enabled. Please recompile with it enabled to "
<< "run this example\n";
Expand Down
22 changes: 14 additions & 8 deletions src/amdinfer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ set_target_properties(
)

target_link_libraries(
amdinfer-server-exe PRIVATE batching clients core observation util servers
amdinfer-server-exe PRIVATE amdinfer Jsoncpp_lib Drogon::Drogon
)

add_subdirectory(batching)
Expand Down Expand Up @@ -55,23 +55,29 @@ add_library(
${HELPER_OBJS}
${OBSERVATION_OBJS}
${SERVER_OBJS}
${UTIL_OBJS}
)
target_link_libraries(
amdinfer PRIVATE batching clients core observation util servers
amdinfer PRIVATE batching clients core observation servers util
)

add_library(
amdinfer-server ${TYPE} ${BATCHING_OBJS} ${CORE_OBJS} ${HELPER_OBJS}
${OBSERVATION_OBJS} ${SERVER_OBJS}
)
target_link_libraries(
amdinfer-server PRIVATE batching core observation util servers
amdinfer-server
${TYPE}
${BATCHING_OBJS}
${CORE_OBJS}
${HELPER_OBJS}
${OBSERVATION_OBJS}
${SERVER_OBJS}
${UTIL_OBJS}
)
target_link_libraries(amdinfer-server PRIVATE batching core observation servers)

add_library(
amdinfer-client ${TYPE} ${CLIENT_OBJS} ${CORE_OBJS} ${OBSERVATION_OBJS}
${UTIL_OBJS}
)
target_link_libraries(amdinfer-client PRIVATE clients core observation util)
target_link_libraries(amdinfer-client PRIVATE clients core observation)

set(output_libraries amdinfer amdinfer-server amdinfer-client)
foreach(lib ${output_libraries})
Expand Down
5 changes: 1 addition & 4 deletions src/amdinfer/bindings/python/clients/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,7 @@ foreach(target ${targets})
${target}_py PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/..
${AMDINFER_INCLUDE_DIRS} ${Python_INCLUDE_DIRS}
)
target_link_libraries(
${target}_py PRIVATE ${Python_LIBRARIES} ${target}
$<TARGET_OBJECTS:${target}>
)
target_link_libraries(${target}_py PRIVATE ${Python_LIBRARIES} amdinfer)
add_dependencies(${target}_py pybind11Mkdoc)

set(LINK_TARGETS ${LINK_TARGETS} ${target}_py)
Expand Down
5 changes: 1 addition & 4 deletions src/amdinfer/bindings/python/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,7 @@ foreach(target ${TARGETS})
${target}_py PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/..
${AMDINFER_INCLUDE_DIRS} ${Python_INCLUDE_DIRS}
)
target_link_libraries(
${target}_py PRIVATE ${Python_LIBRARIES} ${target}
$<TARGET_OBJECTS:${target}>
)
target_link_libraries(${target}_py PRIVATE ${Python_LIBRARIES} amdinfer)
add_dependencies(${target}_py pybind11Mkdoc)

set(LINK_TARGETS ${LINK_TARGETS} ${target}_py)
Expand Down
5 changes: 1 addition & 4 deletions src/amdinfer/bindings/python/servers/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,7 @@ foreach(target ${TARGETS})
${target}_py PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/..
${AMDINFER_INCLUDE_DIRS} ${Python_INCLUDE_DIRS}
)
target_link_libraries(
${target}_py PRIVATE ${Python_LIBRARIES} ${target}
$<TARGET_OBJECTS:${target}>
)
target_link_libraries(${target}_py PRIVATE ${Python_LIBRARIES} amdinfer)
add_dependencies(${target}_py pybind11Mkdoc)

set(LINK_TARGETS ${LINK_TARGETS} ${target}_py)
Expand Down
12 changes: 12 additions & 0 deletions src/amdinfer/bindings/python/src/amdinfer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,23 @@

import functools
import multiprocessing as mp
import os
import sys
import time

# By default, Python uses only RTLD_NOW as the dlopen flag. When using
# RTLD_DEEPBIND to open shared libraries from the inference server, specifying
# RTLD_GLOBAL is needed for the loaded library to resolve symbols from the main
# library. RTLD_LAZY is added to match the settings used for dlopen in C++.
# RTLD_DEEPBIND cannot be added here. You get a segmentation fault from Pybind11
# if you do so.
#
# The flags are changed only for the duration of the extension import. The
# previous value is restored in a finally clause so that a failed import does
# not leave the interpreter with modified dlopen flags.
flags = sys.getdlopenflags()
sys.setdlopenflags(os.RTLD_GLOBAL | os.RTLD_LAZY)
try:
    from ._amdinfer import *
finally:
    sys.setdlopenflags(flags)


def _set_data(input_n, image):
if input_n.datatype == DataType.UINT8:
Expand Down
18 changes: 17 additions & 1 deletion src/amdinfer/core/worker_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,23 @@ void* findFunc(const std::string& func, const std::string& so_path) {
// reset errors
dlerror();

/* open the needed object */
/*
Open the needed object. The dlopen flags used here:
- RTLD_LOCAL: the symbols are not made available to other loaded libs
- RTLD_LAZY: resolve symbols as needed. We only need one anyway
Adding RTLD_DEEPBIND here creates problems:
- Cannot use std::cout in the library
(https://gcc.gnu.org/bugzilla/show_bug.cgi?id=42679)
- std::regex gives a segfault
There are many Stack Overflow posts reporting problems with DEEPBIND:
- https://stackoverflow.com/a/49018967
- https://stackoverflow.com/q/63666660
The motivation to add DEEPBIND is to isolate the loaded workers. For example,
if the library is using a different version of a library that we are already
using, it can link to the wrong version. Another option for isolating the
workers is dlmopen but that also should not be used here due to its own set of
issues (https://stackoverflow.com/a/70043234).
*/
void* handle = dlopen(so_path.c_str(), RTLD_LOCAL | RTLD_LAZY);
if (handle == nullptr) {
const char* error_str = dlerror();
Expand Down
2 changes: 1 addition & 1 deletion src/amdinfer/servers/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ foreach(target ${targets})
list(APPEND SERVER_OBJS $<TARGET_OBJECTS:${target}>)
endforeach()

target_link_libraries(server PRIVATE Jsoncpp_lib core)
target_link_libraries(server PRIVATE Jsoncpp_lib core observation)

if(${AMDINFER_ENABLE_AKS})
target_link_libraries(server INTERFACE aks)
Expand Down
18 changes: 13 additions & 5 deletions src/amdinfer/servers/grpc_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -645,12 +645,17 @@ class GrpcServer final {
~GrpcServer() {
server_->Shutdown();
// Always shutdown the completion queues after the server.
for (auto& cq : cq_) {
for (const auto& cq : cq_) {
cq->Shutdown();
// drain the completion queue to prevent assertion errors in grpc
void* tag = nullptr;
bool ok = false;
while (cq->Next(&tag, &ok)) {
// drain the completion queue to prevent assertion errors in grpc
}
}
for (auto& thread : threads_) {
if (thread.joinable()) {
thread.join();
}
}
}
Expand All @@ -676,14 +681,12 @@ class GrpcServer final {
// Start threads to handle incoming RPCs
for (auto i = 0; i < cq_count; i++) {
threads_.emplace_back(&GrpcServer::handleRpcs, this, i);
// just detach threads for now to simplify shutdown
threads_.back().detach();
}
}

// This can be run in multiple threads if needed.
void handleRpcs(int index) {
auto& my_cq = cq_.at(index);
const auto& my_cq = cq_.at(index);

// Spawn a new CallData instance to serve new clients.
new CallDataServerLive(&service_, my_cq.get());
Expand All @@ -702,6 +705,11 @@ class GrpcServer final {
void* tag = nullptr; // uniquely identifies a request.
bool ok = false;
while (true) {
// the gRPC server is shutting down in this case
if (my_cq == nullptr) {
return;
}

// Block waiting to read the next event from the completion queue. The
// event is uniquely identified by its tag, which in this case is the
// memory address of a CallDataBase instance.
Expand Down
2 changes: 2 additions & 0 deletions src/amdinfer/util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,5 @@ set_target_properties(util PROPERTIES OUTPUT_NAME amdinfer-util)
set_target_properties(
util PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/..
)

set(UTIL_OBJS ${UTIL_OBJS} PARENT_SCOPE)