diff --git a/Dockerfile b/Dockerfile index efa95301db..8adcd6c24b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -30,6 +30,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ unzip \ python \ python-pip \ + python3-pip \ libopencv-core-dev \ libopencv-highgui-dev \ libopencv-imgproc-dev \ @@ -42,9 +43,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ RUN pip install --upgrade setuptools RUN pip install future +RUN pip3 install six # Install bazel -ARG BAZEL_VERSION=0.26.1 +ARG BAZEL_VERSION=1.1.0 RUN mkdir /bazel && \ wget --no-check-certificate -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/b\ azel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ diff --git a/WORKSPACE b/WORKSPACE index 000ff70f9e..c09ca03a53 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -12,17 +12,21 @@ http_archive( load("@bazel_skylib//lib:versions.bzl", "versions") versions.check(minimum_bazel_version = "0.24.1") -# ABSL cpp library. +# ABSL cpp library lts_2019_08_08. http_archive( name = "com_google_absl", - # Head commit on 2019-04-12. - # TODO: Switch to the latest absl version when the problem gets - # fixed. urls = [ - "https://github.com/abseil/abseil-cpp/archive/a02f62f456f2c4a7ecf2be3104fe0c6e16fbad9a.tar.gz", + "https://github.com/abseil/abseil-cpp/archive/20190808.tar.gz", ], - sha256 = "d437920d1434c766d22e85773b899c77c672b8b4865d5dc2cd61a29fdff3cf03", - strip_prefix = "abseil-cpp-a02f62f456f2c4a7ecf2be3104fe0c6e16fbad9a", + # Remove after https://github.com/abseil/abseil-cpp/issues/326 is solved. 
+ patches = [ + "@//third_party:com_google_absl_f863b622fe13612433fdf43f76547d5edda0c93001.diff" + ], + patch_args = [ + "-p1", + ], + strip_prefix = "abseil-cpp-20190808", + sha256 = "8100085dada279bf3ee00cd064d43b5f55e5d913be0dfe2906f06f8f28d5b37e" ) http_archive( @@ -103,9 +107,9 @@ http_archive( ], ) -# 2019-11-12 -_TENSORFLOW_GIT_COMMIT = "a5f9bcd64453ff3d1f64cb4da4786db3d2da7f82" -_TENSORFLOW_SHA256= "f2b6f2ab2ffe63e86eccd3ce4bea6b7197383d726638dfeeebcdc1e7de73f075" +# 2019-11-21 +_TENSORFLOW_GIT_COMMIT = "f482488b481a799ca07e7e2d153cf47b8e91a60c" +_TENSORFLOW_SHA256= "8d9118c2ce186c7e1403f04b96982fe72c184060c7f7a93e30a28dca358694f0" http_archive( name = "org_tensorflow", urls = [ diff --git a/mediapipe/calculators/core/BUILD b/mediapipe/calculators/core/BUILD index 86cb285220..6de3c38288 100644 --- a/mediapipe/calculators/core/BUILD +++ b/mediapipe/calculators/core/BUILD @@ -691,6 +691,7 @@ cc_library( ":split_vector_calculator_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", "//mediapipe/framework/port:ret_check", "//mediapipe/framework/port:status", "//mediapipe/util:resource_util", diff --git a/mediapipe/calculators/core/begin_loop_calculator.cc b/mediapipe/calculators/core/begin_loop_calculator.cc index cc7f6b85ee..6c1ac20bf0 100644 --- a/mediapipe/calculators/core/begin_loop_calculator.cc +++ b/mediapipe/calculators/core/begin_loop_calculator.cc @@ -21,16 +21,10 @@ namespace mediapipe { -// A calculator to process std::vector. -typedef BeginLoopCalculator> - BeginLoopNormalizedLandmarkCalculator; -REGISTER_CALCULATOR(BeginLoopNormalizedLandmarkCalculator); - -// A calculator to process std::vector>. -typedef BeginLoopCalculator< - std::vector>> - BeginLoopNormalizedLandmarksVectorCalculator; -REGISTER_CALCULATOR(BeginLoopNormalizedLandmarksVectorCalculator); +// A calculator to process std::vector. 
+typedef BeginLoopCalculator> + BeginLoopNormalizedLandmarkListVectorCalculator; +REGISTER_CALCULATOR(BeginLoopNormalizedLandmarkListVectorCalculator); // A calculator to process std::vector. typedef BeginLoopCalculator> diff --git a/mediapipe/calculators/core/concatenate_vector_calculator.cc b/mediapipe/calculators/core/concatenate_vector_calculator.cc index c4144990e1..0f6bb759cf 100644 --- a/mediapipe/calculators/core/concatenate_vector_calculator.cc +++ b/mediapipe/calculators/core/concatenate_vector_calculator.cc @@ -19,7 +19,7 @@ #include "mediapipe/framework/formats/landmark.pb.h" #include "tensorflow/lite/interpreter.h" -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) #include "tensorflow/lite/delegates/gpu/gl/gl_buffer.h" #endif // !MEDIAPIPE_DISABLE_GPU @@ -50,7 +50,7 @@ typedef ConcatenateVectorCalculator<::mediapipe::NormalizedLandmark> ConcatenateLandmarkVectorCalculator; REGISTER_CALCULATOR(ConcatenateLandmarkVectorCalculator); -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) typedef ConcatenateVectorCalculator<::tflite::gpu::gl::GlBuffer> ConcatenateGlBufferVectorCalculator; REGISTER_CALCULATOR(ConcatenateGlBufferVectorCalculator); diff --git a/mediapipe/calculators/core/end_loop_calculator.cc b/mediapipe/calculators/core/end_loop_calculator.cc index 8991e97f17..e27ab11ea9 100644 --- a/mediapipe/calculators/core/end_loop_calculator.cc +++ b/mediapipe/calculators/core/end_loop_calculator.cc @@ -26,14 +26,9 @@ typedef EndLoopCalculator> EndLoopNormalizedRectCalculator; REGISTER_CALCULATOR(EndLoopNormalizedRectCalculator); -typedef EndLoopCalculator> - EndLoopNormalizedLandmarkCalculator; -REGISTER_CALCULATOR(EndLoopNormalizedLandmarkCalculator); - -typedef EndLoopCalculator< - std::vector>> - EndLoopNormalizedLandmarksVectorCalculator; -REGISTER_CALCULATOR(EndLoopNormalizedLandmarksVectorCalculator); +typedef EndLoopCalculator> + 
EndLoopNormalizedLandmarkListVectorCalculator; +REGISTER_CALCULATOR(EndLoopNormalizedLandmarkListVectorCalculator); typedef EndLoopCalculator> EndLoopBooleanCalculator; REGISTER_CALCULATOR(EndLoopBooleanCalculator); diff --git a/mediapipe/calculators/core/previous_loopback_calculator.cc b/mediapipe/calculators/core/previous_loopback_calculator.cc index 5f109f4138..8c470ef7da 100644 --- a/mediapipe/calculators/core/previous_loopback_calculator.cc +++ b/mediapipe/calculators/core/previous_loopback_calculator.cc @@ -74,6 +74,10 @@ class PreviousLoopbackCalculator : public CalculatorBase { } ::mediapipe::Status Process(CalculatorContext* cc) final { + Packet& main_packet = cc->Inputs().Get(main_id_).Value(); + if (!main_packet.IsEmpty()) { + main_ts_.push_back(main_packet.Timestamp()); + } Packet& loopback_packet = cc->Inputs().Get(loop_id_).Value(); if (!loopback_packet.IsEmpty()) { loopback_packets_.push_back(loopback_packet); @@ -83,23 +87,6 @@ class PreviousLoopbackCalculator : public CalculatorBase { } } - Packet& main_packet = cc->Inputs().Get(main_id_).Value(); - if (!main_packet.IsEmpty()) { - main_ts_.push_back(main_packet.Timestamp()); - - // In case of an empty "LOOP" input, truncate timestamp is set to the - // lowest possible timestamp for a successive non-empty "LOOP" input. This - // truncates main_ts_ as soon as possible, and produces the highest legal - // output timestamp bound. 
- if (loopback_packet.IsEmpty() && - loopback_packet.Timestamp() != Timestamp::Unstarted()) { - while (!main_ts_.empty() && - main_ts_.front() <= loopback_packet.Timestamp() + 1) { - main_ts_.pop_front(); - } - } - } - while (!main_ts_.empty() && !loopback_packets_.empty()) { Timestamp main_timestamp = main_ts_.front(); main_ts_.pop_front(); diff --git a/mediapipe/calculators/core/previous_loopback_calculator_test.cc b/mediapipe/calculators/core/previous_loopback_calculator_test.cc index 0756f01fde..09456514b0 100644 --- a/mediapipe/calculators/core/previous_loopback_calculator_test.cc +++ b/mediapipe/calculators/core/previous_loopback_calculator_test.cc @@ -93,14 +93,19 @@ TEST(PreviousLoopbackCalculator, CorrectTimestamps) { EXPECT_EQ(TimestampValues(in_prev), (std::vector{1})); EXPECT_EQ(pair_values(in_prev.back()), std::make_pair(1, -1)); + send_packet("in", 2); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_EQ(TimestampValues(in_prev), (std::vector{1, 2})); + EXPECT_EQ(pair_values(in_prev.back()), std::make_pair(2, 1)); + send_packet("in", 5); MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(in_prev), (std::vector{1, 5})); - EXPECT_EQ(pair_values(in_prev.back()), std::make_pair(5, 1)); + EXPECT_EQ(TimestampValues(in_prev), (std::vector{1, 2, 5})); + EXPECT_EQ(pair_values(in_prev.back()), std::make_pair(5, 2)); send_packet("in", 15); MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(in_prev), (std::vector{1, 5, 15})); + EXPECT_EQ(TimestampValues(in_prev), (std::vector{1, 2, 5, 15})); EXPECT_EQ(pair_values(in_prev.back()), std::make_pair(15, 5)); MP_EXPECT_OK(graph_.CloseAllInputStreams()); @@ -182,78 +187,23 @@ TEST(PreviousLoopbackCalculator, ClosesCorrectly) { MP_EXPECT_OK(graph_.WaitUntilIdle()); EXPECT_EQ(TimestampValues(outputs), (std::vector{1})); + send_packet("in", 2); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_EQ(TimestampValues(outputs), (std::vector{1, 2})); + send_packet("in", 5); 
MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(outputs), (std::vector{1, 5})); + EXPECT_EQ(TimestampValues(outputs), (std::vector{1, 2, 5})); send_packet("in", 15); MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(outputs), (std::vector{1, 5, 15})); + EXPECT_EQ(TimestampValues(outputs), (std::vector{1, 2, 5, 15})); MP_EXPECT_OK(graph_.CloseAllInputStreams()); MP_EXPECT_OK(graph_.WaitUntilIdle()); EXPECT_EQ(TimestampValues(outputs), - (std::vector{1, 5, 15, Timestamp::Max().Value()})); - - MP_EXPECT_OK(graph_.WaitUntilDone()); -} + (std::vector{1, 2, 5, 15, Timestamp::Max().Value()})); -// Demonstrates that downstream calculators won't be blocked by -// always-empty-LOOP-stream. -TEST(PreviousLoopbackCalculator, EmptyLoopForever) { - std::vector outputs; - CalculatorGraphConfig graph_config_ = - ParseTextProtoOrDie(R"( - input_stream: 'in' - node { - calculator: 'PreviousLoopbackCalculator' - input_stream: 'MAIN:in' - input_stream: 'LOOP:previous' - input_stream_info: { tag_index: 'LOOP' back_edge: true } - output_stream: 'PREV_LOOP:previous' - } - # This calculator synchronizes its inputs as normal, so it is used - # to check that both "in" and "previous" are ready. 
- node { - calculator: 'PassThroughCalculator' - input_stream: 'in' - input_stream: 'previous' - output_stream: 'out' - output_stream: 'previous2' - } - node { - calculator: 'PacketOnCloseCalculator' - input_stream: 'out' - output_stream: 'close_out' - } - )"); - tool::AddVectorSink("close_out", &graph_config_, &outputs); - - CalculatorGraph graph_; - MP_ASSERT_OK(graph_.Initialize(graph_config_, {})); - MP_ASSERT_OK(graph_.StartRun({})); - - auto send_packet = [&graph_](const std::string& input_name, int n) { - MP_EXPECT_OK(graph_.AddPacketToInputStream( - input_name, MakePacket(n).At(Timestamp(n)))); - }; - - send_packet("in", 0); - MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(outputs), (std::vector{0})); - - for (int main_ts = 1; main_ts < 50; ++main_ts) { - send_packet("in", main_ts); - MP_EXPECT_OK(graph_.WaitUntilIdle()); - std::vector ts_values = TimestampValues(outputs); - EXPECT_EQ(ts_values.size(), main_ts); - for (int j = 0; j < main_ts; ++j) { - CHECK_EQ(ts_values[j], j); - } - } - - MP_EXPECT_OK(graph_.CloseAllInputStreams()); - MP_EXPECT_OK(graph_.WaitUntilIdle()); MP_EXPECT_OK(graph_.WaitUntilDone()); } diff --git a/mediapipe/calculators/core/split_vector_calculator.cc b/mediapipe/calculators/core/split_vector_calculator.cc index 3e60b8072a..2a5a74f8b6 100644 --- a/mediapipe/calculators/core/split_vector_calculator.cc +++ b/mediapipe/calculators/core/split_vector_calculator.cc @@ -17,6 +17,7 @@ #include #include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" #include "tensorflow/lite/interpreter.h" namespace mediapipe { @@ -41,4 +42,8 @@ REGISTER_CALCULATOR(SplitTfLiteTensorVectorCalculator); typedef SplitVectorCalculator<::mediapipe::NormalizedLandmark> SplitLandmarkVectorCalculator; REGISTER_CALCULATOR(SplitLandmarkVectorCalculator); + +typedef SplitVectorCalculator<::mediapipe::NormalizedRect> + SplitNormalizedRectVectorCalculator; 
+REGISTER_CALCULATOR(SplitNormalizedRectVectorCalculator); } // namespace mediapipe diff --git a/mediapipe/calculators/image/BUILD b/mediapipe/calculators/image/BUILD index 5a0a756198..c6f2b38afc 100644 --- a/mediapipe/calculators/image/BUILD +++ b/mediapipe/calculators/image/BUILD @@ -356,13 +356,13 @@ cc_library( "//mediapipe/framework/port:opencv_imgproc", "//mediapipe/framework/port:ret_check", "//mediapipe/framework/port:status", - "//mediapipe/gpu:gpu_buffer", ] + select({ "//mediapipe/gpu:disable_gpu": [], "//conditions:default": [ "//mediapipe/gpu:gl_calculator_helper", "//mediapipe/gpu:gl_simple_shaders", "//mediapipe/gpu:gl_quad_renderer", + "//mediapipe/gpu:gpu_buffer", "//mediapipe/gpu:shader_util", ], }), diff --git a/mediapipe/calculators/image/image_transformation_calculator.cc b/mediapipe/calculators/image/image_transformation_calculator.cc index 5eb34c3c0a..c5bf4262a1 100644 --- a/mediapipe/calculators/image/image_transformation_calculator.cc +++ b/mediapipe/calculators/image/image_transformation_calculator.cc @@ -400,7 +400,7 @@ ::mediapipe::Status ImageTransformationCalculator::RenderGpu( QuadRenderer* renderer = nullptr; GlTexture src1; -#if defined(__APPLE__) && !TARGET_OS_OSX +#if defined(MEDIAPIPE_IOS) if (input.format() == GpuBufferFormat::kBiPlanar420YpCbCr8VideoRange || input.format() == GpuBufferFormat::kBiPlanar420YpCbCr8FullRange) { if (!yuv_renderer_) { diff --git a/mediapipe/calculators/tensorflow/tensorflow_inference_calculator.cc b/mediapipe/calculators/tensorflow/tensorflow_inference_calculator.cc index 80f54d5542..d3f77b063f 100644 --- a/mediapipe/calculators/tensorflow/tensorflow_inference_calculator.cc +++ b/mediapipe/calculators/tensorflow/tensorflow_inference_calculator.cc @@ -34,7 +34,7 @@ #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_util.h" -#if !defined(__ANDROID__) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_MOBILE) && !defined(__APPLE__) #include 
"tensorflow/core/profiler/lib/traceme.h" #endif @@ -441,7 +441,7 @@ class TensorFlowInferenceCalculator : public CalculatorBase { const int64 run_start_time = absl::ToUnixMicros(clock_->TimeNow()); tf::Status tf_status; { -#if !defined(__ANDROID__) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_MOBILE) && !defined(__APPLE__) tensorflow::profiler::TraceMe trace(absl::string_view(cc->NodeName())); #endif tf_status = session_->Run(input_tensors, output_tensor_names, diff --git a/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_calculator.cc b/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_calculator.cc index b6d678b6b0..77b22571f6 100644 --- a/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_calculator.cc +++ b/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_calculator.cc @@ -31,8 +31,7 @@ #include "mediapipe/framework/tool/status_util.h" #include "tensorflow/core/public/session_options.h" -#if defined(MEDIAPIPE_LITE) || defined(__ANDROID__) || \ - defined(__APPLE__) && !TARGET_OS_OSX +#if defined(MEDIAPIPE_MOBILE) #include "mediapipe/util/android/file/base/helpers.h" #else #include "mediapipe/framework/port/file_helpers.h" diff --git a/mediapipe/calculators/tflite/BUILD b/mediapipe/calculators/tflite/BUILD index 89b4d980bf..50531a58b4 100644 --- a/mediapipe/calculators/tflite/BUILD +++ b/mediapipe/calculators/tflite/BUILD @@ -13,12 +13,12 @@ # limitations under the License. 
# +load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") + licenses(["notice"]) # Apache 2.0 package(default_visibility = ["//visibility:private"]) -load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") - proto_library( name = "ssd_anchors_calculator_proto", srcs = ["ssd_anchors_calculator.proto"], @@ -249,6 +249,11 @@ cc_library( "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_program", "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_shader", ], + }) + select({ + "//conditions:default": [], + "//mediapipe:android": [ + "@org_tensorflow//tensorflow/lite/delegates/nnapi:nnapi_delegate", + ], }), alwayslink = 1, ) diff --git a/mediapipe/calculators/tflite/tflite_converter_calculator.cc b/mediapipe/calculators/tflite/tflite_converter_calculator.cc index a9dccaed89..b2d69d9bbc 100644 --- a/mediapipe/calculators/tflite/tflite_converter_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_converter_calculator.cc @@ -25,8 +25,7 @@ #include "tensorflow/lite/error_reporter.h" #include "tensorflow/lite/interpreter.h" -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) #include "mediapipe/gpu/gl_calculator_helper.h" #include "mediapipe/gpu/gpu_buffer.h" #include "tensorflow/lite/delegates/gpu/gl/gl_buffer.h" @@ -35,7 +34,7 @@ #include "tensorflow/lite/delegates/gpu/gl_delegate.h" #endif // !MEDIAPIPE_DISABLE_GPU -#if defined(__APPLE__) && !TARGET_OS_OSX // iOS +#if defined(MEDIAPIPE_IOS) #import #import #import @@ -46,10 +45,9 @@ #include "tensorflow/lite/delegates/gpu/metal_delegate.h" #endif // iOS -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) typedef ::tflite::gpu::gl::GlBuffer GpuTensor; -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS +#elif defined(MEDIAPIPE_IOS) typedef id GpuTensor; #endif @@ -69,8 +67,7 @@ typedef 
Eigen::Matrix namespace mediapipe { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) using ::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer; using ::tflite::gpu::gl::GlProgram; using ::tflite::gpu::gl::GlShader; @@ -80,7 +77,7 @@ struct GPUData { GlShader shader; GlProgram program; }; -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS +#elif defined(MEDIAPIPE_IOS) struct GPUData { int elements = 1; GpuTensor buffer; @@ -149,11 +146,10 @@ class TfLiteConverterCalculator : public CalculatorBase { std::unique_ptr interpreter_ = nullptr; -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) mediapipe::GlCalculatorHelper gpu_helper_; std::unique_ptr gpu_data_out_; -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS +#elif defined(MEDIAPIPE_IOS) MPPMetalHelper* gpu_helper_ = nullptr; std::unique_ptr gpu_data_out_; #endif @@ -202,10 +198,9 @@ ::mediapipe::Status TfLiteConverterCalculator::GetContract( #endif // !MEDIAPIPE_DISABLE_GPU if (use_gpu) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc)); -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS +#elif defined(MEDIAPIPE_IOS) MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]); #endif } @@ -236,10 +231,9 @@ ::mediapipe::Status TfLiteConverterCalculator::Open(CalculatorContext* cc) { cc->Outputs().HasTag("TENSORS_GPU")); // Cannot use quantization. 
use_quantized_tensors_ = false; -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) MP_RETURN_IF_ERROR(gpu_helper_.Open(cc)); -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS +#elif defined(MEDIAPIPE_IOS) gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc]; RET_CHECK(gpu_helper_); #endif @@ -270,11 +264,10 @@ ::mediapipe::Status TfLiteConverterCalculator::Process(CalculatorContext* cc) { } ::mediapipe::Status TfLiteConverterCalculator::Close(CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) gpu_helper_.RunInGlContext([this] { gpu_data_out_.reset(); }); #endif -#if defined(__APPLE__) && !TARGET_OS_OSX // iOS +#if defined(MEDIAPIPE_IOS) gpu_data_out_.reset(); #endif return ::mediapipe::OkStatus(); @@ -390,8 +383,7 @@ ::mediapipe::Status TfLiteConverterCalculator::ProcessCPU( ::mediapipe::Status TfLiteConverterCalculator::ProcessGPU( CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) // GpuBuffer to tflite::gpu::GlBuffer conversion. const auto& input = cc->Inputs().Tag("IMAGE_GPU").Get(); MP_RETURN_IF_ERROR( @@ -427,7 +419,7 @@ ::mediapipe::Status TfLiteConverterCalculator::ProcessGPU( cc->Outputs() .Tag("TENSORS_GPU") .Add(output_tensors.release(), cc->InputTimestamp()); -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS +#elif defined(MEDIAPIPE_IOS) // GpuBuffer to id conversion. 
const auto& input = cc->Inputs().Tag("IMAGE_GPU").Get(); { @@ -493,8 +485,7 @@ ::mediapipe::Status TfLiteConverterCalculator::InitGpu(CalculatorContext* cc) { RET_CHECK_FAIL() << "Num input channels is less than desired output."; #endif // !MEDIAPIPE_DISABLE_GPU -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext( [this, &include_alpha, &input, &single_channel]() -> ::mediapipe::Status { // Device memory. @@ -538,7 +529,9 @@ ::mediapipe::Status TfLiteConverterCalculator::InitGpu(CalculatorContext* cc) { &gpu_data_out_->program)); return ::mediapipe::OkStatus(); })); -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS + +#elif defined(MEDIAPIPE_IOS) + RET_CHECK(include_alpha) << "iOS GPU inference currently accepts only RGBA input."; @@ -619,7 +612,7 @@ ::mediapipe::Status TfLiteConverterCalculator::LoadOptions( CHECK_GE(max_num_channels_, 1); CHECK_LE(max_num_channels_, 4); CHECK_NE(max_num_channels_, 2); -#if defined(__APPLE__) && !TARGET_OS_OSX // iOS +#if defined(MEDIAPIPE_IOS) if (cc->Inputs().HasTag("IMAGE_GPU")) // Currently on iOS, tflite gpu input tensor must be 4 channels, // so input image must be 4 channels also (checked in InitGpu). 
diff --git a/mediapipe/calculators/tflite/tflite_inference_calculator.cc b/mediapipe/calculators/tflite/tflite_inference_calculator.cc index 9ca1c281fc..de693865bb 100644 --- a/mediapipe/calculators/tflite/tflite_inference_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_inference_calculator.cc @@ -27,7 +27,7 @@ #include "tensorflow/lite/kernels/register.h" #include "tensorflow/lite/model.h" -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) #include "mediapipe/gpu/gl_calculator_helper.h" #include "mediapipe/gpu/gpu_buffer.h" #include "tensorflow/lite/delegates/gpu/common/shape.h" @@ -35,9 +35,9 @@ #include "tensorflow/lite/delegates/gpu/gl/gl_program.h" #include "tensorflow/lite/delegates/gpu/gl/gl_shader.h" #include "tensorflow/lite/delegates/gpu/gl_delegate.h" -#endif // !MEDIAPIPE_DISABLE_GPU +#endif // !MEDIAPIPE_DISABLE_GL_COMPUTE -#if defined(__APPLE__) && !TARGET_OS_OSX // iOS +#if defined(MEDIAPIPE_IOS) #import #import #import @@ -51,12 +51,15 @@ #include "tensorflow/lite/delegates/gpu/metal_delegate_internal.h" #endif // iOS +#if defined(MEDIAPIPE_ANDROID) +#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h" +#endif // ANDROID + namespace { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) typedef ::tflite::gpu::gl::GlBuffer GpuTensor; -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS +#elif defined(MEDIAPIPE_IOS) typedef id GpuTensor; #endif @@ -92,8 +95,7 @@ std::unique_ptr BuildEdgeTpuInterpreter( // * Aux namespace mediapipe { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) using ::tflite::gpu::gl::CopyBuffer; using ::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer; using ::tflite::gpu::gl::GlBuffer; @@ -172,12 +174,11 @@ class TfLiteInferenceCalculator : public CalculatorBase { 
std::unique_ptr model_; TfLiteDelegate* delegate_ = nullptr; -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) mediapipe::GlCalculatorHelper gpu_helper_; std::unique_ptr gpu_data_in_; std::vector> gpu_data_out_; -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS +#elif defined(MEDIAPIPE_IOS) MPPMetalHelper* gpu_helper_ = nullptr; std::unique_ptr gpu_data_in_; std::vector> gpu_data_out_; @@ -237,10 +238,9 @@ ::mediapipe::Status TfLiteInferenceCalculator::GetContract( use_gpu |= options.use_gpu(); if (use_gpu) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc)); -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS +#elif defined(MEDIAPIPE_IOS) MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]); #endif } @@ -280,26 +280,24 @@ ::mediapipe::Status TfLiteInferenceCalculator::Open(CalculatorContext* cc) { MP_RETURN_IF_ERROR(LoadModel(cc)); if (gpu_inference_) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) MP_RETURN_IF_ERROR(gpu_helper_.Open(cc)); -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS +#elif defined(MEDIAPIPE_IOS) gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc]; RET_CHECK(gpu_helper_); #endif -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) + +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext( [this, &cc]() -> ::mediapipe::Status { return LoadDelegate(cc); })); #else MP_RETURN_IF_ERROR(LoadDelegate(cc)); #endif + } else { +#if defined(__EMSCRIPTEN__) || defined(MEDIAPIPE_ANDROID) + MP_RETURN_IF_ERROR(LoadDelegate(cc)); +#endif // __EMSCRIPTEN__ || ANDROID } - -#if defined(__EMSCRIPTEN__) - MP_RETURN_IF_ERROR(LoadDelegate(cc)); 
-#endif // __EMSCRIPTEN__ - return ::mediapipe::OkStatus(); } @@ -307,8 +305,7 @@ ::mediapipe::Status TfLiteInferenceCalculator::Process(CalculatorContext* cc) { // 1. Receive pre-processed tensor inputs. if (gpu_input_) { // Read GPU input into SSBO. -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) const auto& input_tensors = cc->Inputs().Tag("TENSORS_GPU").Get>(); RET_CHECK_EQ(input_tensors.size(), 1); @@ -318,7 +315,7 @@ ::mediapipe::Status TfLiteInferenceCalculator::Process(CalculatorContext* cc) { RET_CHECK_CALL(CopyBuffer(input_tensors[0], gpu_data_in_->buffer)); return ::mediapipe::OkStatus(); })); -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS +#elif defined(MEDIAPIPE_IOS) const auto& input_tensors = cc->Inputs().Tag("TENSORS_GPU").Get>(); RET_CHECK_EQ(input_tensors.size(), 1); @@ -354,14 +351,13 @@ ::mediapipe::Status TfLiteInferenceCalculator::Process(CalculatorContext* cc) { // 2. Run inference. if (gpu_inference_) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) MP_RETURN_IF_ERROR( gpu_helper_.RunInGlContext([this]() -> ::mediapipe::Status { RET_CHECK_EQ(interpreter_->Invoke(), kTfLiteOk); return ::mediapipe::OkStatus(); })); -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS +#elif defined(MEDIAPIPE_IOS) RET_CHECK_EQ(interpreter_->Invoke(), kTfLiteOk); #endif } else { @@ -370,8 +366,7 @@ ::mediapipe::Status TfLiteInferenceCalculator::Process(CalculatorContext* cc) { // 3. Output processed tensors. if (gpu_output_) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) // Output result tensors (GPU). 
auto output_tensors = absl::make_unique>(); MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext( @@ -388,7 +383,7 @@ ::mediapipe::Status TfLiteInferenceCalculator::Process(CalculatorContext* cc) { cc->Outputs() .Tag("TENSORS_GPU") .Add(output_tensors.release(), cc->InputTimestamp()); -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS +#elif defined(MEDIAPIPE_IOS) // Output result tensors (GPU). auto output_tensors = absl::make_unique>(); output_tensors->resize(gpu_data_out_.size()); @@ -433,23 +428,24 @@ ::mediapipe::Status TfLiteInferenceCalculator::Process(CalculatorContext* cc) { ::mediapipe::Status TfLiteInferenceCalculator::Close(CalculatorContext* cc) { if (delegate_) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) - MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> Status { - TfLiteGpuDelegateDelete(delegate_); + if (gpu_inference_) { +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) + MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> Status { + TfLiteGpuDelegateDelete(delegate_); + gpu_data_in_.reset(); + for (int i = 0; i < gpu_data_out_.size(); ++i) { + gpu_data_out_[i].reset(); + } + return ::mediapipe::OkStatus(); + })); +#elif defined(MEDIAPIPE_IOS) + TFLGpuDelegateDelete(delegate_); gpu_data_in_.reset(); for (int i = 0; i < gpu_data_out_.size(); ++i) { gpu_data_out_[i].reset(); } - return ::mediapipe::OkStatus(); - })); -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS - TFLGpuDelegateDelete(delegate_); - gpu_data_in_.reset(); - for (int i = 0; i < gpu_data_out_.size(); ++i) { - gpu_data_out_[i].reset(); - } #endif + } delegate_ = nullptr; } #if defined(MEDIAPIPE_EDGE_TPU) @@ -522,8 +518,22 @@ ::mediapipe::Status TfLiteInferenceCalculator::LoadModel( ::mediapipe::Status TfLiteInferenceCalculator::LoadDelegate( CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if defined(MEDIAPIPE_ANDROID) + if (!gpu_inference_) { + if (cc->Options() 
+ .use_nnapi()) { + // Attempt to use NNAPI. + // If not supported, the default CPU delegate will be created and used. + interpreter_->SetAllowFp16PrecisionForFp32(1); + delegate_ = tflite::NnApiDelegate(); + RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_), kTfLiteOk); + } + // Return, no need for GPU delegate below. + return ::mediapipe::OkStatus(); + } +#endif // ANDROID + +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) // Configure and create the delegate. TfLiteGpuDelegateOptions options = TfLiteGpuDelegateOptionsDefault(); options.compile_options.precision_loss_allowed = 1; @@ -583,7 +593,7 @@ ::mediapipe::Status TfLiteInferenceCalculator::LoadDelegate( RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_), kTfLiteOk); #endif // OpenGL -#if defined(__APPLE__) && !TARGET_OS_OSX // iOS +#if defined(MEDIAPIPE_IOS) // Configure and create the delegate. TFLGpuDelegateOptions options; options.allow_precision_loss = false; // Must match converter, F=float/T=half diff --git a/mediapipe/calculators/tflite/tflite_inference_calculator.proto b/mediapipe/calculators/tflite/tflite_inference_calculator.proto index a2950add35..8a862f3dfc 100644 --- a/mediapipe/calculators/tflite/tflite_inference_calculator.proto +++ b/mediapipe/calculators/tflite/tflite_inference_calculator.proto @@ -45,4 +45,9 @@ message TfLiteInferenceCalculatorOptions { // input tensors are on CPU. For input tensors on GPU, GPU backend is always // used. optional bool use_gpu = 2 [default = false]; + + // Android only. When true, an NNAPI delegate will be used for inference. + // If NNAPI is not available, then the default CPU delegate will be used + // automatically. 
+ optional bool use_nnapi = 3 [default = false]; } diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.cc b/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.cc index 906b4242fc..6e1c6e1e68 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.cc @@ -24,8 +24,7 @@ #include "mediapipe/framework/port/ret_check.h" #include "mediapipe/util/resource_util.h" #include "tensorflow/lite/interpreter.h" -#if defined(__EMSCRIPTEN__) || defined(__ANDROID__) || \ - (defined(__APPLE__) && !TARGET_OS_OSX) +#if defined(MEDIAPIPE_MOBILE) #include "mediapipe/util/android/file/base/file.h" #include "mediapipe/util/android/file/base/helpers.h" #else diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc b/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc index bac852f44f..371c7862ef 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc @@ -27,8 +27,7 @@ #include "mediapipe/framework/port/ret_check.h" #include "tensorflow/lite/interpreter.h" -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) #include "mediapipe/gpu/gl_calculator_helper.h" #include "tensorflow/lite/delegates/gpu/gl/gl_buffer.h" #include "tensorflow/lite/delegates/gpu/gl/gl_program.h" @@ -36,7 +35,7 @@ #include "tensorflow/lite/delegates/gpu/gl_delegate.h" #endif // !MEDIAPIPE_DISABLE_GPU -#if defined(__APPLE__) && !TARGET_OS_OSX // iOS +#if defined(MEDIAPIPE_IOS) #import #import #import @@ -56,17 +55,15 @@ constexpr int kNumCoordsPerBox = 4; namespace mediapipe { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) using 
::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer; using ::tflite::gpu::gl::GlShader; #endif -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) typedef ::tflite::gpu::gl::GlBuffer GpuTensor; typedef ::tflite::gpu::gl::GlProgram GpuProgram; -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS +#elif defined(MEDIAPIPE_IOS) typedef id GpuTensor; typedef id GpuProgram; #endif @@ -183,11 +180,10 @@ class TfLiteTensorsToDetectionsCalculator : public CalculatorBase { std::vector anchors_; bool side_packet_anchors_{}; -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) mediapipe::GlCalculatorHelper gpu_helper_; std::unique_ptr gpu_data_; -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS +#elif defined(MEDIAPIPE_IOS) MPPMetalHelper* gpu_helper_ = nullptr; std::unique_ptr gpu_data_; #endif @@ -226,10 +222,9 @@ ::mediapipe::Status TfLiteTensorsToDetectionsCalculator::GetContract( } if (use_gpu) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc)); -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS +#elif defined(MEDIAPIPE_IOS) MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]); #endif } @@ -243,10 +238,9 @@ ::mediapipe::Status TfLiteTensorsToDetectionsCalculator::Open( if (cc->Inputs().HasTag("TENSORS_GPU")) { gpu_input_ = true; -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) MP_RETURN_IF_ERROR(gpu_helper_.Open(cc)); -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS +#elif defined(MEDIAPIPE_IOS) gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc]; RET_CHECK(gpu_helper_); #endif @@ -406,8 +400,7 @@ ::mediapipe::Status 
TfLiteTensorsToDetectionsCalculator::ProcessCPU( } ::mediapipe::Status TfLiteTensorsToDetectionsCalculator::ProcessGPU( CalculatorContext* cc, std::vector* output_detections) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) const auto& input_tensors = cc->Inputs().Tag("TENSORS_GPU").Get>(); RET_CHECK_GE(input_tensors.size(), 2); @@ -470,7 +463,7 @@ ::mediapipe::Status TfLiteTensorsToDetectionsCalculator::ProcessGPU( return ::mediapipe::OkStatus(); })); -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS +#elif defined(MEDIAPIPE_IOS) const auto& input_tensors = cc->Inputs().Tag("TENSORS_GPU").Get>(); @@ -569,12 +562,11 @@ ::mediapipe::Status TfLiteTensorsToDetectionsCalculator::ProcessGPU( ::mediapipe::Status TfLiteTensorsToDetectionsCalculator::Close( CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) gpu_helper_.RunInGlContext([this] { gpu_data_.reset(); }); -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS +#elif defined(MEDIAPIPE_IOS) gpu_data_.reset(); -#endif // !MEDIAPIPE_DISABLE_GPU +#endif return ::mediapipe::OkStatus(); } @@ -723,8 +715,7 @@ Detection TfLiteTensorsToDetectionsCalculator::ConvertToDetection( ::mediapipe::Status TfLiteTensorsToDetectionsCalculator::GpuInit( CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> ::mediapipe::Status { gpu_data_ = absl::make_unique(); @@ -937,8 +928,7 @@ void main() { return ::mediapipe::OkStatus(); })); -#elif defined(__APPLE__) && !TARGET_OS_OSX // iOS - // TODO consolidate Metal and OpenGL shaders via vulkan. 
+#elif defined(MEDIAPIPE_IOS) gpu_data_ = absl::make_unique(); id device = gpu_helper_.mtlDevice; @@ -1168,7 +1158,7 @@ kernel void scoreKernel( CHECK_LT(num_classes_, max_wg_size) << "# classes must be <" << max_wg_size; } -#endif // __ANDROID__ or iOS +#endif // !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) return ::mediapipe::OkStatus(); } diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator.cc b/mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator.cc index 996b1fa351..f6cffee407 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator.cc @@ -76,11 +76,11 @@ ::mediapipe::Status TfLiteTensorsToLandmarksCalculator::GetContract( } if (cc->Outputs().HasTag("LANDMARKS")) { - cc->Outputs().Tag("LANDMARKS").Set>(); + cc->Outputs().Tag("LANDMARKS").Set(); } if (cc->Outputs().HasTag("NORM_LANDMARKS")) { - cc->Outputs().Tag("NORM_LANDMARKS").Set>(); + cc->Outputs().Tag("NORM_LANDMARKS").Set(); } return ::mediapipe::OkStatus(); @@ -127,54 +127,55 @@ ::mediapipe::Status TfLiteTensorsToLandmarksCalculator::Process( const float* raw_landmarks = raw_tensor->data.f; - auto output_landmarks = absl::make_unique>(); + LandmarkList output_landmarks; for (int ld = 0; ld < num_landmarks_; ++ld) { const int offset = ld * num_dimensions; - Landmark landmark; + Landmark* landmark = output_landmarks.add_landmark(); if (options_.flip_horizontally()) { - landmark.set_x(options_.input_image_width() - raw_landmarks[offset]); + landmark->set_x(options_.input_image_width() - raw_landmarks[offset]); } else { - landmark.set_x(raw_landmarks[offset]); + landmark->set_x(raw_landmarks[offset]); } if (num_dimensions > 1) { if (options_.flip_vertically()) { - landmark.set_y(options_.input_image_height() - - raw_landmarks[offset + 1]); + landmark->set_y(options_.input_image_height() - + raw_landmarks[offset + 1]); } else { - landmark.set_y(raw_landmarks[offset + 1]); 
+ landmark->set_y(raw_landmarks[offset + 1]); } } if (num_dimensions > 2) { - landmark.set_z(raw_landmarks[offset + 2]); + landmark->set_z(raw_landmarks[offset + 2]); } - output_landmarks->push_back(landmark); } // Output normalized landmarks if required. if (cc->Outputs().HasTag("NORM_LANDMARKS")) { - auto output_norm_landmarks = - absl::make_unique>(); - for (const auto& landmark : *output_landmarks) { - NormalizedLandmark norm_landmark; - norm_landmark.set_x(static_cast(landmark.x()) / - options_.input_image_width()); - norm_landmark.set_y(static_cast(landmark.y()) / - options_.input_image_height()); - norm_landmark.set_z(landmark.z() / options_.normalize_z()); - - output_norm_landmarks->push_back(norm_landmark); + NormalizedLandmarkList output_norm_landmarks; + // for (const auto& landmark : output_landmarks) { + for (int i = 0; i < output_landmarks.landmark_size(); ++i) { + const Landmark& landmark = output_landmarks.landmark(i); + NormalizedLandmark* norm_landmark = output_norm_landmarks.add_landmark(); + norm_landmark->set_x(static_cast(landmark.x()) / + options_.input_image_width()); + norm_landmark->set_y(static_cast(landmark.y()) / + options_.input_image_height()); + norm_landmark->set_z(landmark.z() / options_.normalize_z()); } cc->Outputs() .Tag("NORM_LANDMARKS") - .Add(output_norm_landmarks.release(), cc->InputTimestamp()); + .AddPacket(MakePacket(output_norm_landmarks) + .At(cc->InputTimestamp())); } + // Output absolute landmarks. 
if (cc->Outputs().HasTag("LANDMARKS")) { cc->Outputs() .Tag("LANDMARKS") - .Add(output_landmarks.release(), cc->InputTimestamp()); + .AddPacket(MakePacket(output_landmarks) + .At(cc->InputTimestamp())); } return ::mediapipe::OkStatus(); diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator.cc b/mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator.cc index 55279308a8..7fde032247 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator.cc @@ -28,8 +28,7 @@ #include "mediapipe/util/resource_util.h" #include "tensorflow/lite/interpreter.h" -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) #include "mediapipe/gpu/gl_calculator_helper.h" #include "mediapipe/gpu/gl_simple_shaders.h" #include "mediapipe/gpu/shader_util.h" @@ -54,8 +53,7 @@ float Clamp(float val, float min, float max) { namespace mediapipe { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) using ::tflite::gpu::gl::CopyBuffer; using ::tflite::gpu::gl::CreateReadWriteRgbaImageTexture; using ::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer; @@ -131,8 +129,7 @@ class TfLiteTensorsToSegmentationCalculator : public CalculatorBase { int tensor_channels_ = 0; bool use_gpu_ = false; -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) mediapipe::GlCalculatorHelper gpu_helper_; std::unique_ptr mask_program_with_prev_; std::unique_ptr mask_program_no_prev_; @@ -162,8 +159,7 @@ ::mediapipe::Status TfLiteTensorsToSegmentationCalculator::GetContract( } // Inputs GPU. 
-#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) if (cc->Inputs().HasTag("TENSORS_GPU")) { cc->Inputs().Tag("TENSORS_GPU").Set>(); use_gpu |= true; @@ -182,8 +178,7 @@ ::mediapipe::Status TfLiteTensorsToSegmentationCalculator::GetContract( if (cc->Outputs().HasTag("MASK")) { cc->Outputs().Tag("MASK").Set(); } -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) if (cc->Outputs().HasTag("MASK_GPU")) { cc->Outputs().Tag("MASK_GPU").Set(); use_gpu |= true; @@ -191,8 +186,7 @@ ::mediapipe::Status TfLiteTensorsToSegmentationCalculator::GetContract( #endif // !MEDIAPIPE_DISABLE_GPU if (use_gpu) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc)); #endif // !MEDIAPIPE_DISABLE_GPU } @@ -205,8 +199,7 @@ ::mediapipe::Status TfLiteTensorsToSegmentationCalculator::Open( if (cc->Inputs().HasTag("TENSORS_GPU")) { use_gpu_ = true; -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) MP_RETURN_IF_ERROR(gpu_helper_.Open(cc)); #endif // !MEDIAPIPE_DISABLE_GPU } @@ -214,8 +207,7 @@ ::mediapipe::Status TfLiteTensorsToSegmentationCalculator::Open( MP_RETURN_IF_ERROR(LoadOptions(cc)); if (use_gpu_) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) MP_RETURN_IF_ERROR( gpu_helper_.RunInGlContext([this, cc]() -> ::mediapipe::Status { MP_RETURN_IF_ERROR(InitGpu(cc)); @@ -232,8 +224,7 @@ ::mediapipe::Status TfLiteTensorsToSegmentationCalculator::Open( ::mediapipe::Status TfLiteTensorsToSegmentationCalculator::Process( CalculatorContext* cc) { if (use_gpu_) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && 
!defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) MP_RETURN_IF_ERROR( gpu_helper_.RunInGlContext([this, cc]() -> ::mediapipe::Status { MP_RETURN_IF_ERROR(ProcessGpu(cc)); @@ -249,8 +240,7 @@ ::mediapipe::Status TfLiteTensorsToSegmentationCalculator::Process( ::mediapipe::Status TfLiteTensorsToSegmentationCalculator::Close( CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) gpu_helper_.RunInGlContext([this] { if (upsample_program_) glDeleteProgram(upsample_program_); upsample_program_ = 0; @@ -377,8 +367,7 @@ ::mediapipe::Status TfLiteTensorsToSegmentationCalculator::ProcessGpu( if (cc->Inputs().Tag("TENSORS_GPU").IsEmpty()) { return ::mediapipe::OkStatus(); } -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) // Get input streams. const auto& input_tensors = cc->Inputs().Tag("TENSORS_GPU").Get>(); @@ -464,8 +453,7 @@ ::mediapipe::Status TfLiteTensorsToSegmentationCalculator::ProcessGpu( } void TfLiteTensorsToSegmentationCalculator::GlRender() { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) static const GLfloat square_vertices[] = { -1.0f, -1.0f, // bottom left 1.0f, -1.0f, // bottom right @@ -537,8 +525,7 @@ ::mediapipe::Status TfLiteTensorsToSegmentationCalculator::LoadOptions( ::mediapipe::Status TfLiteTensorsToSegmentationCalculator::InitGpu( CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ - !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> ::mediapipe::Status { // A shader to process a segmentation tensor into an output mask, diff --git a/mediapipe/calculators/util/detection_label_id_to_text_calculator.cc 
b/mediapipe/calculators/util/detection_label_id_to_text_calculator.cc index 7e8beadf16..4ac09e5afe 100644 --- a/mediapipe/calculators/util/detection_label_id_to_text_calculator.cc +++ b/mediapipe/calculators/util/detection_label_id_to_text_calculator.cc @@ -19,8 +19,7 @@ #include "mediapipe/framework/port/status.h" #include "mediapipe/util/resource_util.h" -#if defined(MEDIAPIPE_LITE) || defined(__EMSCRIPTEN__) || \ - defined(__ANDROID__) || (defined(__APPLE__) && !TARGET_OS_OSX) +#if defined(MEDIAPIPE_MOBILE) #include "mediapipe/util/android/file/base/file.h" #include "mediapipe/util/android/file/base/helpers.h" #else diff --git a/mediapipe/calculators/util/filter_collection_calculator.cc b/mediapipe/calculators/util/filter_collection_calculator.cc index f86de04f01..e110afe7d1 100644 --- a/mediapipe/calculators/util/filter_collection_calculator.cc +++ b/mediapipe/calculators/util/filter_collection_calculator.cc @@ -27,8 +27,8 @@ typedef FilterCollectionCalculator> REGISTER_CALCULATOR(FilterNormalizedRectCollectionCalculator); typedef FilterCollectionCalculator< - std::vector>> - FilterLandmarksCollectionCalculator; -REGISTER_CALCULATOR(FilterLandmarksCollectionCalculator); + std::vector<::mediapipe::NormalizedLandmarkList>> + FilterLandmarkListCollectionCalculator; +REGISTER_CALCULATOR(FilterLandmarkListCollectionCalculator); } // namespace mediapipe diff --git a/mediapipe/calculators/util/landmark_letterbox_removal_calculator.cc b/mediapipe/calculators/util/landmark_letterbox_removal_calculator.cc index fd22cf191d..aca312c30d 100644 --- a/mediapipe/calculators/util/landmark_letterbox_removal_calculator.cc +++ b/mediapipe/calculators/util/landmark_letterbox_removal_calculator.cc @@ -49,7 +49,7 @@ constexpr char kLetterboxPaddingTag[] = "LETTERBOX_PADDING"; // corresponding input image before letterboxing. 
// // Input: -// LANDMARKS: An std::vector representing landmarks on an +// LANDMARKS: A NormalizedLandmarkList representing landmarks on an // letterboxed image. // // LETTERBOX_PADDING: An std::array representing the letterbox @@ -57,7 +57,7 @@ constexpr char kLetterboxPaddingTag[] = "LETTERBOX_PADDING"; // image, normalized to [0.f, 1.f] by the letterboxed image dimensions. // // Output: -// LANDMARKS: An std::vector representing landmarks with +// LANDMARKS: An NormalizedLandmarkList proto representing landmarks with // their locations adjusted to the letterbox-removed (non-padded) image. // // Usage example: @@ -74,10 +74,10 @@ class LandmarkLetterboxRemovalCalculator : public CalculatorBase { cc->Inputs().HasTag(kLetterboxPaddingTag)) << "Missing one or more input streams."; - cc->Inputs().Tag(kLandmarksTag).Set>(); + cc->Inputs().Tag(kLandmarksTag).Set(); cc->Inputs().Tag(kLetterboxPaddingTag).Set>(); - cc->Outputs().Tag(kLandmarksTag).Set>(); + cc->Outputs().Tag(kLandmarksTag).Set(); return ::mediapipe::OkStatus(); } @@ -94,8 +94,8 @@ class LandmarkLetterboxRemovalCalculator : public CalculatorBase { return ::mediapipe::OkStatus(); } - const auto& input_landmarks = - cc->Inputs().Tag(kLandmarksTag).Get>(); + const NormalizedLandmarkList& input_landmarks = + cc->Inputs().Tag(kLandmarksTag).Get(); const auto& letterbox_padding = cc->Inputs().Tag(kLetterboxPaddingTag).Get>(); @@ -104,24 +104,23 @@ class LandmarkLetterboxRemovalCalculator : public CalculatorBase { const float left_and_right = letterbox_padding[0] + letterbox_padding[2]; const float top_and_bottom = letterbox_padding[1] + letterbox_padding[3]; - auto output_landmarks = - absl::make_unique>(); - for (const auto& landmark : input_landmarks) { - NormalizedLandmark new_landmark; + NormalizedLandmarkList output_landmarks; + for (int i = 0; i < input_landmarks.landmark_size(); ++i) { + const NormalizedLandmark& landmark = input_landmarks.landmark(i); + NormalizedLandmark* new_landmark = 
output_landmarks.add_landmark(); const float new_x = (landmark.x() - left) / (1.0f - left_and_right); const float new_y = (landmark.y() - top) / (1.0f - top_and_bottom); - new_landmark.set_x(new_x); - new_landmark.set_y(new_y); + new_landmark->set_x(new_x); + new_landmark->set_y(new_y); // Keep z-coord as is. - new_landmark.set_z(landmark.z()); - - output_landmarks->emplace_back(new_landmark); + new_landmark->set_z(landmark.z()); } cc->Outputs() .Tag(kLandmarksTag) - .Add(output_landmarks.release(), cc->InputTimestamp()); + .AddPacket(MakePacket(output_landmarks) + .At(cc->InputTimestamp())); return ::mediapipe::OkStatus(); } }; diff --git a/mediapipe/calculators/util/landmark_letterbox_removal_calculator_test.cc b/mediapipe/calculators/util/landmark_letterbox_removal_calculator_test.cc index 33724890e3..7723c0d896 100644 --- a/mediapipe/calculators/util/landmark_letterbox_removal_calculator_test.cc +++ b/mediapipe/calculators/util/landmark_letterbox_removal_calculator_test.cc @@ -43,10 +43,10 @@ CalculatorGraphConfig::Node GetDefaultNode() { TEST(LandmarkLetterboxRemovalCalculatorTest, PaddingLeftRight) { CalculatorRunner runner(GetDefaultNode()); - auto landmarks = absl::make_unique>(); - landmarks->push_back(CreateLandmark(0.5f, 0.5f)); - landmarks->push_back(CreateLandmark(0.2f, 0.2f)); - landmarks->push_back(CreateLandmark(0.7f, 0.7f)); + auto landmarks = absl::make_unique(); + *landmarks->add_landmark() = CreateLandmark(0.5f, 0.5f); + *landmarks->add_landmark() = CreateLandmark(0.2f, 0.2f); + *landmarks->add_landmark() = CreateLandmark(0.7f, 0.7f); runner.MutableInputs() ->Tag("LANDMARKS") .packets.push_back( @@ -61,26 +61,28 @@ TEST(LandmarkLetterboxRemovalCalculatorTest, PaddingLeftRight) { MP_ASSERT_OK(runner.Run()) << "Calculator execution failed."; const std::vector& output = runner.Outputs().Tag("LANDMARKS").packets; ASSERT_EQ(1, output.size()); - const auto& output_landmarks = - output[0].Get>(); + const auto& output_landmarks = output[0].Get(); - 
EXPECT_EQ(output_landmarks.size(), 3); + EXPECT_EQ(output_landmarks.landmark_size(), 3); - EXPECT_THAT(output_landmarks[0].x(), testing::FloatNear(0.6f, 1e-5)); - EXPECT_THAT(output_landmarks[0].y(), testing::FloatNear(0.5f, 1e-5)); - EXPECT_THAT(output_landmarks[1].x(), testing::FloatNear(0.0f, 1e-5)); - EXPECT_THAT(output_landmarks[1].y(), testing::FloatNear(0.2f, 1e-5)); - EXPECT_THAT(output_landmarks[2].x(), testing::FloatNear(1.0f, 1e-5)); - EXPECT_THAT(output_landmarks[2].y(), testing::FloatNear(0.7f, 1e-5)); + EXPECT_THAT(output_landmarks.landmark(0).x(), testing::FloatNear(0.6f, 1e-5)); + EXPECT_THAT(output_landmarks.landmark(0).y(), testing::FloatNear(0.5f, 1e-5)); + EXPECT_THAT(output_landmarks.landmark(1).x(), testing::FloatNear(0.0f, 1e-5)); + EXPECT_THAT(output_landmarks.landmark(1).y(), testing::FloatNear(0.2f, 1e-5)); + EXPECT_THAT(output_landmarks.landmark(2).x(), testing::FloatNear(1.0f, 1e-5)); + EXPECT_THAT(output_landmarks.landmark(2).y(), testing::FloatNear(0.7f, 1e-5)); } TEST(LandmarkLetterboxRemovalCalculatorTest, PaddingTopBottom) { CalculatorRunner runner(GetDefaultNode()); - auto landmarks = absl::make_unique>(); - landmarks->push_back(CreateLandmark(0.5f, 0.5f)); - landmarks->push_back(CreateLandmark(0.2f, 0.2f)); - landmarks->push_back(CreateLandmark(0.7f, 0.7f)); + auto landmarks = absl::make_unique(); + NormalizedLandmark* landmark = landmarks->add_landmark(); + *landmark = CreateLandmark(0.5f, 0.5f); + landmark = landmarks->add_landmark(); + *landmark = CreateLandmark(0.2f, 0.2f); + landmark = landmarks->add_landmark(); + *landmark = CreateLandmark(0.7f, 0.7f); runner.MutableInputs() ->Tag("LANDMARKS") .packets.push_back( @@ -95,17 +97,16 @@ TEST(LandmarkLetterboxRemovalCalculatorTest, PaddingTopBottom) { MP_ASSERT_OK(runner.Run()) << "Calculator execution failed."; const std::vector& output = runner.Outputs().Tag("LANDMARKS").packets; ASSERT_EQ(1, output.size()); - const auto& output_landmarks = - output[0].Get>(); + const auto& 
output_landmarks = output[0].Get(); - EXPECT_EQ(output_landmarks.size(), 3); + EXPECT_EQ(output_landmarks.landmark_size(), 3); - EXPECT_THAT(output_landmarks[0].x(), testing::FloatNear(0.5f, 1e-5)); - EXPECT_THAT(output_landmarks[0].y(), testing::FloatNear(0.6f, 1e-5)); - EXPECT_THAT(output_landmarks[1].x(), testing::FloatNear(0.2f, 1e-5)); - EXPECT_THAT(output_landmarks[1].y(), testing::FloatNear(0.0f, 1e-5)); - EXPECT_THAT(output_landmarks[2].x(), testing::FloatNear(0.7f, 1e-5)); - EXPECT_THAT(output_landmarks[2].y(), testing::FloatNear(1.0f, 1e-5)); + EXPECT_THAT(output_landmarks.landmark(0).x(), testing::FloatNear(0.5f, 1e-5)); + EXPECT_THAT(output_landmarks.landmark(0).y(), testing::FloatNear(0.6f, 1e-5)); + EXPECT_THAT(output_landmarks.landmark(1).x(), testing::FloatNear(0.2f, 1e-5)); + EXPECT_THAT(output_landmarks.landmark(1).y(), testing::FloatNear(0.0f, 1e-5)); + EXPECT_THAT(output_landmarks.landmark(2).x(), testing::FloatNear(0.7f, 1e-5)); + EXPECT_THAT(output_landmarks.landmark(2).y(), testing::FloatNear(1.0f, 1e-5)); } } // namespace mediapipe diff --git a/mediapipe/calculators/util/landmark_projection_calculator.cc b/mediapipe/calculators/util/landmark_projection_calculator.cc index 39ac61f2eb..9c868fd50e 100644 --- a/mediapipe/calculators/util/landmark_projection_calculator.cc +++ b/mediapipe/calculators/util/landmark_projection_calculator.cc @@ -47,13 +47,13 @@ constexpr char kRectTag[] = "NORM_RECT"; // Projects normalized landmarks in a rectangle to its original coordinates. The // rectangle must also be in normalized coordinates. // Input: -// NORM_LANDMARKS: An std::vector representing landmarks +// NORM_LANDMARKS: A NormalizedLandmarkList representing landmarks // in a normalized rectangle. // NORM_RECT: An NormalizedRect representing a normalized rectangle in image // coordinates. 
// // Output: -// NORM_LANDMARKS: An std::vector representing landmarks +// NORM_LANDMARKS: A NormalizedLandmarkList representing landmarks // with their locations adjusted to the image. // // Usage example: @@ -70,10 +70,10 @@ class LandmarkProjectionCalculator : public CalculatorBase { cc->Inputs().HasTag(kRectTag)) << "Missing one or more input streams."; - cc->Inputs().Tag(kLandmarksTag).Set>(); + cc->Inputs().Tag(kLandmarksTag).Set(); cc->Inputs().Tag(kRectTag).Set(); - cc->Outputs().Tag(kLandmarksTag).Set>(); + cc->Outputs().Tag(kLandmarksTag).Set(); return ::mediapipe::OkStatus(); } @@ -92,14 +92,14 @@ class LandmarkProjectionCalculator : public CalculatorBase { return ::mediapipe::OkStatus(); } - const auto& input_landmarks = - cc->Inputs().Tag(kLandmarksTag).Get>(); + const NormalizedLandmarkList& input_landmarks = + cc->Inputs().Tag(kLandmarksTag).Get(); const auto& input_rect = cc->Inputs().Tag(kRectTag).Get(); - auto output_landmarks = - absl::make_unique>(); - for (const auto& landmark : input_landmarks) { - NormalizedLandmark new_landmark; + NormalizedLandmarkList output_landmarks; + for (int i = 0; i < input_landmarks.landmark_size(); ++i) { + const NormalizedLandmark& landmark = input_landmarks.landmark(i); + NormalizedLandmark* new_landmark = output_landmarks.add_landmark(); const float x = landmark.x() - 0.5f; const float y = landmark.y() - 0.5f; @@ -110,17 +110,16 @@ class LandmarkProjectionCalculator : public CalculatorBase { new_x = new_x * input_rect.width() + input_rect.x_center(); new_y = new_y * input_rect.height() + input_rect.y_center(); - new_landmark.set_x(new_x); - new_landmark.set_y(new_y); + new_landmark->set_x(new_x); + new_landmark->set_y(new_y); // Keep z-coord as is. 
- new_landmark.set_z(landmark.z()); - - output_landmarks->emplace_back(new_landmark); + new_landmark->set_z(landmark.z()); } cc->Outputs() .Tag(kLandmarksTag) - .Add(output_landmarks.release(), cc->InputTimestamp()); + .AddPacket(MakePacket(output_landmarks) + .At(cc->InputTimestamp())); return ::mediapipe::OkStatus(); } }; diff --git a/mediapipe/calculators/util/landmarks_to_detection_calculator.cc b/mediapipe/calculators/util/landmarks_to_detection_calculator.cc index ca71ac3773..5f429cabfe 100644 --- a/mediapipe/calculators/util/landmarks_to_detection_calculator.cc +++ b/mediapipe/calculators/util/landmarks_to_detection_calculator.cc @@ -28,8 +28,7 @@ namespace { constexpr char kDetectionTag[] = "DETECTION"; constexpr char kNormalizedLandmarksTag[] = "NORM_LANDMARKS"; -Detection ConvertLandmarksToDetection( - const std::vector& landmarks) { +Detection ConvertLandmarksToDetection(const NormalizedLandmarkList& landmarks) { Detection detection; LocationData* location_data = detection.mutable_location_data(); @@ -37,7 +36,8 @@ Detection ConvertLandmarksToDetection( float x_max = std::numeric_limits::min(); float y_min = std::numeric_limits::max(); float y_max = std::numeric_limits::min(); - for (const auto& landmark : landmarks) { + for (int i = 0; i < landmarks.landmark_size(); ++i) { + const NormalizedLandmark& landmark = landmarks.landmark(i); x_min = std::min(x_min, landmark.x()); x_max = std::max(x_max, landmark.x()); y_min = std::min(y_min, landmark.y()); @@ -67,7 +67,7 @@ Detection ConvertLandmarksToDetection( // to specify a subset of landmarks for creating the detection. // // Input: -// NOMR_LANDMARKS: A vector of NormalizedLandmark. +// NOMR_LANDMARKS: A NormalizedLandmarkList proto. // // Output: // DETECTION: A Detection proto. 
@@ -95,9 +95,7 @@ ::mediapipe::Status LandmarksToDetectionCalculator::GetContract( RET_CHECK(cc->Inputs().HasTag(kNormalizedLandmarksTag)); RET_CHECK(cc->Outputs().HasTag(kDetectionTag)); // TODO: Also support converting Landmark to Detection. - cc->Inputs() - .Tag(kNormalizedLandmarksTag) - .Set>(); + cc->Inputs().Tag(kNormalizedLandmarksTag).Set(); cc->Outputs().Tag(kDetectionTag).Set(); return ::mediapipe::OkStatus(); @@ -113,19 +111,20 @@ ::mediapipe::Status LandmarksToDetectionCalculator::Open( ::mediapipe::Status LandmarksToDetectionCalculator::Process( CalculatorContext* cc) { - const auto& landmarks = cc->Inputs() - .Tag(kNormalizedLandmarksTag) - .Get>(); - RET_CHECK_GT(landmarks.size(), 0) << "Input landmark vector is empty."; + const auto& landmarks = + cc->Inputs().Tag(kNormalizedLandmarksTag).Get(); + RET_CHECK_GT(landmarks.landmark_size(), 0) + << "Input landmark vector is empty."; auto detection = absl::make_unique(); if (options_.selected_landmark_indices_size()) { - std::vector subset_landmarks( - options_.selected_landmark_indices_size()); - for (int i = 0; i < subset_landmarks.size(); ++i) { - RET_CHECK_LT(options_.selected_landmark_indices(i), landmarks.size()) + NormalizedLandmarkList subset_landmarks; + for (int i = 0; i < options_.selected_landmark_indices_size(); ++i) { + RET_CHECK_LT(options_.selected_landmark_indices(i), + landmarks.landmark_size()) << "Index of landmark subset is out of range."; - subset_landmarks[i] = landmarks[options_.selected_landmark_indices(i)]; + *subset_landmarks.add_landmark() = + landmarks.landmark(options_.selected_landmark_indices(i)); } *detection = ConvertLandmarksToDetection(subset_landmarks); } else { diff --git a/mediapipe/calculators/util/landmarks_to_floats_calculator.cc b/mediapipe/calculators/util/landmarks_to_floats_calculator.cc index 09ab4b575f..b86542dd5a 100644 --- a/mediapipe/calculators/util/landmarks_to_floats_calculator.cc +++ b/mediapipe/calculators/util/landmarks_to_floats_calculator.cc @@ 
-48,7 +48,7 @@ constexpr char kMatrixTag[] = "MATRIX"; // Converts a vector of landmarks to a vector of floats or a matrix. // Input: -// NORM_LANDMARKS: An std::vector. +// NORM_LANDMARKS: A NormalizedLandmarkList proto. // // Output: // FLOATS(optional): A vector of floats from flattened landmarks. @@ -63,7 +63,7 @@ constexpr char kMatrixTag[] = "MATRIX"; class LandmarksToFloatsCalculator : public CalculatorBase { public: static ::mediapipe::Status GetContract(CalculatorContract* cc) { - cc->Inputs().Tag(kLandmarksTag).Set>(); + cc->Inputs().Tag(kLandmarksTag).Set(); RET_CHECK(cc->Outputs().HasTag(kFloatsTag) || cc->Outputs().HasTag(kMatrixTag)); if (cc->Outputs().HasTag(kFloatsTag)) { @@ -94,11 +94,12 @@ class LandmarksToFloatsCalculator : public CalculatorBase { } const auto& input_landmarks = - cc->Inputs().Tag(kLandmarksTag).Get>(); + cc->Inputs().Tag(kLandmarksTag).Get(); if (cc->Outputs().HasTag(kFloatsTag)) { auto output_floats = absl::make_unique>(); - for (const auto& landmark : input_landmarks) { + for (int i = 0; i < input_landmarks.landmark_size(); ++i) { + const NormalizedLandmark& landmark = input_landmarks.landmark(i); output_floats->emplace_back(landmark.x()); if (num_dimensions_ > 1) { output_floats->emplace_back(landmark.y()); @@ -113,14 +114,14 @@ class LandmarksToFloatsCalculator : public CalculatorBase { .Add(output_floats.release(), cc->InputTimestamp()); } else { auto output_matrix = absl::make_unique(); - output_matrix->setZero(num_dimensions_, input_landmarks.size()); - for (int i = 0; i < input_landmarks.size(); ++i) { - (*output_matrix)(0, i) = input_landmarks[i].x(); + output_matrix->setZero(num_dimensions_, input_landmarks.landmark_size()); + for (int i = 0; i < input_landmarks.landmark_size(); ++i) { + (*output_matrix)(0, i) = input_landmarks.landmark(i).x(); if (num_dimensions_ > 1) { - (*output_matrix)(1, i) = input_landmarks[i].y(); + (*output_matrix)(1, i) = input_landmarks.landmark(i).y(); } if (num_dimensions_ > 2) { - 
(*output_matrix)(2, i) = input_landmarks[i].z(); + (*output_matrix)(2, i) = input_landmarks.landmark(i).z(); } } cc->Outputs() diff --git a/mediapipe/calculators/util/landmarks_to_render_data_calculator.cc b/mediapipe/calculators/util/landmarks_to_render_data_calculator.cc index 25ffb67ef0..c2b318a3df 100644 --- a/mediapipe/calculators/util/landmarks_to_render_data_calculator.cc +++ b/mediapipe/calculators/util/landmarks_to_render_data_calculator.cc @@ -46,12 +46,13 @@ inline float Remap(float x, float lo, float hi, float scale) { return (x - lo) / (hi - lo + 1e-6) * scale; } -template -inline void GetMinMaxZ(const std::vector& landmarks, float* z_min, +template +inline void GetMinMaxZ(const LandmarkListType& landmarks, float* z_min, float* z_max) { *z_min = std::numeric_limits::max(); *z_max = std::numeric_limits::min(); - for (const auto& landmark : landmarks) { + for (int i = 0; i < landmarks.landmark_size(); ++i) { + const LandmarkType& landmark = landmarks.landmark(i); *z_min = std::min(landmark.z(), *z_min); *z_max = std::max(landmark.z(), *z_max); } @@ -73,7 +74,7 @@ void SetColorSizeValueFromZ(float z, float z_min, float z_max, } // namespace // A calculator that converts Landmark proto to RenderData proto for -// visualization. The input should be std::vector. It is also possible +// visualization. The input should be LandmarkList proto. It is also possible // to specify the connections between landmarks. 
// // Example config: @@ -121,11 +122,11 @@ class LandmarksToRenderDataCalculator : public CalculatorBase { const LandmarksToRenderDataCalculatorOptions& options, bool normalized, int gray_val1, int gray_val2, RenderData* render_data); - template - void AddConnections(const std::vector& landmarks, - bool normalized, RenderData* render_data); - template - void AddConnectionsWithDepth(const std::vector& landmarks, + template + void AddConnections(const LandmarkListType& landmarks, bool normalized, + RenderData* render_data); + template + void AddConnectionsWithDepth(const LandmarkListType& landmarks, bool normalized, float min_z, float max_z, RenderData* render_data); @@ -144,10 +145,10 @@ ::mediapipe::Status LandmarksToRenderDataCalculator::GetContract( "normalized landmarks."; if (cc->Inputs().HasTag(kLandmarksTag)) { - cc->Inputs().Tag(kLandmarksTag).Set>(); + cc->Inputs().Tag(kLandmarksTag).Set(); } if (cc->Inputs().HasTag(kNormLandmarksTag)) { - cc->Inputs().Tag(kNormLandmarksTag).Set>(); + cc->Inputs().Tag(kNormLandmarksTag).Set(); } cc->Outputs().Tag(kRenderDataTag).Set(); return ::mediapipe::OkStatus(); @@ -169,16 +170,17 @@ ::mediapipe::Status LandmarksToRenderDataCalculator::Process( float z_max = 0.f; if (cc->Inputs().HasTag(kLandmarksTag)) { - const auto& landmarks = - cc->Inputs().Tag(kLandmarksTag).Get>(); + const LandmarkList& landmarks = + cc->Inputs().Tag(kLandmarksTag).Get(); RET_CHECK_EQ(options_.landmark_connections_size() % 2, 0) << "Number of entries in landmark connections must be a multiple of 2"; if (visualize_depth) { - GetMinMaxZ(landmarks, &z_min, &z_max); + GetMinMaxZ(landmarks, &z_min, &z_max); } // Only change rendering if there are actually z values other than 0. 
visualize_depth &= ((z_max - z_min) > 1e-3); - for (const auto& landmark : landmarks) { + for (int i = 0; i < landmarks.landmark_size(); ++i) { + const Landmark& landmark = landmarks.landmark(i); auto* landmark_data_render = AddPointRenderData(options_, render_data.get()); if (visualize_depth) { @@ -191,25 +193,27 @@ ::mediapipe::Status LandmarksToRenderDataCalculator::Process( landmark_data->set_y(landmark.y()); } if (visualize_depth) { - AddConnectionsWithDepth(landmarks, /*normalized=*/false, z_min, z_max, - render_data.get()); + AddConnectionsWithDepth(landmarks, /*normalized=*/false, + z_min, z_max, render_data.get()); } else { - AddConnections(landmarks, /*normalized=*/false, render_data.get()); + AddConnections(landmarks, /*normalized=*/false, + render_data.get()); } } if (cc->Inputs().HasTag(kNormLandmarksTag)) { - const auto& landmarks = cc->Inputs() - .Tag(kNormLandmarksTag) - .Get>(); + const NormalizedLandmarkList& landmarks = + cc->Inputs().Tag(kNormLandmarksTag).Get(); RET_CHECK_EQ(options_.landmark_connections_size() % 2, 0) << "Number of entries in landmark connections must be a multiple of 2"; if (visualize_depth) { - GetMinMaxZ(landmarks, &z_min, &z_max); + GetMinMaxZ(landmarks, &z_min, + &z_max); } // Only change rendering if there are actually z values other than 0. 
visualize_depth &= ((z_max - z_min) > 1e-3); - for (const auto& landmark : landmarks) { + for (int i = 0; i < landmarks.landmark_size(); ++i) { + const NormalizedLandmark& landmark = landmarks.landmark(i); auto* landmark_data_render = AddPointRenderData(options_, render_data.get()); if (visualize_depth) { @@ -222,10 +226,11 @@ ::mediapipe::Status LandmarksToRenderDataCalculator::Process( landmark_data->set_y(landmark.y()); } if (visualize_depth) { - AddConnectionsWithDepth(landmarks, /*normalized=*/true, z_min, z_max, - render_data.get()); + AddConnectionsWithDepth( + landmarks, /*normalized=*/true, z_min, z_max, render_data.get()); } else { - AddConnections(landmarks, /*normalized=*/true, render_data.get()); + AddConnections(landmarks, /*normalized=*/true, + render_data.get()); } } @@ -235,13 +240,13 @@ ::mediapipe::Status LandmarksToRenderDataCalculator::Process( return ::mediapipe::OkStatus(); } -template +template void LandmarksToRenderDataCalculator::AddConnectionsWithDepth( - const std::vector& landmarks, bool normalized, float min_z, + const LandmarkListType& landmarks, bool normalized, float min_z, float max_z, RenderData* render_data) { for (int i = 0; i < options_.landmark_connections_size(); i += 2) { - const auto& ld0 = landmarks[options_.landmark_connections(i)]; - const auto& ld1 = landmarks[options_.landmark_connections(i + 1)]; + const auto& ld0 = landmarks.landmark(options_.landmark_connections(i)); + const auto& ld1 = landmarks.landmark(options_.landmark_connections(i + 1)); const int gray_val1 = 255 - static_cast(Remap(ld0.z(), min_z, max_z, 255)); const int gray_val2 = @@ -272,13 +277,13 @@ void LandmarksToRenderDataCalculator::AddConnectionToRenderData( connection_annotation->set_thickness(options.thickness()); } -template +template void LandmarksToRenderDataCalculator::AddConnections( - const std::vector& landmarks, bool normalized, + const LandmarkListType& landmarks, bool normalized, RenderData* render_data) { for (int i = 0; i < 
options_.landmark_connections_size(); i += 2) { - const auto& ld0 = landmarks[options_.landmark_connections(i)]; - const auto& ld1 = landmarks[options_.landmark_connections(i + 1)]; + const auto& ld0 = landmarks.landmark(options_.landmark_connections(i)); + const auto& ld1 = landmarks.landmark(options_.landmark_connections(i + 1)); AddConnectionToRenderData(ld0.x(), ld0.y(), ld1.x(), ld1.y(), options_, normalized, render_data); } diff --git a/mediapipe/calculators/util/top_k_scores_calculator.cc b/mediapipe/calculators/util/top_k_scores_calculator.cc index bc8d30f87c..8465c23917 100644 --- a/mediapipe/calculators/util/top_k_scores_calculator.cc +++ b/mediapipe/calculators/util/top_k_scores_calculator.cc @@ -29,8 +29,7 @@ #include "mediapipe/framework/port/statusor.h" #include "mediapipe/util/resource_util.h" -#if defined(MEDIAPIPE_LITE) || defined(__EMSCRIPTEN__) || \ - defined(__ANDROID__) || (defined(__APPLE__) && !TARGET_OS_OSX) +#if defined(MEDIAPIPE_MOBILE) #include "mediapipe/util/android/file/base/file.h" #include "mediapipe/util/android/file/base/helpers.h" #else diff --git a/mediapipe/calculators/video/opencv_video_decoder_calculator.cc b/mediapipe/calculators/video/opencv_video_decoder_calculator.cc index 49c7f05561..a5016d3ddf 100644 --- a/mediapipe/calculators/video/opencv_video_decoder_calculator.cc +++ b/mediapipe/calculators/video/opencv_video_decoder_calculator.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include + #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/formats/image_format.pb.h" #include "mediapipe/framework/formats/image_frame.h" @@ -66,6 +68,20 @@ ImageFormat::Format GetImageFormat(int num_channels) { // output_stream: "VIDEO:video_frames" // output_stream: "VIDEO_PRESTREAM:video_header" // } +// +// OpenCV's VideoCapture doesn't decode audio tracks. 
If the audio tracks need +// to be saved, specify an output side packet with tag "SAVED_AUDIO_PATH". +// The calculator will call FFmpeg binary to save audio tracks as an aac file. +// +// Example config: +// node { +// calculator: "OpenCvVideoDecoderCalculator" +// input_side_packet: "INPUT_FILE_PATH:input_file_path" +// output_side_packet: "SAVED_AUDIO_PATH:audio_path" +// output_stream: "VIDEO:video_frames" +// output_stream: "VIDEO_PRESTREAM:video_header" +// } +// class OpenCvVideoDecoderCalculator : public CalculatorBase { public: static ::mediapipe::Status GetContract(CalculatorContract* cc) { @@ -74,6 +90,9 @@ class OpenCvVideoDecoderCalculator : public CalculatorBase { if (cc->Outputs().HasTag("VIDEO_PRESTREAM")) { cc->Outputs().Tag("VIDEO_PRESTREAM").Set(); } + if (cc->OutputSidePackets().HasTag("SAVED_AUDIO_PATH")) { + cc->OutputSidePackets().Tag("SAVED_AUDIO_PATH").Set(); + } return ::mediapipe::OkStatus(); } @@ -127,6 +146,25 @@ class OpenCvVideoDecoderCalculator : public CalculatorBase { } // Rewind to the very first frame. cap_->set(cv::CAP_PROP_POS_AVI_RATIO, 0); + + if (cc->OutputSidePackets().HasTag("SAVED_AUDIO_PATH")) { +#ifdef HAVE_FFMPEG + std::string saved_audio_path = std::tmpnam(nullptr); + system(absl::StrCat("ffmpeg -nostats -loglevel 0 -i ", input_file_path, + " -vn -f adts ", saved_audio_path) + .c_str()); + cc->OutputSidePackets() + .Tag("SAVED_AUDIO_PATH") + .Set(MakePacket(saved_audio_path)); + +#else + return ::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC) + << "OpenCVVideoDecoderCalculator can't save the audio file " + "because FFmpeg is not installed. 
Please remove " + "output_side_packet: \"SAVED_AUDIO_PATH\" from the node " + "config."; +#endif + } return ::mediapipe::OkStatus(); } diff --git a/mediapipe/calculators/video/opencv_video_encoder_calculator.cc b/mediapipe/calculators/video/opencv_video_encoder_calculator.cc index 6ac11d933c..3bf29be425 100644 --- a/mediapipe/calculators/video/opencv_video_encoder_calculator.cc +++ b/mediapipe/calculators/video/opencv_video_encoder_calculator.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include + #include #include #include @@ -39,13 +41,31 @@ namespace mediapipe { // packet. Currently, the calculator only supports one video stream (in // mediapipe::ImageFrame). // -// Example config to generate the output video file: +// Example config: +// node { +// calculator: "OpenCvVideoEncoderCalculator" +// input_stream: "VIDEO:video" +// input_stream: "VIDEO_PRESTREAM:video_header" +// input_side_packet: "OUTPUT_FILE_PATH:output_file_path" +// node_options { +// [type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: { +// codec: "avc1" +// video_format: "mp4" +// } +// } +// } // +// OpenCV's VideoWriter doesn't encode audio. If an input side packet with tag +// "AUDIO_FILE_PATH" is specified, the calculator will call FFmpeg binary to +// attach the audio file to the video as the last step in Close(). 
+// +// Example config: // node { // calculator: "OpenCvVideoEncoderCalculator" // input_stream: "VIDEO:video" // input_stream: "VIDEO_PRESTREAM:video_header" // input_side_packet: "OUTPUT_FILE_PATH:output_file_path" +// input_side_packet: "AUDIO_FILE_PATH:audio_path" // node_options { // [type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: { // codec: "avc1" @@ -53,6 +73,7 @@ namespace mediapipe { // } // } // } +// class OpenCvVideoEncoderCalculator : public CalculatorBase { public: static ::mediapipe::Status GetContract(CalculatorContract* cc); @@ -77,6 +98,9 @@ ::mediapipe::Status OpenCvVideoEncoderCalculator::GetContract( } RET_CHECK(cc->InputSidePackets().HasTag("OUTPUT_FILE_PATH")); cc->InputSidePackets().Tag("OUTPUT_FILE_PATH").Set(); + if (cc->InputSidePackets().HasTag("AUDIO_FILE_PATH")) { + cc->InputSidePackets().Tag("AUDIO_FILE_PATH").Set(); + } return ::mediapipe::OkStatus(); } @@ -155,6 +179,27 @@ ::mediapipe::Status OpenCvVideoEncoderCalculator::Close(CalculatorContext* cc) { if (writer_ && writer_->isOpened()) { writer_->release(); } + if (cc->InputSidePackets().HasTag("AUDIO_FILE_PATH")) { +#ifdef HAVE_FFMPEG + const std::string& audio_file_path = + cc->InputSidePackets().Tag("AUDIO_FILE_PATH").Get(); + // A temp output file is needed because FFmpeg can't do in-place editing. + const std::string temp_file_path = std::tmpnam(nullptr); + system(absl::StrCat("mv ", output_file_path_, " ", temp_file_path, + "&& ffmpeg -nostats -loglevel 0 -i ", temp_file_path, + " -i ", audio_file_path, + " -c copy -map 0:v:0 -map 1:a:0 ", output_file_path_, + "&& rm ", temp_file_path) + .c_str()); + +#else + return ::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC) + << "OpenCVVideoEncoderCalculator can't attach the audio tracks to " + "the video because FFmpeg is not installed. 
Please remove " + "input_side_packet: \"AUDIO_FILE_PATH\" from the node " + "config."; +#endif + } return ::mediapipe::OkStatus(); } diff --git a/mediapipe/docs/android_archive_library.md b/mediapipe/docs/android_archive_library.md index 0fed0e404e..0b6ccdf2ba 100644 --- a/mediapipe/docs/android_archive_library.md +++ b/mediapipe/docs/android_archive_library.md @@ -126,6 +126,6 @@ project. ``` 6. Follow our Android app examples to use MediaPipe in Android Studio for your - use case. If you are looking for an example, a working face detection + use case. If you are looking for an example, a face detection example can be found - [here](https://github.com/jiuqiant/mediapipe_aar_example). + [here](https://github.com/jiuqiant/mediapipe_face_detection_aar_example) and a multi-hand tracking example can be found [here](https://github.com/jiuqiant/mediapipe_multi_hands_tracking_aar_example). diff --git a/mediapipe/docs/gpu.md b/mediapipe/docs/gpu.md index d177332663..63c97e4bcf 100644 --- a/mediapipe/docs/gpu.md +++ b/mediapipe/docs/gpu.md @@ -1,10 +1,10 @@ ## Running on GPUs - [Overview](#overview) -- [OpenGL Support](#graphconfig) +- [OpenGL Support](#opengl-support) - [Life of a GPU calculator](#life-of-a-gpu-calculator) - [GpuBuffer to ImageFrame converters](#gpubuffer-to-imageframe-converters) - +- [Disable GPU support](#disable-gpu-support) ### Overview MediaPipe supports calculator nodes for GPU compute and rendering, and allows combining multiple GPU nodes, as well as mixing them with CPU based calculator nodes. There exist several GPU APIs on mobile platforms (eg, OpenGL ES, Metal and Vulkan). MediaPipe does not attempt to offer a single cross-API GPU abstraction. Individual nodes can be written using different APIs, allowing them to take advantage of platform specific features when needed. 
@@ -23,6 +23,7 @@ Below are the design principles for GPU support in MediaPipe * A calculator should be allowed maximum flexibility in using the GPU for all or part of its operation, combining it with the CPU if necessary. ### OpenGL support + MediaPipe supports OpenGL ES up to version 3.2 on Android and up to ES 3.0 on iOS. In addition, MediaPipe also supports Metal on iOS. * MediaPipe allows graphs to run OpenGL in multiple GL contexts. For example, this can be very useful in graphs that combine a slower GPU inference path (eg, at 10 FPS) with a faster GPU rendering path (eg, at 30 FPS): since one GL context corresponds to one sequential command queue, using the same context for both tasks would reduce the rendering frame rate. One challenge MediaPipe's use of multiple contexts solves is the ability to communicate across them. An example scenario is one with an input video that is sent to both the rendering and inferences paths, and rendering needs to have access to the latest output from inference. @@ -128,3 +129,26 @@ The below diagram shows the data flow in a mobile application that captures vide |:--:| | *Video frames from the camera are fed into the graph as `GpuBuffer` packets. The input stream is accessed by two calculators in parallel. `GpuBufferToImageFrameCalculator` converts the buffer into an `ImageFrame`, which is then sent through a grayscale converter and a canny filter (both based on OpenCV and running on the CPU), whose output is then converted into a `GpuBuffer` again. A multi-input GPU calculator, GlOverlayCalculator, takes as input both the original `GpuBuffer` and the one coming out of the edge detector, and overlays them using a shader. The output is then sent back to the application using a callback calculator, and the application renders the image to the screen using OpenGL.* | +### Disable GPU Support + +By default, building MediaPipe (with no special bazel flags) attempts to compile +and link against OpenGL/Metal libraries. 
+ +There are some command line build flags available to disable/enable GPU support +within the MediaPipe framework: + +``` +# To disable *all* gpu support +bazel build --define MEDIAPIPE_DISABLE_GPU=1 + +# to enable full GPU support (OpenGL ES 3.1+ & Metal) +bazel build --copt -DMESA_EGL_NO_X11_HEADERS + +# to enable only OpenGL ES 3.0 and below (no GLES 3.1+ features) +bazel build --copt -DMESA_EGL_NO_X11_HEADERS --copt -DMEDIAPIPE_DISABLE_GL_COMPUTE +``` + +Note *MEDIAPIPE_DISABLE_GL_COMPUTE* is automatically defined on all Apple +systems (Apple doesn't support OpenGL ES 3.1+). + +Note on iOS and Android, it is assumed that GPU support will be enabled. diff --git a/mediapipe/docs/install.md b/mediapipe/docs/install.md index 02b6fc1492..97eb8ca0be 100644 --- a/mediapipe/docs/install.md +++ b/mediapipe/docs/install.md @@ -245,19 +245,23 @@ To build and run iOS apps: $ cd mediapipe ``` -3. Install Bazel (0.24.1 and above required). +3. Install Bazel (version between 0.24.1 and 1.1.0). - Option 1. Use package manager tool to install the latest version of Bazel. + Option 1. Use package manager tool to install Bazel 1.1.0 ```bash - $ brew install bazel - - # Run 'bazel version' to check version of bazel installed + # If Bazel 1.1.0+ was installed. + $ brew uninstall bazel + # Install Bazel 1.1.0 + $ brew install https://raw.githubusercontent.com/bazelbuild/homebrew-tap/f8a0fa981bcb1784a0d0823e14867b844e94fb3d/Formula/bazel.rb + $ brew link bazel + # Run 'bazel version' to check version of bazel ``` Option 2. Follow the official [Bazel documentation](https://docs.bazel.build/versions/master/install-os-x.html#install-with-installer-mac-os-x) - to install any version of Bazel manually. + to install any version of Bazel manually. Note that MediaPipe doesn't + support Bazel 1.1.0+ on macOS yet. 4. Install OpenCV and FFmpeg. 
@@ -526,7 +530,7 @@ This will use a Docker image that will isolate mediapipe's installation from the ```bash $ docker run -it --name mediapipe mediapipe:latest - root@bca08b91ff63:/mediapipe# bash ./setup_android_sdk_and_ndk + root@bca08b91ff63:/mediapipe# bash ./setup_android_sdk_and_ndk.sh # Should print: # Android NDK is now installed. Consider setting $ANDROID_NDK_HOME environment variable to be /root/Android/Sdk/ndk-bundle/android-ndk-r18b diff --git a/mediapipe/docs/mediapipe_ios_setup.md b/mediapipe/docs/mediapipe_ios_setup.md index 1ac531c8c6..533e075d63 100644 --- a/mediapipe/docs/mediapipe_ios_setup.md +++ b/mediapipe/docs/mediapipe_ios_setup.md @@ -7,7 +7,8 @@ 2. Install [Bazel](https://bazel.build/). - See their [instructions](https://docs.bazel.build/versions/master/install-os-x.html). + See their + [instructions](https://docs.bazel.build/versions/master/install-os-x.html). We recommend using [Homebrew](https://brew.sh/): ```bash @@ -15,13 +16,23 @@ brew install bazelbuild/tap/bazel ``` -3. Clone the MediaPipe repository. +3. Install python "future" and "six". + + To make Mediapipe work with TensorFlow, please install the python "future" + library and the python "six" library: + + ```bash + pip install --user future six + ``` + +4. Clone the MediaPipe repository. ```bash git clone https://github.com/google/mediapipe.git ``` -4. Symlink or copy your provisioning profile to `mediapipe/mediapipe/provisioning_profile.mobileprovision`. +5. Symlink or copy your provisioning profile to + `mediapipe/mediapipe/provisioning_profile.mobileprovision`. ```bash cd mediapipe diff --git a/mediapipe/docs/multi_hand_tracking_mobile_gpu.md b/mediapipe/docs/multi_hand_tracking_mobile_gpu.md index 53a09d96e1..111cd894d5 100644 --- a/mediapipe/docs/multi_hand_tracking_mobile_gpu.md +++ b/mediapipe/docs/multi_hand_tracking_mobile_gpu.md @@ -560,7 +560,7 @@ node { # BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END # timestamp. 
node { - calculator: "EndLoopNormalizedLandmarksVectorCalculator" + calculator: "EndLoopNormalizedLandmarkListVectorCalculator" input_stream: "ITEM:single_hand_landmarks" input_stream: "BATCH_END:single_hand_rect_timestamp" output_stream: "ITERABLE:multi_hand_landmarks" @@ -580,7 +580,7 @@ node { # hand. If the hand presence for hand #i is false, the set of landmarks # corresponding to that hand are dropped from the vector. node { - calculator: "FilterLandmarksCollectionCalculator" + calculator: "FilterLandmarkListCollectionCalculator" input_stream: "ITERABLE:multi_hand_landmarks" input_stream: "CONDITION:multi_hand_presence" output_stream: "ITERABLE:filtered_multi_hand_landmarks" @@ -669,7 +669,7 @@ node { # timestamp for downstream calculators to inform them that all elements in the # vector have been processed. node { - calculator: "BeginLoopNormalizedLandmarksVectorCalculator" + calculator: "BeginLoopNormalizedLandmarkListVectorCalculator" input_stream: "ITERABLE:multi_hand_landmarks" output_stream: "ITEM:single_hand_landmarks" output_stream: "BATCH_END:landmark_timestamp" diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/BUILD index 9dd6b475d2..9846a9c45e 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/BUILD @@ -75,6 +75,7 @@ android_library( resource_files = glob(["res/**"]), deps = [ ":mediapipe_jni_lib", + "//mediapipe/framework/formats:landmark_java_proto_lite", "//mediapipe/java/com/google/mediapipe/components:android_camerax_helper", "//mediapipe/java/com/google/mediapipe/components:android_components", "//mediapipe/java/com/google/mediapipe/framework:android_framework", diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/MainActivity.java 
b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/MainActivity.java index a907ea4e5c..a5fa8c6743 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/MainActivity.java +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/MainActivity.java @@ -17,18 +17,23 @@ import android.graphics.SurfaceTexture; import android.os.Bundle; import androidx.appcompat.app.AppCompatActivity; +import android.util.Log; import android.util.Size; import android.view.SurfaceHolder; import android.view.SurfaceView; import android.view.View; import android.view.ViewGroup; +import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark; +import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList; import com.google.mediapipe.components.CameraHelper; import com.google.mediapipe.components.CameraXPreviewHelper; import com.google.mediapipe.components.ExternalTextureConverter; import com.google.mediapipe.components.FrameProcessor; import com.google.mediapipe.components.PermissionHelper; import com.google.mediapipe.framework.AndroidAssetUtil; +import com.google.mediapipe.framework.PacketGetter; import com.google.mediapipe.glutil.EglManager; +import com.google.protobuf.InvalidProtocolBufferException; /** Main activity of MediaPipe example apps. 
*/ public class MainActivity extends AppCompatActivity { @@ -37,6 +42,8 @@ public class MainActivity extends AppCompatActivity { private static final String BINARY_GRAPH_NAME = "handtrackinggpu.binarypb"; private static final String INPUT_VIDEO_STREAM_NAME = "input_video"; private static final String OUTPUT_VIDEO_STREAM_NAME = "output_video"; + private static final String OUTPUT_HAND_PRESENCE_STREAM_NAME = "hand_presence"; + private static final String OUTPUT_LANDMARKS_STREAM_NAME = "hand_landmarks"; private static final CameraHelper.CameraFacing CAMERA_FACING = CameraHelper.CameraFacing.FRONT; // Flips the camera-preview frames vertically before sending them into FrameProcessor to be @@ -90,6 +97,41 @@ protected void onCreate(Bundle savedInstanceState) { OUTPUT_VIDEO_STREAM_NAME); processor.getVideoSurfaceOutput().setFlipY(FLIP_FRAMES_VERTICALLY); + processor.addPacketCallback( + OUTPUT_HAND_PRESENCE_STREAM_NAME, + (packet) -> { + Boolean handPresence = PacketGetter.getBool(packet); + if (!handPresence) { + Log.d( + TAG, + "[TS:" + packet.getTimestamp() + "] Hand presence is false, no hands detected."); + } + }); + + processor.addPacketCallback( + OUTPUT_LANDMARKS_STREAM_NAME, + (packet) -> { + byte[] landmarksRaw = PacketGetter.getProtoBytes(packet); + try { + NormalizedLandmarkList landmarks = NormalizedLandmarkList.parseFrom(landmarksRaw); + if (landmarks == null) { + Log.d(TAG, "[TS:" + packet.getTimestamp() + "] No hand landmarks."); + return; + } + // Note: If hand_presence is false, these landmarks are useless. 
+ Log.d( + TAG, + "[TS:" + + packet.getTimestamp() + + "] #Landmarks for hand: " + + landmarks.getLandmarkCount()); + Log.d(TAG, getLandmarksDebugString(landmarks)); + } catch (InvalidProtocolBufferException e) { + Log.e(TAG, "Couldn't Exception received - " + e); + return; + } + }); + PermissionHelper.checkAndRequestCameraPermissions(this); } @@ -164,4 +206,23 @@ private void startCamera() { }); cameraHelper.startCamera(this, CAMERA_FACING, /*surfaceTexture=*/ null); } + + private static String getLandmarksDebugString(NormalizedLandmarkList landmarks) { + int landmarkIndex = 0; + String landmarksString = ""; + for (NormalizedLandmark landmark : landmarks.getLandmarkList()) { + landmarksString += + "\t\tLandmark[" + + landmarkIndex + + "]: (" + + landmark.getX() + + ", " + + landmark.getY() + + ", " + + landmark.getZ() + + ")\n"; + ++landmarkIndex; + } + return landmarksString; + } } diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/BUILD index 61c2065dd1..80beaf37b6 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/BUILD @@ -75,6 +75,7 @@ android_library( resource_files = glob(["res/**"]), deps = [ ":mediapipe_jni_lib", + "//mediapipe/framework/formats:landmark_java_proto_lite", "//mediapipe/java/com/google/mediapipe/components:android_camerax_helper", "//mediapipe/java/com/google/mediapipe/components:android_components", "//mediapipe/java/com/google/mediapipe/framework:android_framework", diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/MainActivity.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/MainActivity.java index 7e541ea458..f7fe06c821 100644 --- 
a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/MainActivity.java +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/MainActivity.java @@ -17,18 +17,23 @@ import android.graphics.SurfaceTexture; import android.os.Bundle; import androidx.appcompat.app.AppCompatActivity; +import android.util.Log; import android.util.Size; import android.view.SurfaceHolder; import android.view.SurfaceView; import android.view.View; import android.view.ViewGroup; +import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark; +import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList; import com.google.mediapipe.components.CameraHelper; import com.google.mediapipe.components.CameraXPreviewHelper; import com.google.mediapipe.components.ExternalTextureConverter; import com.google.mediapipe.components.FrameProcessor; import com.google.mediapipe.components.PermissionHelper; import com.google.mediapipe.framework.AndroidAssetUtil; +import com.google.mediapipe.framework.PacketGetter; import com.google.mediapipe.glutil.EglManager; +import java.util.List; /** Main activity of MediaPipe example apps. 
*/ public class MainActivity extends AppCompatActivity { @@ -37,6 +42,7 @@ public class MainActivity extends AppCompatActivity { private static final String BINARY_GRAPH_NAME = "multihandtrackinggpu.binarypb"; private static final String INPUT_VIDEO_STREAM_NAME = "input_video"; private static final String OUTPUT_VIDEO_STREAM_NAME = "output_video"; + private static final String OUTPUT_LANDMARKS_STREAM_NAME = "multi_hand_landmarks"; private static final CameraHelper.CameraFacing CAMERA_FACING = CameraHelper.CameraFacing.FRONT; // Flips the camera-preview frames vertically before sending them into FrameProcessor to be @@ -90,6 +96,20 @@ protected void onCreate(Bundle savedInstanceState) { OUTPUT_VIDEO_STREAM_NAME); processor.getVideoSurfaceOutput().setFlipY(FLIP_FRAMES_VERTICALLY); + processor.addPacketCallback( + OUTPUT_LANDMARKS_STREAM_NAME, + (packet) -> { + Log.d(TAG, "Received multi-hand landmarks packet."); + List multiHandLandmarks = + PacketGetter.getProtoVector(packet, NormalizedLandmarkList.parser()); + Log.d( + TAG, + "[TS:" + + packet.getTimestamp() + + "] " + + getMultiHandLandmarksDebugString(multiHandLandmarks)); + }); + PermissionHelper.checkAndRequestCameraPermissions(this); } @@ -164,4 +184,32 @@ private void startCamera() { }); cameraHelper.startCamera(this, CAMERA_FACING, /*surfaceTexture=*/ null); } + + private String getMultiHandLandmarksDebugString(List multiHandLandmarks) { + if (multiHandLandmarks.isEmpty()) { + return "No hand landmarks"; + } + String multiHandLandmarksStr = "Number of hands detected: " + multiHandLandmarks.size() + "\n"; + int handIndex = 0; + for (NormalizedLandmarkList landmarks : multiHandLandmarks) { + multiHandLandmarksStr += + "\t#Hand landmarks for hand[" + handIndex + "]: " + landmarks.getLandmarkCount() + "\n"; + int landmarkIndex = 0; + for (NormalizedLandmark landmark : landmarks.getLandmarkList()) { + multiHandLandmarksStr += + "\t\tLandmark [" + + landmarkIndex + + "]: (" + + landmark.getX() + + ", " + + 
landmark.getY() + + ", " + + landmark.getZ() + + ")\n"; + ++landmarkIndex; + } + ++handIndex; + } + return multiHandLandmarksStr; + } } diff --git a/mediapipe/examples/desktop/demo_run_graph_main.cc b/mediapipe/examples/desktop/demo_run_graph_main.cc index 14136560cc..d650cec53a 100644 --- a/mediapipe/examples/desktop/demo_run_graph_main.cc +++ b/mediapipe/examples/desktop/demo_run_graph_main.cc @@ -76,6 +76,11 @@ ::mediapipe::Status RunMPPGraph() { RET_CHECK(writer.isOpened()); } else { cv::namedWindow(kWindowName, /*flags=WINDOW_AUTOSIZE*/ 1); +#if (CV_MAJOR_VERSION >= 3) && (CV_MINOR_VERSION >= 2) + capture.set(cv::CAP_PROP_FRAME_WIDTH, 640); + capture.set(cv::CAP_PROP_FRAME_HEIGHT, 480); + capture.set(cv::CAP_PROP_FPS, 30); +#endif } LOG(INFO) << "Start running the calculator graph."; diff --git a/mediapipe/examples/desktop/demo_run_graph_main_gpu.cc b/mediapipe/examples/desktop/demo_run_graph_main_gpu.cc index 4bf8cf97a6..687a704ebe 100644 --- a/mediapipe/examples/desktop/demo_run_graph_main_gpu.cc +++ b/mediapipe/examples/desktop/demo_run_graph_main_gpu.cc @@ -86,6 +86,11 @@ ::mediapipe::Status RunMPPGraph() { RET_CHECK(writer.isOpened()); } else { cv::namedWindow(kWindowName, /*flags=WINDOW_AUTOSIZE*/ 1); +#if (CV_MAJOR_VERSION >= 3) && (CV_MINOR_VERSION >= 2) + capture.set(cv::CAP_PROP_FRAME_WIDTH, 640); + capture.set(cv::CAP_PROP_FRAME_HEIGHT, 480); + capture.set(cv::CAP_PROP_FPS, 30); +#endif } LOG(INFO) << "Start running the calculator graph."; diff --git a/mediapipe/examples/ios/handtrackinggpu/BUILD b/mediapipe/examples/ios/handtrackinggpu/BUILD index f84008fc1a..7481db2213 100644 --- a/mediapipe/examples/ios/handtrackinggpu/BUILD +++ b/mediapipe/examples/ios/handtrackinggpu/BUILD @@ -12,15 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-licenses(["notice"]) # Apache 2.0 - -MIN_IOS_VERSION = "10.0" - load( "@build_bazel_rules_apple//apple:ios.bzl", "ios_application", ) +licenses(["notice"]) # Apache 2.0 + +MIN_IOS_VERSION = "10.0" + # To use the 3D model instead of the default 2D model, add "--define 3D=true" to the # bazel build command. config_setting( @@ -90,6 +90,7 @@ objc_library( "//mediapipe:ios_x86_64": [], "//conditions:default": [ "//mediapipe/graphs/hand_tracking:mobile_calculators", + "//mediapipe/framework/formats:landmark_cc_proto", ], }), ) diff --git a/mediapipe/examples/ios/handtrackinggpu/ViewController.mm b/mediapipe/examples/ios/handtrackinggpu/ViewController.mm index ca587ed883..a15de9d435 100644 --- a/mediapipe/examples/ios/handtrackinggpu/ViewController.mm +++ b/mediapipe/examples/ios/handtrackinggpu/ViewController.mm @@ -18,10 +18,13 @@ #import "mediapipe/objc/MPPCameraInputSource.h" #import "mediapipe/objc/MPPLayerRenderer.h" +#include "mediapipe/framework/formats/landmark.pb.h" + static NSString* const kGraphName = @"hand_tracking_mobile_gpu"; static const char* kInputStream = "input_video"; static const char* kOutputStream = "output_video"; +static const char* kLandmarksOutputStream = "hand_landmarks"; static const char* kVideoQueueLabel = "com.google.mediapipe.example.videoQueue"; @interface ViewController () @@ -80,6 +83,7 @@ + (MPPGraph*)loadGraphFromResource:(NSString*)resource { // Create MediaPipe graph with mediapipe::CalculatorGraphConfig proto object. MPPGraph* newGraph = [[MPPGraph alloc] initWithGraphConfig:config]; [newGraph addFrameOutputStream:kOutputStream outputPacketType:MPPPacketTypePixelBuffer]; + [newGraph addFrameOutputStream:kLandmarksOutputStream outputPacketType:MPPPacketTypeRaw]; return newGraph; } @@ -160,6 +164,25 @@ - (void)mediapipeGraph:(MPPGraph*)graph } } +// Receives a raw packet from the MediaPipe graph. Invoked on a MediaPipe worker thread. 
// Receives a raw packet from the MediaPipe graph. Invoked on a MediaPipe worker thread.
// Only packets from the landmarks stream are handled: the landmark count and the (x, y, z)
// coordinates of every landmark are written to the log.
- (void)mediapipeGraph:(MPPGraph*)graph
       didOutputPacket:(const ::mediapipe::Packet&)packet
            fromStream:(const std::string&)streamName {
  // Packets from any other stream are ignored.
  if (!(streamName == kLandmarksOutputStream)) {
    return;
  }

  const auto timestamp = packet.Timestamp().Value();
  if (packet.IsEmpty()) {
    NSLog(@"[TS:%lld] No hand landmarks", timestamp);
    return;
  }

  const auto& hand = packet.Get<::mediapipe::NormalizedLandmarkList>();
  const int landmarkCount = hand.landmark_size();
  NSLog(@"[TS:%lld] Number of landmarks on hand: %d", timestamp, landmarkCount);
  for (int index = 0; index < landmarkCount; ++index) {
    const auto& point = hand.landmark(index);
    NSLog(@"\tLandmark[%d]: (%f, %f, %f)", index, point.x(), point.y(), point.z());
  }
}
config_setting( @@ -90,6 +90,7 @@ objc_library( "//mediapipe:ios_x86_64": [], "//conditions:default": [ "//mediapipe/graphs/hand_tracking:multi_hand_mobile_calculators", + "//mediapipe/framework/formats:landmark_cc_proto", ], }), ) diff --git a/mediapipe/examples/ios/multihandtrackinggpu/ViewController.mm b/mediapipe/examples/ios/multihandtrackinggpu/ViewController.mm index 2d7c5d7a5f..66a3c9aff8 100644 --- a/mediapipe/examples/ios/multihandtrackinggpu/ViewController.mm +++ b/mediapipe/examples/ios/multihandtrackinggpu/ViewController.mm @@ -18,10 +18,13 @@ #import "mediapipe/objc/MPPCameraInputSource.h" #import "mediapipe/objc/MPPLayerRenderer.h" +#include "mediapipe/framework/formats/landmark.pb.h" + static NSString* const kGraphName = @"multi_hand_tracking_mobile_gpu"; static const char* kInputStream = "input_video"; static const char* kOutputStream = "output_video"; +static const char* kLandmarksOutputStream = "multi_hand_landmarks"; static const char* kVideoQueueLabel = "com.google.mediapipe.example.videoQueue"; @interface ViewController () @@ -80,6 +83,7 @@ + (MPPGraph*)loadGraphFromResource:(NSString*)resource { // Create MediaPipe graph with mediapipe::CalculatorGraphConfig proto object. MPPGraph* newGraph = [[MPPGraph alloc] initWithGraphConfig:config]; [newGraph addFrameOutputStream:kOutputStream outputPacketType:MPPPacketTypePixelBuffer]; + [newGraph addFrameOutputStream:kLandmarksOutputStream outputPacketType:MPPPacketTypeRaw]; return newGraph; } @@ -160,6 +164,29 @@ - (void)mediapipeGraph:(MPPGraph*)graph } } +// Receives a raw packet from the MediaPipe graph. Invoked on a MediaPipe worker thread. 
// Receives a raw packet from the MediaPipe graph. Invoked on a MediaPipe worker thread.
// For packets from the multi-hand landmarks stream, logs the number of hands and, for each
// hand, its landmark count and the (x, y, z) coordinates of every landmark.
- (void)mediapipeGraph:(MPPGraph*)graph
       didOutputPacket:(const ::mediapipe::Packet&)packet
            fromStream:(const std::string&)streamName {
  if (streamName == kLandmarksOutputStream) {
    if (packet.IsEmpty()) {
      NSLog(@"[TS:%lld] No hand landmarks", packet.Timestamp().Value());
      return;
    }
    const auto& multi_hand_landmarks =
        packet.Get<std::vector<::mediapipe::NormalizedLandmarkList>>();
    // size() is a size_t: %zu is the matching specifier on both 32- and 64-bit targets,
    // whereas %lu only happens to match where size_t is unsigned long.
    NSLog(@"[TS:%lld] Number of hand instances with landmarks: %zu", packet.Timestamp().Value(),
          multi_hand_landmarks.size());
    // A size_t index avoids the signed/unsigned comparison against size().
    for (size_t hand_index = 0; hand_index < multi_hand_landmarks.size(); ++hand_index) {
      const auto& landmarks = multi_hand_landmarks[hand_index];
      NSLog(@"\tNumber of landmarks for hand[%zu]: %d", hand_index, landmarks.landmark_size());
      for (int i = 0; i < landmarks.landmark_size(); ++i) {
        NSLog(@"\t\tLandmark[%d]: (%f, %f, %f)", i, landmarks.landmark(i).x(),
              landmarks.landmark(i).y(), landmarks.landmark(i).z());
      }
    }
  }
}
The current stream queue sizes @@ -1105,25 +1118,17 @@ bool CalculatorGraph::UnthrottleSources() { { absl::MutexLock lock(&full_input_streams_mutex_); for (absl::flat_hash_set& s : full_input_streams_) { - if (!s.empty()) { - full_streams.insert(s.begin(), s.end()); + for (auto& stream : s) { + // The queue size of a graph output stream shouldn't change. Throttling + // should continue until the caller of the graph output stream consumes + // enough packets. + if (!IsGraphOutputStream(stream, graph_output_streams_)) { + full_streams.insert(stream); + } } } } for (InputStreamManager* stream : full_streams) { - // The queue size of a graph output stream shouldn't change. Throttling - // should continue until the caller of the graph output stream consumes - // enough packets. - bool is_graph_output_stream = false; - for (auto& graph_output_stream : graph_output_streams_) { - if (stream == graph_output_stream->input_stream()) { - is_graph_output_stream = true; - break; - } - } - if (is_graph_output_stream) { - continue; - } if (Config().report_deadlock()) { RecordError(::mediapipe::UnavailableError(absl::StrCat( "Detected a deadlock due to input throttling for: \"", stream->Name(), diff --git a/mediapipe/framework/formats/BUILD b/mediapipe/framework/formats/BUILD index 231fd321c1..f831bb3fd0 100644 --- a/mediapipe/framework/formats/BUILD +++ b/mediapipe/framework/formats/BUILD @@ -13,10 +13,6 @@ # limitations under the License. 
# -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") package( @@ -24,6 +20,10 @@ package( features = ["-layering_check"], ) +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + proto_library( name = "detection_proto", srcs = ["detection.proto"], @@ -265,3 +265,17 @@ mediapipe_cc_proto_library( visibility = ["//visibility:public"], deps = [":landmark_proto"], ) + +java_lite_proto_library( + name = "landmark_java_proto_lite", + strict_deps = 0, + visibility = ["//mediapipe:__subpackages__"], + deps = [":landmark_proto"], +) + +# Expose the proto source files for building mediapipe AAR. +filegroup( + name = "protos_src", + srcs = glob(["*.proto"]), + visibility = ["//mediapipe:__subpackages__"], +) diff --git a/mediapipe/framework/formats/annotation/rasterization.proto b/mediapipe/framework/formats/annotation/rasterization.proto index 728c593c0c..9aad7e88f2 100644 --- a/mediapipe/framework/formats/annotation/rasterization.proto +++ b/mediapipe/framework/formats/annotation/rasterization.proto @@ -27,7 +27,7 @@ message Rasterization { required int32 right_x = 3; } - // Intervals are always sorted by y-corrdinate. + // Intervals are always sorted by y-coordinate. // Therefore, a region occupies a set of scanlines ranging // from interval(0).y() to interval(interval_size() - 1)).y(). // Note: In video, at some scanlines no interval might be present. diff --git a/mediapipe/framework/formats/landmark.proto b/mediapipe/framework/formats/landmark.proto index cfafb793a0..708a34000e 100644 --- a/mediapipe/framework/formats/landmark.proto +++ b/mediapipe/framework/formats/landmark.proto @@ -16,6 +16,9 @@ syntax = "proto2"; package mediapipe; +option java_package = "com.google.mediapipe.formats.proto"; +option java_outer_classname = "LandmarkProto"; + // A landmark that can have 1 to 3 dimensions. Use x for 1D points, (x, y) for // 2D points and (x, y, z) for 3D points. 
For more dimensions, consider using // matrix_data.proto. diff --git a/mediapipe/framework/packet.cc b/mediapipe/framework/packet.cc index 274e5ad236..f19f8ca6ff 100644 --- a/mediapipe/framework/packet.cc +++ b/mediapipe/framework/packet.cc @@ -107,6 +107,14 @@ const proto_ns::MessageLite& Packet::GetProtoMessageLite() const { return *proto; } +StatusOr> +Packet::GetVectorOfProtoMessageLitePtrs() { + if (holder_ == nullptr) { + return ::mediapipe::InternalError("Packet is empty."); + } + return holder_->GetVectorOfProtoMessageLite(); +} + MEDIAPIPE_REGISTER_TYPE(::mediapipe::Packet, "::mediapipe::Packet", nullptr, nullptr); MEDIAPIPE_REGISTER_TYPE(::std::vector<::mediapipe::Packet>, diff --git a/mediapipe/framework/packet.h b/mediapipe/framework/packet.h index 11cfb5cc09..9e1946eaab 100644 --- a/mediapipe/framework/packet.h +++ b/mediapipe/framework/packet.h @@ -163,6 +163,13 @@ class Packet { // object type is protocol buffer, crashes otherwise. const proto_ns::MessageLite& GetProtoMessageLite() const; + // Returns a vector of pointers to MessageLite data, if the underlying + // object type is a vector of MessageLite data, returns an error otherwise. + // Note: This function is meant to be used internally within the MediaPipe + // framework only. + StatusOr> + GetVectorOfProtoMessageLitePtrs(); + // Returns an error if the packet does not contain data of type T. template ::mediapipe::Status ValidateAsType() const; @@ -347,6 +354,12 @@ class HolderBase { // underlying object is protocol buffer type, otherwise, nullptr is returned. virtual const proto_ns::MessageLite* GetProtoMessageLite() = 0; + // Returns a vector for the data in the holder, if the + // underlying object is a vector of protocol buffer objects, otherwise, + // returns an error. 
+ virtual StatusOr> + GetVectorOfProtoMessageLite() = 0; + private: size_t type_id_; }; @@ -364,6 +377,37 @@ const proto_ns::MessageLite* ConvertToProtoMessageLite(const T* data, return data; } +// Helper structs for determining if a type is an std::vector. +template +struct is_proto_vector : public std::false_type {}; + +template +struct is_proto_vector> + : public std::is_base_of::type {}; + +// Helper function to create and return a vector of pointers to proto message +// elements of the vector passed into the function. +template +StatusOr> +ConvertToVectorOfProtoMessageLitePtrs(const T* data, + /*is_proto_vector=*/std::false_type) { + return ::mediapipe::InvalidArgumentError(absl::StrCat( + "The Packet stores \"", typeid(T).name(), "\"", + "which is not convertible to vector.")); +} + +template +StatusOr> +ConvertToVectorOfProtoMessageLitePtrs(const T* data, + /*is_proto_vector=*/std::true_type) { + std::vector result; + for (auto it = data->begin(); it != data->end(); ++it) { + const proto_ns::MessageLite* element = &(*it); + result.push_back(element); + } + return result; +} + template class Holder : public HolderBase { public: @@ -421,6 +465,14 @@ class Holder : public HolderBase { ptr_, std::is_base_of()); } + // Returns a vector for the data in the holder, if the + // underlying object is a vector of protocol buffer objects, otherwise, + // returns an error. + StatusOr> + GetVectorOfProtoMessageLite() override { + return ConvertToVectorOfProtoMessageLitePtrs(ptr_, is_proto_vector()); + } + private: // Call delete[] if T is an array, delete otherwise. 
template diff --git a/mediapipe/framework/port.h b/mediapipe/framework/port.h index 275f8ca980..a1aeae8935 100644 --- a/mediapipe/framework/port.h +++ b/mediapipe/framework/port.h @@ -37,12 +37,16 @@ #if !defined(MEDIAPIPE_IOS) && !TARGET_OS_OSX #define MEDIAPIPE_IOS #endif +#if !defined(MEDIAPIPE_OSX) && TARGET_OS_OSX +#define MEDIAPIPE_OSX +#endif #endif // These platforms do not support OpenGL ES Compute Shaders (v3.1 and up), -// but can still run OpenGL ES 3.0 and below. -#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) && \ - (defined(__APPLE__) || defined(__EMSCRIPTEN__)) +// but may or may not still be able to run other OpenGL code. +#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) && \ + (defined(__APPLE__) || defined(__EMSCRIPTEN__) || \ + defined(MEDIAPIPE_DISABLE_GPU)) #define MEDIAPIPE_DISABLE_GL_COMPUTE #endif diff --git a/mediapipe/framework/port/opencv_core_inc.h b/mediapipe/framework/port/opencv_core_inc.h index 653d8c6771..a358ad90b3 100644 --- a/mediapipe/framework/port/opencv_core_inc.h +++ b/mediapipe/framework/port/opencv_core_inc.h @@ -20,6 +20,8 @@ #ifdef CV_VERSION_EPOCH // for OpenCV 2.x #include #else +#include + #include #endif diff --git a/mediapipe/framework/scheduler.cc b/mediapipe/framework/scheduler.cc index ce76f6530d..c8263ec0dd 100644 --- a/mediapipe/framework/scheduler.cc +++ b/mediapipe/framework/scheduler.cc @@ -158,9 +158,11 @@ void Scheduler::HandleIdle() { if (!active_sources_.empty() || throttled_graph_input_stream_count_ > 0) { VLOG(2) << "HandleIdle: unthrottling"; state_mutex_.Unlock(); - graph_->UnthrottleSources(); + bool did_unthrottle = graph_->UnthrottleSources(); state_mutex_.Lock(); - continue; + if (did_unthrottle) { + continue; + } } // Nothing left to do. diff --git a/mediapipe/framework/tool/BUILD b/mediapipe/framework/tool/BUILD index 61ec92e1e9..a75cda32c0 100644 --- a/mediapipe/framework/tool/BUILD +++ b/mediapipe/framework/tool/BUILD @@ -13,10 +13,6 @@ # limitations under the License. 
# -licenses(["notice"]) # Apache 2.0 - -package(default_visibility = ["//visibility:private"]) - load( "//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library", @@ -27,6 +23,10 @@ load( "mediapipe_binary_graph", ) +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:private"]) + exports_files([ "simple_subgraph_template.cc", ]) diff --git a/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_cpu.pbtxt b/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_cpu.pbtxt index ad52a5716c..8865ea22c3 100644 --- a/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_cpu.pbtxt +++ b/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_cpu.pbtxt @@ -101,7 +101,7 @@ node { } } -# Decodes the landmark tensors into a vector of lanmarks, where the landmark +# Decodes the landmark tensors into a list of landmarks, where the landmark # coordinates are normalized by the size of the input image to the model. node { calculator: "TfLiteTensorsToLandmarksCalculator" diff --git a/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_gpu.pbtxt b/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_gpu.pbtxt index 283ce459cb..2294634544 100644 --- a/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_gpu.pbtxt +++ b/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_gpu.pbtxt @@ -96,7 +96,7 @@ node { } } -# Decodes the landmark tensors into a vector of lanmarks, where the landmark +# Decodes the landmark tensors into a list of landmarks, where the landmark # coordinates are normalized by the size of the input image to the model. 
node { calculator: "TfLiteTensorsToLandmarksCalculator" diff --git a/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_landmark.pbtxt b/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_landmark.pbtxt index a380966ca7..08b283a80e 100644 --- a/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_landmark.pbtxt +++ b/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_landmark.pbtxt @@ -47,7 +47,7 @@ node { # BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END # timestamp. node { - calculator: "EndLoopNormalizedLandmarksVectorCalculator" + calculator: "EndLoopNormalizedLandmarkListVectorCalculator" input_stream: "ITEM:single_hand_landmarks" input_stream: "BATCH_END:single_hand_rect_timestamp" output_stream: "ITERABLE:multi_hand_landmarks" @@ -67,7 +67,7 @@ node { # hand. If the hand presence for hand #i is false, the set of landmarks # corresponding to that hand are dropped from the vector. node { - calculator: "FilterLandmarksCollectionCalculator" + calculator: "FilterLandmarkListCollectionCalculator" input_stream: "ITERABLE:multi_hand_landmarks" input_stream: "CONDITION:multi_hand_presence" output_stream: "ITERABLE:filtered_multi_hand_landmarks" diff --git a/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_renderer_cpu.pbtxt b/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_renderer_cpu.pbtxt index 2dcd6b478d..8406712e93 100644 --- a/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_renderer_cpu.pbtxt +++ b/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_renderer_cpu.pbtxt @@ -59,7 +59,7 @@ node { # timestamp for downstream calculators to inform them that all elements in the # vector have been processed. 
node { - calculator: "BeginLoopNormalizedLandmarksVectorCalculator" + calculator: "BeginLoopNormalizedLandmarkListVectorCalculator" input_stream: "ITERABLE:multi_hand_landmarks" output_stream: "ITEM:single_hand_landmarks" output_stream: "BATCH_END:landmark_timestamp" diff --git a/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_renderer_gpu.pbtxt b/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_renderer_gpu.pbtxt index 3ea9275dc3..d7e300c021 100644 --- a/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_renderer_gpu.pbtxt +++ b/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_renderer_gpu.pbtxt @@ -59,7 +59,7 @@ node { # timestamp for downstream calculators to inform them that all elements in the # vector have been processed. node { - calculator: "BeginLoopNormalizedLandmarksVectorCalculator" + calculator: "BeginLoopNormalizedLandmarkListVectorCalculator" input_stream: "ITERABLE:multi_hand_landmarks" output_stream: "ITEM:single_hand_landmarks" output_stream: "BATCH_END:landmark_timestamp" diff --git a/mediapipe/java/com/google/mediapipe/components/FrameProcessor.java b/mediapipe/java/com/google/mediapipe/components/FrameProcessor.java index c63f0495a7..8c901606eb 100644 --- a/mediapipe/java/com/google/mediapipe/components/FrameProcessor.java +++ b/mediapipe/java/com/google/mediapipe/components/FrameProcessor.java @@ -148,6 +148,11 @@ public void setHybridPath() { hybridPath = true; } + /** Adds a callback to the graph to process packets from the specified output stream. 
*/ + public void addPacketCallback(String outputStream, PacketCallback callback) { + mediapipeGraph.addPacketCallback(outputStream, callback); + } + public void addConsumer(TextureFrameConsumer listener) { synchronized (this) { List newConsumers = new ArrayList<>(consumers); diff --git a/mediapipe/java/com/google/mediapipe/framework/PacketGetter.java b/mediapipe/java/com/google/mediapipe/framework/PacketGetter.java index a1a05b175d..d87bc89454 100644 --- a/mediapipe/java/com/google/mediapipe/framework/PacketGetter.java +++ b/mediapipe/java/com/google/mediapipe/framework/PacketGetter.java @@ -14,7 +14,10 @@ package com.google.mediapipe.framework; +import com.google.common.base.Preconditions; import com.google.common.flogger.FluentLogger; +import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.Parser; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; @@ -134,6 +137,22 @@ public static double[] getFloat64Vector(final Packet packet) { return nativeGetFloat64Vector(packet.getNativeHandle()); } + public static List getProtoVector(final Packet packet, Parser messageParser) { + byte[][] protoVector = nativeGetProtoVector(packet.getNativeHandle()); + Preconditions.checkNotNull( + protoVector, "Vector of protocol buffer objects should not be null!"); + try { + List parsedMessageList = new ArrayList<>(); + for (byte[] message : protoVector) { + T parsedMessage = messageParser.parseFrom(message); + parsedMessageList.add(parsedMessage); + } + return parsedMessageList; + } catch (InvalidProtocolBufferException e) { + throw new IllegalArgumentException(e); + } + } + public static int getImageWidth(final Packet packet) { return nativeGetImageWidth(packet.getNativeHandle()); } @@ -277,6 +296,9 @@ public static GraphTextureFrame getTextureFrame(final Packet packet) { private static native long[] nativeGetInt64Vector(long nativePacketHandle); private static native float[] nativeGetFloat32Vector(long nativePacketHandle); 
private static native double[] nativeGetFloat64Vector(long nativePacketHandle); + + private static native byte[][] nativeGetProtoVector(long nativePacketHandle); + private static native int nativeGetImageWidth(long nativePacketHandle); private static native int nativeGetImageHeight(long nativePacketHandle); private static native boolean nativeGetImageData(long nativePacketHandle, ByteBuffer buffer); diff --git a/mediapipe/java/com/google/mediapipe/framework/jni/BUILD b/mediapipe/java/com/google/mediapipe/framework/jni/BUILD index 182226cbbf..0e6e71815d 100644 --- a/mediapipe/java/com/google/mediapipe/framework/jni/BUILD +++ b/mediapipe/java/com/google/mediapipe/framework/jni/BUILD @@ -134,6 +134,7 @@ cc_library( deps = [ "@com_google_absl//absl/synchronization", "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:status", ] + select({ "//conditions:default": [ ], diff --git a/mediapipe/java/com/google/mediapipe/framework/jni/graph_jni.cc b/mediapipe/java/com/google/mediapipe/framework/jni/graph_jni.cc index d968ff5d06..e53b972352 100644 --- a/mediapipe/java/com/google/mediapipe/framework/jni/graph_jni.cc +++ b/mediapipe/java/com/google/mediapipe/framework/jni/graph_jni.cc @@ -69,25 +69,11 @@ mediapipe::Status AddStreamHeadersIntoGraph( return mediapipe::OkStatus(); } -// Creates a java MediaPipeException object for a mediapipe::Status. 
-jthrowable CreateMediaPipeException(JNIEnv* env, mediapipe::Status status) { - jclass status_cls = - env->FindClass("com/google/mediapipe/framework/MediaPipeException"); - jmethodID status_ctr = env->GetMethodID(status_cls, "", "(I[B)V"); - int length = status.message().length(); - jbyteArray message_bytes = env->NewByteArray(length); - env->SetByteArrayRegion(message_bytes, 0, length, - reinterpret_cast(const_cast( - std::string(status.message()).c_str()))); - return reinterpret_cast( - env->NewObject(status_cls, status_ctr, status.code(), message_bytes)); -} - // Throws a MediaPipeException for any non-ok mediapipe::Status. // Note that the exception is thrown after execution returns to Java. bool ThrowIfError(JNIEnv* env, mediapipe::Status status) { if (!status.ok()) { - env->Throw(CreateMediaPipeException(env, status)); + env->Throw(mediapipe::android::CreateMediaPipeException(env, status)); return true; } return false; diff --git a/mediapipe/java/com/google/mediapipe/framework/jni/jni_util.cc b/mediapipe/java/com/google/mediapipe/framework/jni/jni_util.cc index d383a7b0b8..cb9453b6ff 100644 --- a/mediapipe/java/com/google/mediapipe/framework/jni/jni_util.cc +++ b/mediapipe/java/com/google/mediapipe/framework/jni/jni_util.cc @@ -110,6 +110,19 @@ std::string JStringToStdString(JNIEnv* env, jstring jstr) { return str; } +jthrowable CreateMediaPipeException(JNIEnv* env, mediapipe::Status status) { + jclass status_cls = + env->FindClass("com/google/mediapipe/framework/MediaPipeException"); + jmethodID status_ctr = env->GetMethodID(status_cls, "", "(I[B)V"); + int length = status.message().length(); + jbyteArray message_bytes = env->NewByteArray(length); + env->SetByteArrayRegion(message_bytes, 0, length, + reinterpret_cast(const_cast( + std::string(status.message()).c_str()))); + return reinterpret_cast( + env->NewObject(status_cls, status_ctr, status.code(), message_bytes)); +} + } // namespace android namespace java { diff --git 
a/mediapipe/java/com/google/mediapipe/framework/jni/jni_util.h b/mediapipe/java/com/google/mediapipe/framework/jni/jni_util.h index 81a44919d7..9efa283047 100644 --- a/mediapipe/java/com/google/mediapipe/framework/jni/jni_util.h +++ b/mediapipe/java/com/google/mediapipe/framework/jni/jni_util.h @@ -19,12 +19,17 @@ #include +#include "mediapipe/framework/port/status.h" + namespace mediapipe { namespace android { std::string JStringToStdString(JNIEnv* env, jstring jstr); +// Creates a java MediaPipeException object for a mediapipe::Status. +jthrowable CreateMediaPipeException(JNIEnv* env, mediapipe::Status status); + } // namespace android namespace java { diff --git a/mediapipe/java/com/google/mediapipe/framework/jni/packet_getter_jni.cc b/mediapipe/java/com/google/mediapipe/framework/jni/packet_getter_jni.cc index 9940d186e0..1cab1aca7e 100644 --- a/mediapipe/java/com/google/mediapipe/framework/jni/packet_getter_jni.cc +++ b/mediapipe/java/com/google/mediapipe/framework/jni/packet_getter_jni.cc @@ -19,8 +19,10 @@ #include "mediapipe/framework/formats/time_series_header.pb.h" #include "mediapipe/framework/formats/video_stream_header.h" #include "mediapipe/framework/port/core_proto_inc.h" +#include "mediapipe/framework/port/proto_ns.h" #include "mediapipe/java/com/google/mediapipe/framework/jni/colorspace.h" #include "mediapipe/java/com/google/mediapipe/framework/jni/graph.h" +#include "mediapipe/java/com/google/mediapipe/framework/jni/jni_util.h" #ifndef MEDIAPIPE_DISABLE_GPU #include "mediapipe/gpu/gl_calculator_helper.h" #endif // !defined(MEDIAPIPE_DISABLE_GPU) @@ -141,6 +143,37 @@ JNIEXPORT jbyteArray JNICALL PACKET_GETTER_METHOD(nativeGetProtoBytes)( return data; } +JNIEXPORT jobjectArray JNICALL PACKET_GETTER_METHOD(nativeGetProtoVector)( + JNIEnv* env, jobject thiz, jlong packet) { + mediapipe::Packet mediapipe_packet = + mediapipe::android::Graph::GetPacketFromHandle(packet); + auto get_proto_vector = mediapipe_packet.GetVectorOfProtoMessageLitePtrs(); + if 
(!get_proto_vector.ok()) { + env->Throw(mediapipe::android::CreateMediaPipeException( + env, get_proto_vector.status())); + } + const std::vector& proto_vector = + get_proto_vector.ValueOrDie(); + jobjectArray proto_array = + env->NewObjectArray(proto_vector.size(), env->FindClass("[B"), nullptr); + for (int i = 0; i < proto_vector.size(); ++i) { + const ::mediapipe::proto_ns::MessageLite* proto_message = proto_vector[i]; + + // Convert the proto object into a Java byte array. + std::string serialized; + proto_message->SerializeToString(&serialized); + jbyteArray byte_array = env->NewByteArray(serialized.size()); + env->SetByteArrayRegion(byte_array, 0, serialized.size(), + reinterpret_cast(serialized.c_str())); + + // Add the serialized proto byte_array to the output array. + env->SetObjectArrayElement(proto_array, i, byte_array); + env->DeleteLocalRef(byte_array); + } + + return proto_array; +} + JNIEXPORT jshortArray JNICALL PACKET_GETTER_METHOD(nativeGetInt16Vector)( JNIEnv* env, jobject thiz, jlong packet) { const std::vector& values = diff --git a/mediapipe/java/com/google/mediapipe/framework/jni/packet_getter_jni.h b/mediapipe/java/com/google/mediapipe/framework/jni/packet_getter_jni.h index cb35bac667..72c55935d1 100644 --- a/mediapipe/java/com/google/mediapipe/framework/jni/packet_getter_jni.h +++ b/mediapipe/java/com/google/mediapipe/framework/jni/packet_getter_jni.h @@ -69,6 +69,9 @@ JNIEXPORT jbyteArray JNICALL PACKET_GETTER_METHOD(nativeGetBytes)(JNIEnv* env, JNIEXPORT jbyteArray JNICALL PACKET_GETTER_METHOD(nativeGetProtoBytes)( JNIEnv* env, jobject thiz, jlong packet); +JNIEXPORT jobjectArray JNICALL PACKET_GETTER_METHOD(nativeGetProtoVector)( + JNIEnv* env, jobject thiz, jlong packet); + JNIEXPORT jshortArray JNICALL PACKET_GETTER_METHOD(nativeGetInt16Vector)( JNIEnv* env, jobject thiz, jlong packet); diff --git a/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl b/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl index 
eaf4612cfd..0c0d2aac4f 100644 --- a/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl +++ b/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl @@ -64,23 +64,18 @@ cat > $(OUTS) < $(OUTS) < $(OUTS) <_dummy_app target below) diff --git a/mediapipe/util/sequence/media_sequence.py b/mediapipe/util/sequence/media_sequence.py index fc1f15d32c..7a443afe87 100644 --- a/mediapipe/util/sequence/media_sequence.py +++ b/mediapipe/util/sequence/media_sequence.py @@ -284,7 +284,7 @@ # have overlapping track ids. REGION_CLASS_INDEX_KEY = "region/class/index" REGION_CLASS_STRING_KEY = "region/class/string" -REGION_CLASS_CONFIDENCE_KEY = "region/class/confidencee" +REGION_CLASS_CONFIDENCE_KEY = "region/class/confidence" # The timestamp of the region annotation in microseconds. REGION_TIMESTAMP_KEY = "region/timestamp" # The original timestamp in microseconds for region annotations. diff --git a/third_party/com_github_glog_glog_9779e5ea6ef59562b030248947f787d1256132ae.diff b/third_party/com_github_glog_glog_9779e5ea6ef59562b030248947f787d1256132ae.diff index 776e6d6711..89e80a9c3e 100644 --- a/third_party/com_github_glog_glog_9779e5ea6ef59562b030248947f787d1256132ae.diff +++ b/third_party/com_github_glog_glog_9779e5ea6ef59562b030248947f787d1256132ae.diff @@ -1,9 +1,3 @@ -commit 9779e5ea6ef59562b030248947f787d1256132ae -Author: jqtang -Date: Wed Sep 18 11:43:48 2019 -0700 - - Add glog Android support for MediaPipe. 
- diff --git a/src/logging.cc b/src/logging.cc index 0b5e6ee..be5a506 100644 --- a/src/logging.cc diff --git a/third_party/com_google_absl_f863b622fe13612433fdf43f76547d5edda0c93001.diff b/third_party/com_google_absl_f863b622fe13612433fdf43f76547d5edda0c93001.diff new file mode 100644 index 0000000000..0cd2dffa4f --- /dev/null +++ b/third_party/com_google_absl_f863b622fe13612433fdf43f76547d5edda0c93001.diff @@ -0,0 +1,14 @@ +diff --git a/absl/time/internal/cctz/BUILD.bazel b/absl/time/internal/cctz/BUILD.bazel +index 9fceffe..e7f9d01 100644 +--- a/absl/time/internal/cctz/BUILD.bazel ++++ b/absl/time/internal/cctz/BUILD.bazel +@@ -69,8 +69,5 @@ cc_library( + "include/cctz/zone_info_source.h", + ], + linkopts = select({ +- ":osx": [ +- "-framework Foundation", +- ], + ":ios": [ + "-framework Foundation", + ], \ No newline at end of file