Sync from tflite-micro at b4b0891.
Signed-off-by: CFU-Playground-Bot <cfu-playground-bot@google.com>
cfu-playground-bot committed Aug 16, 2022
1 parent 7eabc9c commit fcc61af
Showing 24 changed files with 362 additions and 82 deletions.
2 changes: 1 addition & 1 deletion conf/tflite-micro.version
@@ -1 +1 @@
-e8e05bb
+b4b0891
@@ -457,6 +457,10 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
      return ParseRsqrt(op, error_reporter, allocator, builtin_data);
    }

+    case BuiltinOperator_SELECT_V2: {
+      return ParseSelectV2(op, error_reporter, allocator, builtin_data);
+    }
+
    case BuiltinOperator_SHAPE: {
      return ParseShape(op, error_reporter, allocator, builtin_data);
    }
@@ -865,7 +869,6 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
    case BuiltinOperator_RELU_0_TO_1:
    case BuiltinOperator_SCATTER_ND:
    case BuiltinOperator_SELECT:
-    case BuiltinOperator_SELECT_V2:
    case BuiltinOperator_SLICE:
    case BuiltinOperator_TILE:
    case BuiltinOperator_TOPK_V2:
@@ -1983,6 +1986,14 @@ TfLiteStatus ParseRsqrt(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
  return kTfLiteOk;
}

+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseSelectV2(const Operator*, ErrorReporter*,
+                           BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
TfLiteStatus ParseShape(const Operator* op, ErrorReporter* error_reporter,
                        BuiltinDataAllocator* allocator, void** builtin_data) {
  SafeBuiltinDataAllocator safe_allocator(allocator);
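The new ParseSelectV2 stub exists so that selective registration can reference one dedicated parse function per builtin operator. A minimal sketch of the intended call pattern, assuming a MicroMutableOpResolver-style AddSelectV2() helper (a hypothetical name, not shown in this diff):

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Sketch only: AddSelectV2() is assumed. Adding an op through the resolver is
// what pulls its parse function (ParseSelectV2 above) into the link; parsers
// for ops that are never added can be dead-stripped from the binary.
tflite::MicroMutableOpResolver<1>* BuildResolver() {
  static tflite::MicroMutableOpResolver<1> resolver;
  resolver.AddSelectV2();  // hypothetical helper for BuiltinOperator_SELECT_V2
  return &resolver;
}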
@@ -319,6 +319,10 @@ TfLiteStatus ParseRound(const Operator* op, ErrorReporter* error_reporter,
TfLiteStatus ParseRsqrt(const Operator* op, ErrorReporter* error_reporter,
                        BuiltinDataAllocator* allocator, void** builtin_data);

+TfLiteStatus ParseSelectV2(const Operator* op, ErrorReporter* error_reporter,
+                           BuiltinDataAllocator* allocator,
+                           void** builtin_data);
+
TfLiteStatus ParseShape(const Operator* op, ErrorReporter* error_reporter,
                        BuiltinDataAllocator* allocator, void** builtin_data);
@@ -0,0 +1,151 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SELECT_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SELECT_H_

#include <cmath>

#include "ruy/profiler/instrumentation.h"  // from @ruy
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {
namespace reference_ops {

template <typename D, typename T>
void Select(const RuntimeShape& input_condition_shape,
            const D* input_condition_data, const RuntimeShape& input_x_shape,
            const T* input_x_data, const RuntimeShape& input_y_shape,
            const T* input_y_data, const RuntimeShape& output_shape,
            T* output_data) {
  ruy::profiler::ScopeLabel label("Select");
  int64_t flatsize;
  // Allow select operator executions on mixed scalar tensors and one element
  // tensors.
  if (input_condition_shape.FlatSize() == 1 && input_x_shape.FlatSize() == 1 &&
      input_y_shape.FlatSize() == 1 && output_shape.FlatSize() == 1) {
    flatsize = 1;
  } else {
    flatsize = MatchingFlatSize(input_condition_shape, input_x_shape,
                                input_y_shape, output_shape);
  }
  for (int64_t i = 0; i < flatsize; ++i) {
    output_data[i] =
        input_condition_data[i] ? input_x_data[i] : input_y_data[i];
  }
}

template <typename D, typename T>
void RankOneSelect(const RuntimeShape& input_condition_shape,
                   const D* input_condition_data,
                   const RuntimeShape& input_x_shape, const T* input_x_data,
                   const RuntimeShape& input_y_shape, const T* input_y_data,
                   const RuntimeShape& output_shape, T* output_data) {
  ruy::profiler::ScopeLabel label("Select/RankOneSelect");
  const int64_t outer_size = input_condition_shape.FlatSize();
  int64_t inner_size;
  if (input_condition_shape.DimensionsCount() == 0) {
    inner_size = MatchingFlatSize(input_x_shape, input_y_shape, output_shape);
  } else {
    TFLITE_DCHECK_EQ(
        MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0),
        outer_size);
    inner_size =
        MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape);
  }

  int64_t offset = 0;
  for (int64_t i = 0; i < outer_size; i++) {
    const T* input_data =
        input_condition_data[i] ? input_x_data : input_y_data;
    memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T));
    offset += inner_size;
  }
}

template <typename D, typename T>
void BroadcastSelect5DSlow(const RuntimeShape& input_condition_shape,
                           const D* input_condition_data,
                           const RuntimeShape& input_x_shape,
                           const T* input_x_data,
                           const RuntimeShape& input_y_shape,
                           const T* input_y_data,
                           const RuntimeShape& output_shape, T* output_data) {
  ruy::profiler::ScopeLabel label("Select/BroadcastSelectSlow");
  TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 5);
  TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 5);
  TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 5);
  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 5);

  NdArrayDesc<5> desc_condition;
  NdArrayDesc<5> desc_x;
  NdArrayDesc<5> desc_y;
  NdArrayDesc<5> desc_output;
  const RuntimeShape extended_output_shape =
      RuntimeShape::ExtendedShape(5, output_shape);
  CopyDimsToDesc(extended_output_shape, &desc_output);
  NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape,
                                      input_y_shape, &desc_condition, &desc_x,
                                      &desc_y);

  // In Tensorflow, the dimensions are canonically named (batch_number, row,
  // col, channel), with extents (batches, height, width, depth), with the
  // trailing dimension changing most rapidly (channels has the smallest
  // stride, typically 1 element).
  //
  // In generated C code, we store arrays with the dimensions reversed. The
  // first dimension has smallest stride.
  //
  // We name our variables by their Tensorflow convention, but generate C code
  // nesting loops such that the innermost loop has the smallest stride for
  // the best cache behavior.
  for (int n = 0; n < desc_output.extents[0]; ++n) {
    int out_idx_n = desc_output.extents[1] * n;
    int cond_idx_n = desc_condition.strides[0] * n;
    int in_idx1_n = desc_x.strides[0] * n;
    int in_idx2_n = desc_y.strides[0] * n;
    for (int b = 0; b < desc_output.extents[1]; ++b) {
      int out_idx_b = (out_idx_n + b) * desc_output.extents[2];
      int cond_idx_b = cond_idx_n + desc_condition.strides[1] * b;
      int in_idx1_b = in_idx1_n + desc_x.strides[1] * b;
      int in_idx2_b = in_idx2_n + desc_y.strides[1] * b;
      for (int y = 0; y < desc_output.extents[2]; ++y) {
        int out_idx_y = (out_idx_b + y) * desc_output.extents[3];
        int cond_idx_y = cond_idx_b + desc_condition.strides[2] * y;
        int in_idx1_y = in_idx1_b + desc_x.strides[2] * y;
        int in_idx2_y = in_idx2_b + desc_y.strides[2] * y;
        for (int x = 0; x < desc_output.extents[3]; ++x) {
          int out_idx = (out_idx_y + x) * desc_output.extents[4];
          int cond_idx = cond_idx_y + desc_condition.strides[3] * x;
          int in_idx1 = in_idx1_y + desc_x.strides[3] * x;
          int in_idx2 = in_idx2_y + desc_y.strides[3] * x;
          for (int c = 0; c < desc_output.extents[4]; ++c) {
            output_data[out_idx] = input_condition_data[cond_idx]
                                       ? input_x_data[in_idx1]
                                       : input_y_data[in_idx2];
            out_idx++;
            cond_idx += desc_condition.strides[4];
            in_idx1 += desc_x.strides[4];
            in_idx2 += desc_y.strides[4];
          }
        }
      }
    }
  }
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SELECT_H_
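The three kernels in this new header cover a flat fast path, a rank-one path, and a 5-D broadcasting fallback whose loop nest keeps the smallest stride innermost. A standalone sketch (not part of the commit) of the element-wise semantics, using the header path implied by its include guard:

#include <cstdio>

#include "tensorflow/lite/kernels/internal/reference/select.h"

int main() {
  const bool cond[4] = {true, false, false, true};
  const float x[4] = {1.f, 2.f, 3.f, 4.f};
  const float y[4] = {10.f, 20.f, 30.f, 40.f};
  float out[4];
  const tflite::RuntimeShape shape({4});
  // output[i] = cond[i] ? x[i] : y[i]
  tflite::reference_ops::Select(shape, cond, shape, x, shape, y, shape, out);
  for (float v : out) printf("%g ", v);  // prints: 1 20 30 4
  return 0;
}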
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -41,6 +41,7 @@ TF_LITE_MICRO_TEST(TestAudioProvider) {
  for (int i = 0; i < audio_samples_size; ++i) {
    total += audio_samples[i];
  }
+  (void)total;
}

TF_LITE_MICRO_TEST(TestTimer) {
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -37,6 +37,7 @@ TF_LITE_MICRO_TEST(TestImageProvider) {
  for (int i = 0; i < kMaxImageSize; ++i) {
    total += image_data[i];
  }
+  (void)total;
}

TF_LITE_MICRO_TESTS_END
@@ -74,7 +74,7 @@ void setup() {
  // tflite::AllOpsResolver resolver;
  // NOLINTNEXTLINE(runtime-global-variables)
  static tflite::MicroMutableOpResolver<5> micro_op_resolver;
-  micro_op_resolver.AddAveragePool2D();
+  micro_op_resolver.AddAveragePool2D(tflite::Register_AVERAGE_POOL_2D_INT8());
  micro_op_resolver.AddConv2D(tflite::Register_CONV_2D_INT8());
  micro_op_resolver.AddDepthwiseConv2D(
      tflite::Register_DEPTHWISE_CONV_2D_INT8());
@@ -54,7 +54,7 @@ TF_LITE_MICRO_TEST(TestInvoke) {
  // incur some penalty in code space for op implementations that are not
  // needed by this graph.
  tflite::MicroMutableOpResolver<5> micro_op_resolver;
-  micro_op_resolver.AddAveragePool2D();
+  micro_op_resolver.AddAveragePool2D(tflite::Register_AVERAGE_POOL_2D_INT8());
  micro_op_resolver.AddConv2D(tflite::Register_CONV_2D_INT8());
  micro_op_resolver.AddDepthwiseConv2D(
      tflite::Register_DEPTHWISE_CONV_2D_INT8());
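As the comment in this test notes, an explicitly populated MicroMutableOpResolver trades convenience for code space against the catch-all AllOpsResolver. A minimal sketch of the contrast, using the resolver and registration names that appear in the diff above:

#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

void CompareResolvers() {
  // Links every builtin kernel into the binary: simple, but large.
  tflite::AllOpsResolver all_ops;

  // Links only the kernels this graph needs; pinning the int8 variants
  // (e.g. Register_CONV_2D_INT8) lets the float paths be dead-stripped.
  tflite::MicroMutableOpResolver<5> lean;
  lean.AddAveragePool2D(tflite::Register_AVERAGE_POOL_2D_INT8());
  lean.AddConv2D(tflite::Register_CONV_2D_INT8());
  // The remaining three ops would be added the same way, as in the test.
}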
15 changes: 14 additions & 1 deletion third_party/tflite-micro/tensorflow/lite/micro/kernels/add.h
@@ -1,4 +1,4 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -59,6 +59,19 @@ TfLiteStatus CalculateOpDataAdd(TfLiteContext* context, TfLiteAddParams* params,

TfLiteStatus AddPrepare(TfLiteContext* context, TfLiteNode* node);

+// Generic must define registration function.
+TfLiteRegistration Register_ADD();
+
+#if defined(CMSIS_NN)
+TfLiteRegistration Register_ADD_INT8();
+
+TfLiteRegistration Register_ADD_INT16();
+#else
+// Fallback registration
+inline TfLiteRegistration Register_ADD_INT8() { return Register_ADD(); }
+
+inline TfLiteRegistration Register_ADD_INT16() { return Register_ADD(); }
+#endif
}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_ADD_H_
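The #if defined(CMSIS_NN) block above is a fallback-registration pattern: callers can always request the specialized kernel, and builds without CMSIS-NN transparently get the generic one. A usage sketch, assuming AddAdd() accepts a registration override the way AddConv2D() does elsewhere in this diff:

#include "tensorflow/lite/micro/kernels/add.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

TfLiteStatus RegisterAdd(tflite::MicroMutableOpResolver<1>& resolver) {
  // With CMSIS_NN defined this is the optimized int8 kernel; otherwise the
  // inline fallback above returns the generic Register_ADD().
  return resolver.AddAdd(tflite::Register_ADD_INT8());
}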
@@ -133,7 +133,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE(context,
                 input_type == kTfLiteFloat32 || input_type == kTfLiteInt8 ||
                     input_type == kTfLiteInt16 || input_type == kTfLiteInt32 ||
-                    input_type == kTfLiteInt64);
+                    input_type == kTfLiteInt64 || input_type == kTfLiteBool);

  // Output type must match input type
  TF_LITE_ENSURE_EQ(context, output_type, input_type);
@@ -167,6 +167,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE(context, output != nullptr);

  switch (output_type) {  // Already know in/outtypes are same.
+    case kTfLiteBool:
    case kTfLiteFloat32:
    case kTfLiteInt16:
    case kTfLiteInt32:
@@ -236,6 +237,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
    case kTfLiteInt16:
      EvalUnquantized<int16_t>(context, node);
      break;
+    case kTfLiteBool:
+      EvalUnquantized<bool>(context, node);
+      break;

    default:
      MicroPrintf("Op Concatenation does not currently support Type '%s'.",
@@ -22,6 +22,8 @@ limitations under the License.
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
@@ -530,11 +532,20 @@ void CalculateLstmGateInteger8x8_16(
  // Apply activation
  switch (activation) {
    case kTfLiteActSigmoid:
-      micro_tensor_utils::ApplySigmoid(gate, n_batch, n_cell, gate);
-      break;
-    case kTfLiteActTanh:
-      micro_tensor_utils::ApplyTanh(3, gate, n_batch, n_cell, gate);
+
+      reference_integer_ops::Logistic(
+          0 /*data->input_multiplier*/, 0 /*data->input_left_shift */,
+          n_batch * n_cell /*NumElements(input->dims)*/,
+          gate /* tflite::micro::GetTensorData<int16_t>(input) */,
+          gate /*tflite::micro::GetTensorData<int16_t>(output) */);
+
      break;
+    case kTfLiteActTanh: {
+      int32_t dims_data = n_batch * n_cell;
+      RuntimeShape tanh_inp_shape = RuntimeShape(1, &dims_data);
+      reference_integer_ops::Tanh(0, 0, tanh_inp_shape, gate, tanh_inp_shape,
+                                  gate);
+    } break;
    default:
      // Only Sigmoid or Tanh is used.
      TFLITE_ASSERT_FALSE;
@@ -599,16 +610,31 @@ void UpdateLstmCellInteger(int n_batch, int n_cell, int16_t* cell_state,
// - scratch1: scratch area of size n_batch*n_cell
// - scratch2: scratch area used by MatrixBatchVectorMultiplyAccumulate
void CalculateLstmOutputInteger8x8_16(
-    int n_batch, int n_cell, int n_output, const int16_t* cell_state,
+    int n_batch, int n_cell, int n_output, int16_t* cell_state,
    int32_t cell_state_scale, const int16_t* output_gate,
    int32_t hidden_scale_a, int32_t hidden_scale_b, int32_t hidden_zp,
    const int8_t* projection_weights, int32_t proj_scale_a,
    int32_t proj_scale_b, const int32_t* projection_bias,
    int32_t output_state_zp, int8_t quantized_proj_clip, int8_t* output_state,
    int16_t* scratch0, int8_t* scratch1, int32_t* scratch2) {
  // Note: unlike float/hybrid, the activation is always Tanh.
-  micro_tensor_utils::ApplyTanh(15 + cell_state_scale, cell_state, n_batch,
-                                n_cell, scratch0);
+
+  {
+    int32_t tanh_input_left_shift = (15 + cell_state_scale) - 3;
+    int32_t dims_data = n_batch * n_cell;
+    if (tanh_input_left_shift < 0) /* handling negative shift value */
+    {
+      int32_t i;
+      tanh_input_left_shift = -tanh_input_left_shift;
+      for (i = 0; i < dims_data; i++) {
+        cell_state[i] = cell_state[i] >> tanh_input_left_shift;
+      }
+      tanh_input_left_shift = 0;
+    }
+    RuntimeShape tanh_inp_shape = RuntimeShape(1, &dims_data);
+    reference_integer_ops::Tanh(0, tanh_input_left_shift, tanh_inp_shape,
+                                cell_state, tanh_inp_shape, scratch0);
+  }
  micro_tensor_utils::CwiseMul(output_gate, scratch0, hidden_scale_a,
                               hidden_scale_b, n_batch, n_cell, hidden_zp,
                               scratch1);
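The block replacing micro_tensor_utils::ApplyTanh deserves a note: reference_integer_ops::Tanh takes a non-negative input left shift, so when (15 + cell_state_scale) - 3 is negative the shift is folded into cell_state as an in-place arithmetic right shift and the kernel is then called with shift 0. A standalone sketch of just that pre-shift step, with a hypothetical helper name and a worked value:

#include <cstdint>
#include <cstdio>

// Mirrors the pre-shift above: a negative left shift becomes an in-place
// right shift of the data, and the returned shift is clamped to zero.
int32_t PreShiftForTanh(int16_t* cell_state, int32_t n,
                        int32_t cell_state_scale) {
  int32_t left_shift = (15 + cell_state_scale) - 3;
  if (left_shift < 0) {
    for (int32_t i = 0; i < n; ++i) {
      cell_state[i] = cell_state[i] >> -left_shift;
    }
    left_shift = 0;
  }
  return left_shift;
}

int main() {
  int16_t cs[2] = {1024, -2048};
  // cell_state_scale = -13 gives (15 - 13) - 3 = -1: shift the data right by 1.
  int32_t shift = PreShiftForTanh(cs, 2, -13);
  printf("%d %d shift=%d\n", cs[0], cs[1], shift);  // 512 -1024 shift=0
  return 0;
}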
11 changes: 10 additions & 1 deletion third_party/tflite-micro/tensorflow/lite/micro/kernels/mul.h
@@ -1,4 +1,4 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -60,6 +60,15 @@ void EvalMulFloatReference(TfLiteContext* context, TfLiteNode* node,
                           const TfLiteEvalTensor* input2,
                           TfLiteEvalTensor* output);

+// Generic must define registration function.
+TfLiteRegistration Register_MUL();
+
+#if defined(CMSIS_NN)
+TfLiteRegistration Register_MUL_INT8();
+#else
+// Fallback registration
+inline TfLiteRegistration Register_MUL_INT8() { return Register_MUL(); }
+#endif
}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_MUL_H_
