From 0f153b0ddc6420c96971c8ad7a496e95f8b2dee2 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Sun, 6 Jun 2021 14:26:46 -0400 Subject: [PATCH] Add benchmark cases for MobileBERT This commit is meant to be an example for showing how to register benchmark cases for a new model. In addition to the changes in CMake, one would also need to generate the input model and place it in cloud storage. --- iree/benchmark/TensorFlow/CMakeLists.txt | 125 ++++++++++++++++++++++- 1 file changed, 124 insertions(+), 1 deletion(-) diff --git a/iree/benchmark/TensorFlow/CMakeLists.txt b/iree/benchmark/TensorFlow/CMakeLists.txt index 7e0df7d68b6a..9f61cf268ced 100644 --- a/iree/benchmark/TensorFlow/CMakeLists.txt +++ b/iree/benchmark/TensorFlow/CMakeLists.txt @@ -16,6 +16,26 @@ # # ################################################################################ +set(MOBILEBERT_FP16_MODULE + "MobileBertSquad" # MODULE_NAME + "fp16" # MODULE_TAGS + # This uses the same input MLIR source as fp32 to save download time. + # It requires users to have "--iree-flow-demote-f32-to-f16". + "https://storage.googleapis.com/iree-model-artifacts/MobileBertSquad-810f6fdc.tar.gz" # MLIR_SOURCE + "serving_default" # ENTRY_FUNCTION + # The conversion done by "--iree-flow-demote-f32-to-f16" won't change the + # original input signature.
+ "1x384xi32,1x384xi32,1x384xi32" # FUNCTION_INPUTS +) + +set(MOBILEBERT_FP32_MODULE + "MobileBertSquad" # MODULE_NAME + "fp32" # MODULE_TAGS + "https://storage.googleapis.com/iree-model-artifacts/MobileBertSquad-810f6fdc.tar.gz" # MLIR_SOURCE + "serving_default" # ENTRY_FUNCTION + "1x384xi32,1x384xi32,1x384xi32" # FUNCTION_INPUTS +) + set(MOBILENET_V2_MODULE "MobileNetV2" # MODULE_NAME "fp32,imagenet" # MODULE_TAGS @@ -34,7 +54,7 @@ set(MOBILENET_V3SMALL_MODULE ################################################################################ # # -# Benchmark suites # +# Common benchmark configurations # # # # Each suite benchmarks a list of modules with some specific configuration, # # typically involving different translation/runtime flags and targeting # @@ -142,6 +162,7 @@ iree_mlir_benchmark_suite( # GPU, Vulkan, Adreno, full-inference iree_mlir_benchmark_suite( MODULES + ${MOBILEBERT_FP32_MODULE} ${MOBILENET_V2_MODULE} ${MOBILENET_V3SMALL_MODULE} @@ -189,6 +210,7 @@ iree_mlir_benchmark_suite( # GPU, Vulkan, Mali, full-inference iree_mlir_benchmark_suite( MODULES + ${MOBILEBERT_FP32_MODULE} ${MOBILENET_V2_MODULE} ${MOBILENET_V3SMALL_MODULE} @@ -232,3 +254,104 @@ iree_mlir_benchmark_suite( RUNTIME_FLAGS "--batch_size=32" ) + +################################################################################ +# # +# Speical benchmark configurations # +# # +# These are configurations that can only be enabled for some specific model. # +# However, THIS SHOULD REALLY BE TEMPORARY; we should strike for uniformity. # +# # +################################################################################ + +# CPU, Dylib-Sync, big/little-core, full-inference +iree_mlir_benchmark_suite( + MODULES + ${MOBILEBERT_FP32_MODULE} + + BENCHMARK_MODES + "big-core,full-inference" + "little-core,full-inference" + TARGET_BACKEND + "dylib-llvm-aot" + TARGET_ARCHITECTURE + "CPU-ARM64-v8A" + TRANSLATION_FLAGS + # TODO: Merge this rule once we can use the same flags as the common one. 
+ "--iree-input-type=mhlo" + "--iree-llvm-target-triple=aarch64-none-linux-android29" + "--iree-flow-inline-constants-max-byte-length=2048" + DRIVER + "dylib-sync" +) + +# CPU, Dylib, 1-thread, big/little-core, full-inference +iree_mlir_benchmark_suite( + MODULES + ${MOBILEBERT_FP32_MODULE} + + BENCHMARK_MODES + "1-thread,big-core,full-inference" + "1-thread,little-core,full-inference" + TARGET_BACKEND + "dylib-llvm-aot" + TARGET_ARCHITECTURE + "CPU-ARM64-v8A" + TRANSLATION_FLAGS + # TODO: Merge this rule once we can use the same flags as the common one. + "--iree-input-type=mhlo" + "--iree-llvm-target-triple=aarch64-none-linux-android29" + "--iree-flow-inline-constants-max-byte-length=2048" + DRIVER + "dylib" + RUNTIME_FLAGS + "--task_topology_group_count=1" +) + +# CPU, Dylib, 3-thread, big/little-core, full-inference +iree_mlir_benchmark_suite( + MODULES + ${MOBILEBERT_FP32_MODULE} + + BENCHMARK_MODES + "3-thread,big-core,full-inference" + "3-thread,little-core,full-inference" + TARGET_BACKEND + "dylib-llvm-aot" + TARGET_ARCHITECTURE + "CPU-ARM64-v8A" + TRANSLATION_FLAGS + # TODO: Merge this rule once we can use the same flags as the common one. 
+ "--iree-input-type=mhlo" + "--iree-llvm-target-triple=aarch64-none-linux-android29" + "--iree-flow-inline-constants-max-byte-length=2048" + DRIVER + "dylib" + RUNTIME_FLAGS + "--task_topology_group_count=3" +) + +# GPU, Vulkan, Mali, kernel-execution +iree_mlir_benchmark_suite( + MODULES + ${MOBILEBERT_FP16_MODULE} + + BENCHMARK_MODES + "kernel-execution" + TARGET_BACKEND + "vulkan-spirv" + TARGET_ARCHITECTURE + "GPU-Mali-Valhall" + TRANSLATION_FLAGS + "--iree-input-type=mhlo" + "--iree-flow-demote-f32-to-f16" + "--iree-vulkan-target-triple=valhall-unknown-android11" + "--iree-flow-inline-constants-max-byte-length=16" + "--iree-flow-dispatch-formation-enable-operand-fusion" + "--iree-enable-fusion-with-reduction-ops" + "--iree-hal-benchmark-dispatch-repeat-count=32" + DRIVER + "vulkan" + RUNTIME_FLAGS + "--batch_size=32" +)