Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion openvino/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
*.bmp
/benchmark_app
/inference_engine_cpp_samples_build/
/model/
/openvino/
/output.txt
115 changes: 73 additions & 42 deletions openvino/Makefile
Original file line number Diff line number Diff line change
@@ -1,81 +1,112 @@
SHELL := /bin/bash # use bash syntax to activate virtual environment and to download models

OPENVINO_DIR ?= $(INTEL_OPENVINO_DIR)

THIS_DIR := $(dir $(lastword $(MAKEFILE_LIST)))

ARCH_LIBDIR ?= /lib/$(shell $(CC) -dumpmachine)

MODEL_DIR ?= $(THIS_DIR)model
MODEL_COMMIT ?= acf297c73db8cb3f68791ae1fad4a7cc4a6039e5 # corresponds to tag "2019_R3"
MODEL_NAME ?= VGG_VOC0712Plus_SSD_300x300_ft_iter_160000
INFERENCE_ENGINE_CPP_SAMPLES_BUILD ?= $(THIS_DIR)inference_engine_cpp_samples_build

MODEL_DIR ?= $(THIS_DIR)/model

OPENVINO_DIR ?= $(THIS_DIR)openvino
OPENVINO_COMMIT ?= 023e7c2c3f8a8ac83564db09799d2049115d9cf6 # corresponds to tag "2020.4"
INTEL_MODELS = \
bert-large-uncased-whole-word-masking-squad-0001 \
bert-large-uncased-whole-word-masking-squad-int8-0001

PUBLIC_MODELS = \
resnet-50-tf \
brain-tumor-segmentation-0001 \
brain-tumor-segmentation-0002 \
ssd_mobilenet_v1_coco

VENV_DIR ?= $(THIS_DIR)/openvino

SGX_SIGNER_KEY ?= ../../Pal/src/host/Linux-SGX/signer/enclave-key.pem

ifeq ($(DEBUG),1)
GRAMINE_LOG_LEVEL = debug
OPENVINO_BUILD = Debug
else
GRAMINE_LOG_LEVEL = error
OPENVINO_BUILD = Release
endif

.PHONY: all
all: $(OPENVINO_DIR)/inference-engine/bin/intel64/$(OPENVINO_BUILD)/object_detection_sample_ssd \
$(MODEL_DIR)/$(MODEL_NAME).bin object_detection_sample_ssd.manifest
all: benchmark_app benchmark_app.manifest $(VENV_DIR)/.INSTALLATION_OK intel_models public_models
ifeq ($(SGX),1)
all: object_detection_sample_ssd.manifest.sgx object_detection_sample_ssd.sig \
object_detection_sample_ssd.token
all: benchmark_app.manifest.sgx benchmark_app.sig benchmark_app.token
endif

$(MODEL_DIR)/README.md:
git clone https://github.com/opencv/open_model_zoo.git $(MODEL_DIR)
git -C $(MODEL_DIR) checkout $(MODEL_COMMIT)

$(MODEL_DIR)/$(MODEL_NAME).bin: $(MODEL_DIR)/README.md
cd $(MODEL_DIR)/tools/downloader && python3 ./downloader.py \
--name ssd300 -o $(abspath $(MODEL_DIR))
cd $(OPENVINO_DIR)/model-optimizer && python3 ./mo.py \
--input_model $(abspath $(MODEL_DIR))/public/ssd300/models/VGGNet/VOC0712Plus/SSD_300x300_ft/$(MODEL_NAME).caffemodel \
--input_proto $(abspath $(MODEL_DIR))/public/ssd300/models/VGGNet/VOC0712Plus/SSD_300x300_ft/deploy.prototxt \
--output_dir $(abspath $(MODEL_DIR))

$(OPENVINO_DIR)/README.md:
git clone https://github.com/openvinotoolkit/openvino.git $(OPENVINO_DIR)
git -C $(OPENVINO_DIR) checkout $(OPENVINO_COMMIT)
git -C $(OPENVINO_DIR)/inference-engine submodule update --init --recursive

$(OPENVINO_DIR)/inference-engine/bin/intel64/$(OPENVINO_BUILD)/object_detection_sample_ssd: $(OPENVINO_DIR)/README.md
cd $(OPENVINO_DIR)/inference-engine && mkdir -p build
cd $(OPENVINO_DIR)/inference-engine/build && cmake -DCMAKE_BUILD_TYPE=$(OPENVINO_BUILD) ../..
$(MAKE) -C $(OPENVINO_DIR)/inference-engine/build

object_detection_sample_ssd.manifest: object_detection_sample_ssd.manifest.template
.PHONY: intel_models
intel_models: $(VENV_DIR)/.INSTALLATION_OK
@source $(VENV_DIR)/bin/activate \
&& cd $(OPENVINO_DIR)/deployment_tools/open_model_zoo/tools/downloader \
&& for model in $(INTEL_MODELS); do \
if [ ! -d $(abspath $(MODEL_DIR))/intel/$$model ]; then \
python3 ./downloader.py --name $$model -o $(abspath $(MODEL_DIR)); \
python3 ./converter.py --name $$model -d $(abspath $(MODEL_DIR)) -o $(abspath $(MODEL_DIR)); \
fi; \
done \
&& deactivate

.PHONY: public_models
public_models: $(VENV_DIR)/.INSTALLATION_OK
@source $(VENV_DIR)/bin/activate \
&& cd $(OPENVINO_DIR)/deployment_tools/open_model_zoo/tools/downloader \
&& for model in $(PUBLIC_MODELS); do \
if [ ! -d $(abspath $(MODEL_DIR))/public/$$model ]; then \
python3 ./downloader.py --name $$model -o $(abspath $(MODEL_DIR)); \
python3 ./converter.py --name $$model -d $(abspath $(MODEL_DIR)) -o $(abspath $(MODEL_DIR)); \
fi; \
done \
&& deactivate

.PRECIOUS: $(VENV_DIR)/.INSTALLATION_OK
$(VENV_DIR)/.INSTALLATION_OK:
mkdir -p model
python3 -m venv $(VENV_DIR) \
&& source $(VENV_DIR)/bin/activate \
&& pip install --upgrade pip \
&& pushd $(OPENVINO_DIR)/deployment_tools/open_model_zoo/tools/downloader \
&& python3 -mpip install -r ./requirements.in \
&& popd \
&& pushd $(OPENVINO_DIR)/deployment_tools/model_optimizer \
&& pip3 install -r requirements.txt \
&& popd \
&& deactivate \
&& touch $@

benchmark_app.manifest: benchmark_app.manifest.template
gramine-manifest \
-Dlog_level=$(GRAMINE_LOG_LEVEL) \
-Darch_libdir=$(ARCH_LIBDIR) \
-Dopenvino_dir=$(abspath $(OPENVINO_DIR)) \
-Dmodel_dir=$(MODEL_DIR) \
-Dopenvino_build=$(OPENVINO_BUILD) \
-Dinference_engine_cpp_samples_build=$(abspath $(INFERENCE_ENGINE_CPP_SAMPLES_BUILD)) \
$< > $@

object_detection_sample_ssd.manifest.sgx: object_detection_sample_ssd.manifest \
$(OPENVINO_DIR)/inference-engine/bin/intel64/$(OPENVINO_BUILD)/object_detection_sample_ssd
benchmark_app.manifest.sgx: benchmark_app.manifest | benchmark_app
@test -s $(SGX_SIGNER_KEY) || \
{ echo "SGX signer private key was not found, please specify SGX_SIGNER_KEY!"; exit 1; }
gramine-sgx-sign \
--key $(SGX_SIGNER_KEY) \
--manifest $< \
--output $@

object_detection_sample_ssd.sig: object_detection_sample_ssd.manifest.sgx
benchmark_app.sig: benchmark_app.manifest.sgx

object_detection_sample_ssd.token: object_detection_sample_ssd.sig
benchmark_app.token: benchmark_app.sig
gramine-sgx-get-token --sig $< --output $@

benchmark_app: $(OPENVINO_DIR)
mkdir -p $(INFERENCE_ENGINE_CPP_SAMPLES_BUILD)
cd $(INFERENCE_ENGINE_CPP_SAMPLES_BUILD) \
&& cmake -DCMAKE_BUILD_TYPE=Release $(OPENVINO_DIR)/inference_engine/samples/cpp \
&& make
ln -s $(INFERENCE_ENGINE_CPP_SAMPLES_BUILD)/intel64/Release/benchmark_app $@

.PHONY: clean
clean:
$(RM) *.manifest *.manifest.sgx *.token *.sig *.bmp
$(RM) *.manifest *.manifest.sgx *.token *.sig benchmark_app

.PHONY: distclean
distclean: clean
$(RM) -r $(OPENVINO_DIR) $(MODEL_DIR)
$(RM) -r $(MODEL_DIR) $(VENV_DIR) $(INFERENCE_ENGINE_CPP_SAMPLES_BUILD)
170 changes: 134 additions & 36 deletions openvino/README.md
Original file line number Diff line number Diff line change
@@ -1,52 +1,150 @@
# OpenVINO
# OpenVINO benchmark

This directory contains a Makefile and a template manifest for the most
recent version of OpenVINO toolkit (as of this writing, version 2020.4).
We use the "Object Detection C++ Sample SSD" (object_detection_sample_ssd)
example from the OpenVINO distribution as a concrete application running
under Gramine-SGX. We test only the CPU backend (i.e., no GPU or FPGA).
This directory contains a Makefile and a template manifest for the most recent version of OpenVINO
toolkit (as of this writing, version 2021.4). We use the `Benchmark C++ Tool` (benchmark_app) from
the OpenVINO distribution as a concrete application running under Gramine-SGX to estimate deep
learning inference performance. We test only the CPU backend (i.e., no GPU or FPGA).

We build OpenVINO from the source code instead of using an existing installation.
**Note:** the build process requires ~1.1GB of disk space and takes ~20 minutes.
## Software requirements

We also download the Open Model Zoo repository and use the SSD300 pre-trained
model from it. **Note:** the model zoo requires ~350MB of disk space.
- OpenVINO: Please download the latest OpenVINO toolkit (as of this writing, version 2021.4) for
  Linux from https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit/download.html.
  For step-by-step OpenVINO installation instructions, please refer to this
  [link](https://docs.openvinotoolkit.org/latest/openvino_docs_install_guides_installing_openvino_linux.html).
- Python (version 3.6 or higher)
- Python virtual environment: `sudo apt-get install python3-venv`
- CMake (version 3.10 or higher)

# Prerequisites
## Supported models for Gramine-SGX

For Ubuntu 18.04, install the following prerequisite packages:
The following models have been tested with Gramine-SGX:

* Install CMake version >= 3.11 (on Ubuntu 18.04, this may require installing
Cmake from a non-official APT repository like Kitware).
- bert-large-uncased-whole-word-masking-squad-0001 (FP16/FP32)
- bert-large-uncased-whole-word-masking-squad-int8-0001 (INT8)
- brain-tumor-segmentation-0001 (FP16/FP32)
- brain-tumor-segmentation-0002 (FP16/FP32)
- resnet-50-tf (FP16/FP32)
- ssd_mobilenet_v1_coco (FP16/FP32)

* Install libusb version >= 1.0.0 (`sudo apt install libusb-1.0-0-dev`).
## Preparing the source

* Install libtbb-dev
1. Set up OpenVINO environment variables by running:
- root user: `source /opt/intel/openvino_2021/bin/setupvars.sh`
- root user and set permanently: append `source /opt/intel/openvino_2021/bin/setupvars.sh` to
`~/.bashrc`
- regular user: `source /home/<USER>/intel/openvino_2021/bin/setupvars.sh`
2. Build: `make SGX=1`

* Install packages for Python3:
`pip3 install pyyaml numpy networkx test-generator defusedxml protobuf>=3.6.1`
## Running the benchmark in Gramine-SGX

# Quick Start
The commands below utilize only socket 0.

```sh
# build OpenVINO together with object_detection_sample_ssd and the final manifest;
# note that it also downloads the SSD300 model and transforms it from the Caffe
# format to an optimized Intermediate Representation (IR)
make SGX=1
### Throughput runs

# run original OpenVINO/object_detection_sample_ssd
# note that this assumes the Release build of OpenVINO (no DEBUG=1)
./openvino/bin/intel64/Release/object_detection_sample_ssd -i images/horses.jpg \
-m model/VGG_VOC0712Plus_SSD_300x300_ft_iter_160000.xml -d CPU
Options `-nireq`, `-nstreams` and `-nthreads` should be set to the *number of logical cores on
socket 0* to achieve maximum performance.

# run OpenVINO/object_detection_sample_ssd in non-SGX Gramine
gramine-direct object_detection_sample_ssd -i images/horses.jpg \
-m model/VGG_VOC0712Plus_SSD_300x300_ft_iter_160000.xml -d CPU
```bash
$ export THREADS_CNT=<number of logical cores on the socket 0>
$ KMP_AFFINITY=granularity=fine,noverbose,compact,1,0 numactl --cpubind=0 --membind=0 \
gramine-sgx benchmark_app -i <image files> \
-m model/<public | intel>/<model_dir>/<INT8 | FP16 | FP32>/<model_xml_file> \
-d CPU -b 1 -t 20 \
-nstreams THREADS_CNT -nthreads THREADS_CNT -nireq THREADS_CNT
```

For example, in a system with 36 physical cores per socket and 2 threads per core, please export
`THREADS_CNT` as below.
```bash
$ export THREADS_CNT=72
```

# run OpenVINO/object_detection_sample_ssd in Gramine-SGX
gramine-sgx object_detection_sample_ssd -i images/horses.jpg \
-m model/VGG_VOC0712Plus_SSD_300x300_ft_iter_160000.xml -d CPU
### Latency runs

# Each of these commands produces an image out_0.bmp with detected objects
xxd out_0.bmp # or open in any image editor
```bash
$ KMP_AFFINITY=granularity=fine,noverbose,compact,1,0 numactl --cpubind=0 --membind=0 \
gramine-sgx benchmark_app -i <image files> \
-m model/<public | intel>/<model_dir>/<INT8 | FP16 | FP32>/<model_xml_file> \
-d CPU -b 1 -t 20 -api sync
```

## Running the benchmark in non-SGX Gramine and natively

To run the benchmark in non-SGX Gramine, replace `gramine-sgx benchmark_app` with
`gramine-direct benchmark_app` in the above commands.

To run the benchmark natively (outside Gramine), replace `gramine-sgx benchmark_app` with
`./benchmark_app` in the above commands.

## Notes

- The models require ~3GB of disk space.
- After setting up the OpenVINO environment variables, you need to unset `LD_LIBRARY_PATH` before
  re-building Gramine. Please make sure to set up the OpenVINO environment variables again after
  building Gramine.
- Option `-i <image files>` is optional. The user may use this option to benchmark specific images
rather than randomly generated ones.
- Please tune the batch size to get the best performance on your system.
- Models for bert-large can be found in `model/intel` directory; the rest of the models can be found
in `model/public` directory.
- For bert-large and brain-tumor-segmentation models the enclave size must be set to 64/128 GB for
throughput runs.
- In multi-socket systems for bert-large-uncased-whole-word-masking-squad-0001 and
brain-tumor-segmentation-0001 FP32/FP16 models, add more NUMA nodes using `numactl --membind` if
memory allocation fails (for throughput runs).

## Performance considerations

### CPU frequency scaling

Linux systems have a CPU frequency scaling governor that scales the CPU frequency either to achieve
the best performance or to save power, depending on the requirement. To achieve the best
performance, please set the CPU frequency scaling governor to `performance` mode.

```bash
for ((i=0; i<$(nproc); i++)); do
echo 'performance' > /sys/devices/system/cpu/cpu$i/cpufreq/scaling_governor;
done
```

### Manifest options for performance

- The preheat manifest option pre-faults the enclave memory and moves the performance penalty to
  Gramine-SGX startup (before the workload starts executing). To use the preheat option, make sure
  that `sgx.preheat_enclave = true` is added to the manifest template.
- Skipping invalid user pointer checks when the application does not invoke system calls with
invalid pointers (typical case) can help improve performance. To use this option, make sure that
`libos.check_invalid_pointers = false` is added to the manifest template.

### Memory allocator libraries

TCMalloc and mimalloc are memory allocator libraries from Google and Microsoft that can
significantly improve performance, depending on the workload. Only one of these allocators can be
used at a time.

#### TCMalloc

(Please update the binary location and name if different from default.)

- Install tcmalloc: `sudo apt-get install google-perftools`
- Modify the manifest template file:
- Add `loader.env.LD_PRELOAD = "/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4"`
- Append below entries to `sgx.trusted_files`:
- `"file:/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4"`
- `"file:/usr/lib/x86_64-linux-gnu/libunwind.so.8"`
- Save the manifest template and rebuild this example.

#### mimalloc

(Please update the binary location and name if different from default.)

- Install mimalloc using the steps from https://github.com/microsoft/mimalloc
- Modify the manifest template file:
- Add the `/usr/local` FS mount point:
- `fs.mount.usr_local.type = "chroot"`
- `fs.mount.usr_local.path = "/usr/local"`
- `fs.mount.usr_local.uri = "file:/usr/local"`
- Add `loader.env.LD_PRELOAD = "/usr/local/lib/mimalloc-1.7/libmimalloc.so.1.7"`
- Append below entry to `sgx.trusted_files`:
- `"file:/usr/local/lib/mimalloc-1.7/libmimalloc.so.1.7"`
- Save the manifest template and rebuild this example.
Loading