diff --git a/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt index 3da37de2b70..3dbf314558b 100644 --- a/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt +++ b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt @@ -2397,15 +2397,13 @@ grappler amsgrad qoperator apis -PostTrainingQuantConfig -dgpu CPz PostTrainingQuantConfig -dgpu Nsh UmK fe vmware +PythonLauncher keepachangelog vscode IntelNeuralCompressor @@ -2451,7 +2449,6 @@ npmjs AWSSageMakerSupport sagemaker xpu -PostTrainingQuantConfig dgpu BenchmarkConfig QuantizationAwareTrainingConfig @@ -2464,3 +2461,6 @@ Namhoon Thalaiyasingam Torr MixedPrecisionConfig +AccuracyCriterion +AccuracyLoss +TuningCriterion diff --git a/docs/source/_static/imgs/pruning/pruning_patterns.png b/docs/source/_static/imgs/pruning/pruning_patterns.png index d453622ed5a..0bb10d43906 100644 Binary files a/docs/source/_static/imgs/pruning/pruning_patterns.png and b/docs/source/_static/imgs/pruning/pruning_patterns.png differ diff --git a/examples/.config/model_params_tensorflow.json b/examples/.config/model_params_tensorflow.json index 05cb683d122..21a7c3a31c5 100644 --- a/examples/.config/model_params_tensorflow.json +++ b/examples/.config/model_params_tensorflow.json @@ -539,10 +539,8 @@ "model_src_dir": "nlp/transformer_lt/quantization/ptq", "dataset_location": "/tf_dataset/tensorflow/transformer-lt-official-fp32-inference/transformer_lt_official_fp32_pretrained_model/data", "input_model": "/tf_dataset/tensorflow/transformer-lt-official-fp32-inference/transformer_lt_official_fp32_pretrained_model/graph/fp32_graphdef.pb", - "yaml": "transformer_lt.yaml", - "strategy": "basic", - "batch_size": 64, - "new_benchmark": false + "main_script": "main.py", + "batch_size": 64 }, "bert_large_squad": { "model_src_dir": "nlp/bert_large_squad/quantization/ptq", @@ -566,10 +564,8 @@ "model_src_dir": "nlp/bert_base_mrpc/quantization/ptq", "dataset_location": "/tf_dataset/tensorflow/bert/mrpc_data", "input_model": "/tf_dataset/tensorflow/bert/bert_base_mrpc", - "yaml": "mrpc.yaml", - "strategy": "basic", - "batch_size": 64, - "new_benchmark": true + "main_script": "run_classifier.py", + "batch_size": 64 }, "deeplab": { "model_src_dir": "semantic_image_segmentation/deeplab/quantization/ptq", @@ -2059,19 +2055,15 @@ "model_src_dir": "nlp/transformer_lt_mlperf/quantization/ptq", "dataset_location": "/tf_dataset2/datasets/transformer_uniform_data", "input_model": "/tf_dataset2/models/tensorflow/transformer_lt_mlperf/fp32/transformer_mlperf_fp32.pb", - "yaml": "transformer_lt_mlperf.yaml", - "strategy": "basic", - "batch_size": 64, - "new_benchmark": false + "main_script": "run_inference.py", + "batch_size": 64 }, "distilbert_base": { "model_src_dir": "nlp/distilbert_base/quantization/ptq", "dataset_location": "/tf_dataset2/datasets/sst2_validation_dataset", "input_model": "/tf_dataset2/models/tensorflow/distilbert_base/fp32/distilbert_base_fp32.pb", - "yaml": "distilbert_base.yaml", - "strategy": "basic", - "batch_size": 128, - "new_benchmark": false + "main_script": "run_inference.py", + "batch_size": 128 }, "xception": { "model_src_dir": "image_recognition/keras_models/xception/quantization/ptq", diff --git a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/README.md b/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/README.md deleted file mode 100644 index 415a155c4b2..00000000000 --- 
a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/README.md +++ /dev/null @@ -1,54 +0,0 @@ -Step-by-Step -============ - -This document is used to enable Tensorflow SavedModel format using Intel® Neural Compressor for performance only. -This example can run on Intel CPUs and GPUs. - - -## Prerequisite - -### 1. Installation -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` -### 2. Install Intel Tensorflow -```shell -pip install intel-tensorflow -``` -> Note: Supported Tensorflow >= 2.4.0. - -### 3. Install Intel Extension for Tensorflow -#### Quantizing the model on Intel GPU -Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. - -```shell -pip install --upgrade intel-extension-for-tensorflow[gpu] -``` -For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers) - -#### Quantizing the model on Intel CPU(Experimental) -Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. - -```shell -pip install --upgrade intel-extension-for-tensorflow[cpu] -``` - -### 4. Prepare Pretrained model -Download the model from tensorflow-hub. - -image recognition -- [mobilenetv1(experiment)](https://hub.tensorflow.google.cn/google/imagenet/mobilenet_v1_075_224/classification/5) -- [mobilenetv2(experiment)](https://hub.tensorflow.google.cn/google/imagenet/mobilenet_v2_035_224/classification/5) -- [efficientnet_v2_b0](https://hub.tensorflow.google.cn/google/imagenet/efficientnet_v2_imagenet1k_b0/classification/2) - -## Write Yaml config file -In examples directory, there are mobilenet_v1.yaml, mobilenet_v2.yaml and efficientnet_v2_b0.yaml for tuning the model on Intel CPUs. The 'framework' in the yaml is set to 'tensorflow'. If running this example on Intel GPUs, the 'framework' should be set to 'tensorflow_itex' and the device in yaml file should be set to 'gpu'. The mobilenet_v1_itex.yaml, mobilenet_v2_itex.yaml and efficientnet_v2_b0_itex.yaml are prepared for the GPU case. We could remove most of items and only keep mandatory item for tuning. We also implement a calibration dataloader and have evaluation field for creation of evaluation function at internal neural_compressor. - -## Run Command - ```shell - bash run_tuning.sh --config=./config.yaml --input_model=./SavedModel --output_model=./nc_SavedModel - ``` - ```shell - bash run_benchmark.sh --config=./config.yaml --input_model=./SavedModel --mode=performance - ``` diff --git a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/efficientnet_v2_b0.yaml b/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/efficientnet_v2_b0.yaml deleted file mode 100644 index bce410a1004..00000000000 --- a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/efficientnet_v2_b0.yaml +++ /dev/null @@ -1,90 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: # mandatory. neural_compressor uses this model name and framework name to decide where to save tuning history and deploy yaml. - name: efficientnet_v2_b0 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 5, 10, 50, 100 # optional. default value is the size of whole dataset. used to set how many portions of calibration dataset is used. exclusive with iterations field. - dataloader: - dataset: - ImagenetRaw: - data_path: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - image_list: /path/to/calibration/label # data file, record image_names and their labels - transform: - PaddedCenterCrop: - size: 224 - crop_padding: 32 - Resize: - size: 224 - interpolation: bicubic - Normalize: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImagenetRaw: - data_path: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - image_list: /path/to/evaluation/label # data file, record image_names and their labels - transform: - PaddedCenterCrop: - size: 224 - crop_padding: 32 - Resize: - size: 224 - interpolation: bicubic - Normalize: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] - performance: # optional. used to benchmark performance of passing model. - iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImagenetRaw: - data_path: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - image_list: /path/to/evaluation/label # data file, record image_names and their labels - transform: - PaddedCenterCrop: - size: 224 - crop_padding: 32 - Resize: - size: 224 - interpolation: bicubic - Normalize: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. 
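[Editorial note, not part of the patch] The YAML files and `neural_compressor.experimental` driver scripts deleted in this change belong to the legacy 1.x configuration flow; the dictionary entries added above (`AccuracyCriterion`, `TuningCriterion`) and the switch from `yaml`/`strategy` to `main_script` in the example metadata correspond to the code-based 2.x config API that replaces them. Below is a minimal sketch of that replacement flow, assuming the public `neural_compressor.config` class names; the dataloader and evaluation function are hypothetical placeholders, and exact parameter names may differ between releases.

```python
# Minimal sketch (assumption: Intel Neural Compressor 2.x config API) of the
# code-based flow that supersedes the deleted YAML configs and run_tuning.sh.
from neural_compressor import quantization
from neural_compressor.benchmark import fit as benchmark_fit
from neural_compressor.config import (
    AccuracyCriterion,
    BenchmarkConfig,
    PostTrainingQuantConfig,
    TuningCriterion,
)

calib_dataloader = ...        # placeholder: dataloader over the calibration dataset

def eval_func(model):         # placeholder: returns a single accuracy number
    ...

# Counterpart of the deleted tuning/accuracy_criterion and exit_policy sections:
# tolerate 1% relative accuracy loss, no tuning timeout.
conf = PostTrainingQuantConfig(
    device="cpu",                         # "gpu" when intel-extension-for-tensorflow[gpu] is installed
    calibration_sampling_size=[50, 100],  # replaces quantization.calibration.sampling_size
    accuracy_criterion=AccuracyCriterion(criterion="relative", tolerable_loss=0.01),
    tuning_criterion=TuningCriterion(timeout=0),
)

q_model = quantization.fit(
    model="./SavedModel",                 # same input as the deleted run_tuning.sh example
    conf=conf,
    calib_dataloader=calib_dataloader,
    eval_func=eval_func,
)
q_model.save("./nc_SavedModel")

# Counterpart of the deleted evaluation.performance section and run_benchmark.sh.
benchmark_fit(
    model="./nc_SavedModel",
    conf=BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7),
    b_dataloader=calib_dataloader,
)
```

With the tuning constraints carried by the config objects rather than per-model YAML files, each example only needs to record `main_script` and `batch_size`, which is what the `model_params_tensorflow.json` hunks above change.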
diff --git a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/efficientnet_v2_b0_itex.yaml b/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/efficientnet_v2_b0_itex.yaml deleted file mode 100644 index 05aa4502599..00000000000 --- a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/efficientnet_v2_b0_itex.yaml +++ /dev/null @@ -1,89 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: # mandatory. neural_compressor uses this model name and framework name to decide where to save tuning history and deploy yaml. - name: efficientnet_v2_b0 - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 5, 10, 50, 100 # optional. default value is the size of whole dataset. used to set how many portions of calibration dataset is used. exclusive with iterations field. - dataloader: - dataset: - ImagenetRaw: - data_path: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - image_list: /path/to/calibration/label # data file, record image_names and their labels - transform: - PaddedCenterCrop: - size: 224 - crop_padding: 32 - Resize: - size: 224 - interpolation: bicubic - Normalize: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImagenetRaw: - data_path: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - image_list: /path/to/evaluation/label # data file, record image_names and their labels - transform: - PaddedCenterCrop: - size: 224 - crop_padding: 32 - Resize: - size: 224 - interpolation: bicubic - Normalize: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] - performance: # optional. used to benchmark performance of passing model. 
- iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImagenetRaw: - data_path: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - image_list: /path/to/evaluation/label # data file, record image_names and their labels - transform: - PaddedCenterCrop: - size: 224 - crop_padding: 32 - Resize: - size: 224 - interpolation: bicubic - Normalize: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/main.py b/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/main.py deleted file mode 100644 index 3d2713107f6..00000000000 --- a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/main.py +++ /dev/null @@ -1,59 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2020 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# - -from __future__ import division -import time -import numpy as np -import tensorflow as tf -from argparse import ArgumentParser - -class eval_object_detection_optimized_graph(object): - - def __init__(self): - arg_parser = ArgumentParser(description='Parse args') - - arg_parser.add_argument('-g', - "--input-graph", - help='Specify the input graph.', - dest='input_graph') - arg_parser.add_argument('--config', type=str, default='') - arg_parser.add_argument('--output_model', type=str, default='') - arg_parser.add_argument('--mode', type=str, default='performance') - arg_parser.add_argument('--tune', action='store_true', default=False) - arg_parser.add_argument('--benchmark', dest='benchmark', - action='store_true', help='run benchmark') - self.args = arg_parser.parse_args() - - def run(self): - if self.args.tune: - from neural_compressor.experimental import Quantization - quantizer = Quantization(self.args.config) - quantizer.model = self.args.input_graph - q_model = quantizer.fit() - q_model.save(self.args.output_model) - - if self.args.benchmark: - from neural_compressor.experimental import Benchmark - evaluator = Benchmark(self.args.config) - evaluator.model = self.args.input_graph - evaluator(self.args.mode) - -if __name__ == "__main__": - evaluate_opt_graph = eval_object_detection_optimized_graph() - evaluate_opt_graph.run() diff --git a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/mobilenet_v1.yaml b/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/mobilenet_v1.yaml deleted file mode 100644 index 72e7ff3ddf9..00000000000 --- a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/mobilenet_v1.yaml +++ /dev/null @@ -1,73 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: mobilenet_v1 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 20, 50 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - weight: - granularity: per_channel - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. 
- metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - performance: # optional. used to benchmark performance of passing model. - iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/mobilenet_v1_itex.yaml b/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/mobilenet_v1_itex.yaml deleted file mode 100644 index 29b4dcde606..00000000000 --- a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/mobilenet_v1_itex.yaml +++ /dev/null @@ -1,73 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: mobilenet_v1 - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 20, 50 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - weight: - granularity: per_channel - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. 
- dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - performance: # optional. used to benchmark performance of passing model. - iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/mobilenet_v2.yaml b/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/mobilenet_v2.yaml deleted file mode 100644 index 0b0c25b458b..00000000000 --- a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/mobilenet_v2.yaml +++ /dev/null @@ -1,82 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: mobilenet_v2 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 20, 50 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - weight: - granularity: per_channel - - op_wise: { - 'MobilenetV2/expanded_conv/depthwise/depthwise': { - 'activation': {'dtype': ['fp32']}, - }, - 'MobilenetV2/Conv_1/Conv2D': { - 'activation': {'dtype': ['fp32']}, - } - } - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. 
- dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - performance: # optional. used to benchmark performance of passing model. - iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/mobilenet_v2_itex.yaml b/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/mobilenet_v2_itex.yaml deleted file mode 100644 index 0ae6a06a429..00000000000 --- a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/mobilenet_v2_itex.yaml +++ /dev/null @@ -1,82 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: mobilenet_v2 - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 20, 50 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - weight: - granularity: per_channel - - op_wise: { - 'MobilenetV2/expanded_conv/depthwise/depthwise': { - 'activation': {'dtype': ['fp32']}, - }, - 'MobilenetV2/Conv_1/Conv2D': { - 'activation': {'dtype': ['fp32']}, - } - } - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. 
- dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - performance: # optional. used to benchmark performance of passing model. - iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/run_benchmark.sh b/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/run_benchmark.sh deleted file mode 100644 index 2304baaea20..00000000000 --- a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/run_benchmark.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_benchmark { - - python main.py \ - --input-graph ${input_model} \ - --config ${config} \ - --mode ${mode} \ - --benchmark -} - -main "$@" diff --git a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/run_tuning.sh b/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/run_tuning.sh deleted file mode 100644 index 23e86e2dc42..00000000000 --- a/examples/tensorflow/image_recognition/SavedModel/quantization/ptq/run_tuning.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - - run_tuning - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --config=*) - config=$(echo "$var" |cut -f2 -d=) - ;; - --input_model=*) - input_model=$(echo "$var" |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo "$var" |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - python main.py \ - --input-graph "${input_model}" \ - --config ${config} \ - --output_model "${output_model}" \ - --tune -} - -main "$@" diff --git a/examples/tensorflow/image_recognition/ViT/pruning/magnitude/README.md b/examples/tensorflow/image_recognition/ViT/pruning/magnitude/README.md deleted file mode 100644 index 215cf5d968b..00000000000 --- a/examples/tensorflow/image_recognition/ViT/pruning/magnitude/README.md +++ /dev/null @@ -1,42 +0,0 @@ -Step-by-Step -============ - -This document is used to list steps of reproducing Intel® Neural Compressor magnitude pruning feature on ViT model. - - -## Prerequisite - -### 1. Installation -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` -### 2. Install requirements -```shell -pip install -r requirements.txt -``` -### 3. 
Train and save a ViT model -According to the following link [Image classification with Vision Transformer](https://github.com/keras-team/keras-io/blob/master/examples/vision/md/image_classification_with_vision_transformer.md), train a ViT model as the baseline. Please add a line 'model.save("./ViT_Model")' in the function 'def run_experiment' to save the model to the directory './ViT_Model'. - -```python -def run_experiment(model): -...... -...... - model.load_weights(checkpoint_filepath) - _, accuracy, top_5_accuracy = model.evaluate(x_test, y_test) - print(f"Test accuracy: {round(accuracy * 100, 2)}%") - print(f"Test top 5 accuracy: {round(top_5_accuracy * 100, 2)}%") - model.save("./ViT_Model") # Add this line - return history -...... -...... -``` -## Run command to prune the model -Run the command to get pruned model which overwritten and saved into './ViT_Model'. -```shell -python main.py -``` -If you want to accelerate pruning with multi-node distributed training and evaluation, you only need to add a small amount of code and use horovod to run main.py. As shown in main.py, uncomment two lines 'prune.train_distributed = True' and 'prune.evaluation_distributed = True' in main.py is all you need. Run the command to get pruned model with multi-node distributed training and evaluation. -```shell -horovodrun -np -H python main.py -``` diff --git a/examples/tensorflow/image_recognition/ViT/pruning/magnitude/main.py b/examples/tensorflow/image_recognition/ViT/pruning/magnitude/main.py deleted file mode 100644 index b3085385f94..00000000000 --- a/examples/tensorflow/image_recognition/ViT/pruning/magnitude/main.py +++ /dev/null @@ -1,52 +0,0 @@ -import numpy as np -import tensorflow as tf -import tensorflow_addons as tfa -from neural_compressor.experimental import Pruning, common -from neural_compressor.utils import logger - - -# Prepare dataset -def prepare_dataset(): - (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar100.load_data() - y_train = tf.keras.utils.to_categorical(y_train, 100) - y_test = tf.keras.utils.to_categorical(y_test, 100) - logger.info(f"Training set: x_shape-{x_train.shape}, y_shape-{y_train.shape}") - logger.info(f"Test set: x_shape-{x_test.shape}, y_shape-{y_test.shape}") - return TrainDataset(x_train, y_train), EvalDataset(x_test, y_test) - -# Build TrainDataset and EvalDataset -class TrainDataset(object): - def __init__(self, x_train, y_train): - self.x_train = x_train - self.y_train = y_train - - def __len__(self): - return len(self.x_train) - - def __getitem__(self, idx): - return self.x_train[idx], self.y_train[idx] - -class EvalDataset(object): - def __init__(self, x_test, y_test): - self.x_test = x_test - self.y_test = y_test - - def __len__(self): - return len(self.x_test) - - def __getitem__(self, idx): - return self.x_test[idx], self.y_test[idx] - - -if __name__ == '__main__': - prune = Pruning("./prune_vit.yaml") - # prune.train_distributed = True - # prune.evaluation_distributed = True - training_set, test_set = prepare_dataset() - prune.train_dataloader = common.DataLoader(training_set, batch_size=128) - prune.eval_dataloader = common.DataLoader(test_set, batch_size=256) - prune.model = './ViT_Model' - model = prune.fit() - stats, sparsity = model.report_sparsity() - logger.info(stats) - logger.info(sparsity) diff --git a/examples/tensorflow/image_recognition/ViT/pruning/magnitude/prune_vit.yaml b/examples/tensorflow/image_recognition/ViT/pruning/magnitude/prune_vit.yaml deleted file mode 100644 index 831b02ee83b..00000000000 --- 
a/examples/tensorflow/image_recognition/ViT/pruning/magnitude/prune_vit.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: - name: vit - framework: tensorflow - -pruning: - train: - epoch: 15 - optimizer: - AdamW: - learning_rate: 0.001 - weight_decay: 0.0001 - criterion: - CrossEntropyLoss: - reduction: sum_over_batch_size - from_logits: True - approach: - weight_compression: - initial_sparsity: 0.0 - target_sparsity: 0.7 - start_epoch: 0 - end_epoch: 9 - pruners: - - !Pruner - start_epoch: 0 - end_epoch: 9 - prune_type: basic_magnitude - -evaluation: - accuracy: - metric: - topk: 1 - -tuning: - accuracy_criterion: - relative: 0.01 # the tuning target of accuracy loss percentage: 1% - exit_policy: - timeout: 0 # tuning timeout (seconds) - random_seed: 9527 # random seed diff --git a/examples/tensorflow/image_recognition/ViT/pruning/magnitude/requirements.txt b/examples/tensorflow/image_recognition/ViT/pruning/magnitude/requirements.txt deleted file mode 100644 index 23ce7f40aad..00000000000 --- a/examples/tensorflow/image_recognition/ViT/pruning/magnitude/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -tensorflow==2.10.1 -keras==2.10.0 -tensorflow-estimator==2.10.0 -tensorflow-addons==0.18.0 -horovod==0.26.1 diff --git a/examples/tensorflow/image_recognition/inception_v3/pruning/magnitude/README.md b/examples/tensorflow/image_recognition/inception_v3/pruning/magnitude/README.md deleted file mode 100644 index 7f996f25344..00000000000 --- a/examples/tensorflow/image_recognition/inception_v3/pruning/magnitude/README.md +++ /dev/null @@ -1,26 +0,0 @@ -Step-by-Step -============ - -This document is used to list steps of reproducing Intel® Neural Compressor magnitude pruning feature on Inception-V3 model. - - -## Prerequisite - -### 1. Install Intel® Neural Compressor -```shell -pip install neural-compressor -``` -### 2. Install other requirements -```shell -pip install -r requirements.txt -``` - -## Run command to prune the model -Run the command to get baseline model and then prune it and save the pruned model into './Inception-V3_Model'. -```shell -python main.py -``` -If you want to accelerate pruning with multi-node distributed training and evaluation, you only need to add a small amount of code and use horovod to run main.py. As shown in main.py, uncomment two lines 'prune.train_distributed = True' and 'prune.evaluation_distributed = True' in main.py is all you need. Run the command to get pruned model with multi-node distributed training and evaluation. 
-```shell -horovodrun -np -H python main.py -``` diff --git a/examples/tensorflow/image_recognition/inception_v3/pruning/magnitude/main.py b/examples/tensorflow/image_recognition/inception_v3/pruning/magnitude/main.py deleted file mode 100644 index 3d039ffeaa4..00000000000 --- a/examples/tensorflow/image_recognition/inception_v3/pruning/magnitude/main.py +++ /dev/null @@ -1,41 +0,0 @@ -import tensorflow as tf -from neural_compressor.experimental import Pruning -from neural_compressor.utils import logger - -def generate_model(model_name='InceptionV3'): - model = getattr(tf.keras.applications, model_name)( - include_top=True, - weights="imagenet", - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - classifier_activation="softmax" - ) - return model - -def get_vgg16_baseline(model_path): - if prune.train_distributed == True: - import horovod.tensorflow as hvd - hvd.init() - if hvd.rank() == 0: - model = generate_model('InceptionV3') - model.summary() - model.save(model_path) - else: - model = generate_model('InceptionV3') - model.summary() - model.save(model_path) - return model_path - - -if __name__ == '__main__': - prune = Pruning("./prune_inception_v3.yaml") - # prune.train_distributed = True - # prune.evaluation_distributed = True - model_path = get_vgg16_baseline('./Inception-V3_Model') - prune.model = model_path - model = prune.fit() - stats, sparsity = model.report_sparsity() - logger.info(stats) - logger.info(sparsity) diff --git a/examples/tensorflow/image_recognition/inception_v3/pruning/magnitude/prune_inception_v3.yaml b/examples/tensorflow/image_recognition/inception_v3/pruning/magnitude/prune_inception_v3.yaml deleted file mode 100644 index 96360569634..00000000000 --- a/examples/tensorflow/image_recognition/inception_v3/pruning/magnitude/prune_inception_v3.yaml +++ /dev/null @@ -1,81 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: - name: inception_v3 - framework: tensorflow - -pruning: - train: - dataloader: - batch_size: 128 - dataset: - ImageRecord: - root: /path/to/training/dataset # NOTE: modify to the ImageNet training set location - transform: - BilinearImagenet: - height: 299 - width: 299 - postprocess: - transform: - LabelShift: 1 - epoch: 40 - optimizer: - Adam: - learning_rate: 1e-06 - beta_1: 0.9 - beta_2: 0.999 - epsilon: 1e-07 - amsgrad: False - criterion: - SparseCategoricalCrossentropy: - reduction: sum_over_batch_size - from_logits: False - approach: - weight_compression: - initial_sparsity: 0.0 - target_sparsity: 0.54 - start_epoch: 0 - end_epoch: 19 - pruners: - - !Pruner - start_epoch: 0 - end_epoch: 19 - prune_type: basic_magnitude - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. 
- dataloader: - distributed: False - batch_size: 512 - dataset: - ImageRecord: - root: /path/to/validation/dataset # NOTE: modify to the ImageNet validation set location - transform: - BilinearImagenet: - height: 299 - width: 299 - postprocess: - transform: - LabelShift: 1 - -tuning: - accuracy_criterion: - relative: 0.01 # the tuning target of accuracy loss percentage: 1% - exit_policy: - timeout: 0 # tuning timeout (seconds) - random_seed: 9527 # random seed diff --git a/examples/tensorflow/image_recognition/inception_v3/pruning/magnitude/requirements.txt b/examples/tensorflow/image_recognition/inception_v3/pruning/magnitude/requirements.txt deleted file mode 100644 index d0f31da00c9..00000000000 --- a/examples/tensorflow/image_recognition/inception_v3/pruning/magnitude/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -intel-tensorflow==2.7.0 -horovod==0.24.0 diff --git a/examples/tensorflow/image_recognition/resnet_v2/pruning/magnitude/README.md b/examples/tensorflow/image_recognition/resnet_v2/pruning/magnitude/README.md deleted file mode 100644 index 4720482a448..00000000000 --- a/examples/tensorflow/image_recognition/resnet_v2/pruning/magnitude/README.md +++ /dev/null @@ -1,46 +0,0 @@ -Step-by-Step -============ - -This document is used to list steps of reproducing Intel® Neural Compressor magnitude pruning feature. - - -## Prerequisite - -### 1. Installation -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` -### 2. Install TensorFlow 2.10.0 or above. -```shell -pip install tensorflow==2.10.0 -``` -### 3. Train and save a ResNet-V2 model -According to the following link [Trains a ResNet on the CIFAR10 dataset.](https://keras.io/zh/examples/cifar10_resnet), set 'version = 2' and train a ResNet-V2 model as the baseline. Please add a line 'model.save("./ResNetV2_Model")' at the end of the code to save the model to the directory './ResNetV2_Model'. - -```python -...... -...... -# Score trained model. -scores = model.evaluate(x_test, y_test, verbose=1) -print('Test loss:', scores[0]) -print('Test accuracy:', scores[1]) -model.save("./ResNetV2_Model") # Add a line at the end - -``` -## Run command to prune the model -Run the command to get pruned model which overwritten and saved into './ResNetV2_Model'. -```shell -python main.py -``` -If you want to accelerate pruning with multi-node distributed training and evaluation, you only need to add a small amount of code and use horovod to run main.py. -As shown in main.py, uncomment two lines 'prune.train_distributed = True' and 'prune.evaluation_distributed = True' is all you need. -Use horovod to run main.py to get pruned model with multi-node distributed training and evaluation. -```shell -horovodrun -np -H python main.py -``` - -Run the command to get pruned model performance. 
-```shell -python benchmark.py -``` \ No newline at end of file diff --git a/examples/tensorflow/image_recognition/resnet_v2/pruning/magnitude/benchmark.py b/examples/tensorflow/image_recognition/resnet_v2/pruning/magnitude/benchmark.py deleted file mode 100644 index 4a92827015a..00000000000 --- a/examples/tensorflow/image_recognition/resnet_v2/pruning/magnitude/benchmark.py +++ /dev/null @@ -1,37 +0,0 @@ -import tensorflow -from tensorflow.keras.datasets import cifar10 - -from tensorflow import keras -import numpy as np - -num_classes = 10 -class EvalDataset(object): - def __init__(self, batch_size=100): - (x_train, y_train), (x_test, y_test) = cifar10.load_data() - - x_train = x_train.astype('float32') / 255 - x_test = x_test.astype('float32') / 255 - - # If subtract pixel mean is enabled - x_train_mean = np.mean(x_train, axis=0) - x_train -= x_train_mean - x_test -= x_train_mean - - # Convert class vectors to binary class matrices. - y_train = tensorflow.keras.utils.to_categorical(y_train, num_classes) - y_test = tensorflow.keras.utils.to_categorical(y_test, num_classes) - self.test_images = x_test - self.test_labels = y_test - - def __len__(self): - return len(self.test_images) - - def __getitem__(self, idx): - return self.test_images[idx], self.test_labels[idx] - - -from neural_compressor.experimental import Benchmark, common -evaluator = Benchmark('benchmark.yaml') -evaluator.model = common.Model('./baseline_model') -evaluator.b_dataloader = common.DataLoader(EvalDataset()) -evaluator('performance') diff --git a/examples/tensorflow/image_recognition/resnet_v2/pruning/magnitude/benchmark.yaml b/examples/tensorflow/image_recognition/resnet_v2/pruning/magnitude/benchmark.yaml deleted file mode 100644 index f58a5c6d404..00000000000 --- a/examples/tensorflow/image_recognition/resnet_v2/pruning/magnitude/benchmark.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: # mandatory. used to specify model specific information. - name: resnet_v2_prune - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - performance: # optional. used to benchmark performance of passing model. 
- iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 \ No newline at end of file diff --git a/examples/tensorflow/image_recognition/resnet_v2/pruning/magnitude/main.py b/examples/tensorflow/image_recognition/resnet_v2/pruning/magnitude/main.py deleted file mode 100644 index 87aa5cb7a60..00000000000 --- a/examples/tensorflow/image_recognition/resnet_v2/pruning/magnitude/main.py +++ /dev/null @@ -1,278 +0,0 @@ -from __future__ import print_function -import tensorflow -from tensorflow.keras.layers import Dense, Conv2D, BatchNormalization, Activation -from tensorflow.keras.layers import AveragePooling2D, Input, Flatten -from tensorflow.keras.callbacks import LearningRateScheduler -from tensorflow.keras.callbacks import ReduceLROnPlateau -from tensorflow.keras.regularizers import l2 -from tensorflow.keras.models import Model -from tensorflow.keras.datasets import cifar10 -import numpy as np -import yaml - - -def lr_schedule(epoch): - """Learning Rate Schedule - - Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs. - Called automatically every epoch as part of callbacks during training. - - # Arguments - epoch (int): The number of epochs - - # Returns - lr (float32): learning rate - """ - lr = 1e-3 - if epoch > 180: - lr *= 0.5e-3 - elif epoch > 160: - lr *= 1e-3 - elif epoch > 120: - lr *= 1e-2 - elif epoch > 80: - lr *= 1e-1 - print('Learning rate: ', lr) - return lr - - -def resnet_layer(inputs, - num_filters=16, - kernel_size=3, - strides=1, - activation='relu', - batch_normalization=True, - conv_first=True): - """2D Convolution-Batch Normalization-Activation stack builder - - # Arguments - inputs (tensor): input tensor from input image or previous layer - num_filters (int): Conv2D number of filters - kernel_size (int): Conv2D square kernel dimensions - strides (int): Conv2D square stride dimensions - activation (string): activation name - batch_normalization (bool): whether to include batch normalization - conv_first (bool): conv-bn-activation (True) or - bn-activation-conv (False) - - # Returns - x (tensor): tensor as input to the next layer - """ - conv = Conv2D(num_filters, - kernel_size=kernel_size, - strides=strides, - padding='same', - use_bias=True, - kernel_initializer='he_normal', - kernel_regularizer=l2(1e-4)) - - x = inputs - if conv_first: - x = conv(x) - if batch_normalization: - x = BatchNormalization()(x) - if activation is not None: - x = Activation(activation)(x) - else: - if batch_normalization: - x = BatchNormalization()(x) - if activation is not None: - x = Activation(activation)(x) - x = conv(x) - return x - -def resnet_v2(input_shape, depth, num_classes=10): - """ResNet Version 2 Model builder [b] - - Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D or also known as - bottleneck layer - First shortcut connection per layer is 1 x 1 Conv2D. - Second and onwards shortcut connection is identity. - At the beginning of each stage, the feature map size is halved (downsampled) - by a convolutional layer with strides=2, while the number of filter maps is - doubled. Within each stage, the layers have the same number filters and the - same filter map sizes. 
- Features maps sizes: - conv1 : 32x32, 16 - stage 0: 32x32, 64 - stage 1: 16x16, 128 - stage 2: 8x8, 256 - - # Arguments - input_shape (tensor): shape of input image tensor - depth (int): number of core convolutional layers - num_classes (int): number of classes (CIFAR10 has 10) - - # Returns - model (Model): Keras model instance - """ - if (depth - 2) % 9 != 0: - raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])') - # Start model definition. - num_filters_in = 16 - num_res_blocks = int((depth - 2) / 9) - - inputs = Input(shape=input_shape) - # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths - x = resnet_layer(inputs=inputs, - num_filters=num_filters_in, - conv_first=True) - - # Instantiate the stack of residual units - for stage in range(3): - for res_block in range(num_res_blocks): - activation = 'relu' - batch_normalization = True - strides = 1 - if stage == 0: - num_filters_out = num_filters_in * 4 - if res_block == 0: # first layer and first stage - activation = None - batch_normalization = False - else: - num_filters_out = num_filters_in * 2 - if res_block == 0: # first layer but not first stage - strides = 2 # downsample - - # bottleneck residual unit - y = resnet_layer(inputs=x, - num_filters=num_filters_in, - kernel_size=1, - strides=strides, - activation=activation, - batch_normalization=batch_normalization, - conv_first=False) - y = resnet_layer(inputs=y, - num_filters=num_filters_in, - conv_first=False) - - y = resnet_layer(inputs=y, - num_filters=num_filters_out, - kernel_size=1, - conv_first=False) - if res_block == 0: - # linear projection residual shortcut connection to match - # changed dims - x = resnet_layer(inputs=x, - num_filters=num_filters_out, - kernel_size=1, - strides=strides, - activation=None, - batch_normalization=False) - x = tensorflow.keras.layers.add([x, y]) - - num_filters_in = num_filters_out - - # Add classifier on top. - # v2 has BN-ReLU before Pooling - x = BatchNormalization()(x) - x = Activation('relu')(x) - x = AveragePooling2D(pool_size=8)(x) - y = Flatten()(x) - outputs = Dense(num_classes, - activation='softmax', - kernel_initializer='he_normal')(y) - - # Instantiate model. 
- model = Model(inputs=inputs, outputs=outputs) - return model - -# Training parameters -batch_size = 32 # orig paper trained all networks with batch_size=128 -epochs = 2 -num_classes = 10 - -# Subtracting pixel mean improves accuracy -subtract_pixel_mean = True - -# Model parameter -# ---------------------------------------------------------------------------- -# | | 200-epoch | Orig Paper| 200-epoch | Orig Paper| sec/epoch -# Model | n | ResNet v1 | ResNet v1 | ResNet v2 | ResNet v2 | GTX1080Ti -# |v1(v2)| %Accuracy | %Accuracy | %Accuracy | %Accuracy | v1 (v2) -# ---------------------------------------------------------------------------- -# ResNet20 | 3 (2)| 92.16 | 91.25 | ----- | ----- | 35 (---) -# ResNet32 | 5(NA)| 92.46 | 92.49 | NA | NA | 50 ( NA) -# ResNet44 | 7(NA)| 92.50 | 92.83 | NA | NA | 70 ( NA) -# ResNet56 | 9 (6)| 92.71 | 93.03 | 93.01 | NA | 90 (100) -# ResNet110 |18(12)| 92.65 | 93.39+-.16| 93.15 | 93.63 | 165(180) -# ResNet164 |27(18)| ----- | 94.07 | ----- | 94.54 | ---(---) -# ResNet1001| (111)| ----- | 92.39 | ----- | 95.08+-.14| ---(---) -# --------------------------------------------------------------------------- -n = 3 -depth = n * 9 + 2 - -class EvalDataset(object): - def __init__(self, batch_size=100): - (x_train, y_train), (x_test, y_test) = cifar10.load_data() - - x_train = x_train.astype('float32') / 255 - x_test = x_test.astype('float32') / 255 - - # If subtract pixel mean is enabled - x_train_mean = np.mean(x_train, axis=0) - x_train -= x_train_mean - x_test -= x_train_mean - - print('x_train shape:', x_train.shape) - print(x_train.shape[0], 'train samples') - print(x_test.shape[0], 'test samples') - print('y_train shape:', y_train.shape) - - # Convert class vectors to binary class matrices. - y_train = tensorflow.keras.utils.to_categorical(y_train, num_classes) - y_test = tensorflow.keras.utils.to_categorical(y_test, num_classes) - self.test_images = x_test - self.test_labels = y_test - - def __len__(self): - return len(self.test_images) - - def __getitem__(self, idx): - return self.test_images[idx], self.test_labels[idx] - -class TrainDataset(object): - def __init__(self, batch_size=100): - (x_train, y_train), (x_test, y_test) = cifar10.load_data() - - x_train = x_train.astype('float32') / 255 - x_test = x_test.astype('float32') / 255 - - # If subtract pixel mean is enabled - x_train_mean = np.mean(x_train, axis=0) - x_train -= x_train_mean - x_test -= x_train_mean - - print('x_train shape:', x_train.shape) - print(x_train.shape[0], 'train samples') - print(x_test.shape[0], 'test samples') - print('y_train shape:', y_train.shape) - - # Convert class vectors to binary class matrices. 
- y_train = tensorflow.keras.utils.to_categorical(y_train, num_classes) - y_test = tensorflow.keras.utils.to_categorical(y_test, num_classes) - self.test_images = x_test - self.test_labels = y_test - self.train_images = x_train - self.train_labels = y_train - - def __len__(self): - return len(self.train_images) - - def __getitem__(self, idx): - return self.train_images[idx], self.train_labels[idx] - -if __name__ == '__main__': - from neural_compressor.experimental import Pruning, common - from neural_compressor.utils import logger - prune = Pruning("./prune.yaml") - # prune.train_distributed = True - # prune.evaluation_distributed = True - prune.train_dataloader = common.DataLoader(TrainDataset(), batch_size=32) - prune.eval_dataloader = common.DataLoader(EvalDataset(), batch_size=32) - prune.model = './baseline_model' - model = prune.fit() - stats, sparsity = model.report_sparsity() - logger.info(stats) - logger.info(sparsity) - diff --git a/examples/tensorflow/image_recognition/resnet_v2/pruning/magnitude/prune.yaml b/examples/tensorflow/image_recognition/resnet_v2/pruning/magnitude/prune.yaml deleted file mode 100644 index 625ba3059ac..00000000000 --- a/examples/tensorflow/image_recognition/resnet_v2/pruning/magnitude/prune.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: - name: resnet_v2_prune - framework: tensorflow - -pruning: - train: - epoch: 8 - optimizer: - SGD: - learning_rate: 0.001 - momentum: 0.9 - nesterov: True - criterion: - CrossEntropyLoss: - reduction: sum_over_batch_size - approach: - weight_compression: - initial_sparsity: 0.0 - target_sparsity: 0.25 - start_epoch: 0 - end_epoch: 7 - pruners: - - !Pruner - start_epoch: 0 - end_epoch: 7 - prune_type: basic_magnitude - -evaluation: - accuracy: - metric: - topk: 1 - -tuning: - accuracy_criterion: - relative: 0.01 # the tuning target of accuracy loss percentage: 1% - exit_policy: - timeout: 0 # tuning timeout (seconds) - random_seed: 9527 # random seed diff --git a/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/README.md b/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/README.md deleted file mode 100644 index 8dc19e23fcd..00000000000 --- a/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/README.md +++ /dev/null @@ -1,26 +0,0 @@ -Step-by-Step -============ - -This document is used to list steps of reproducing Intel® Neural Compressor QAT feature. - - -## Prerequisite - -### 1. Installation -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` -### 2. Install Intel Tensorflow 2.4.0 or above. -```shell -pip install intel-tensorflow==2.4.0 -``` -### 3. Install tensorflow_model_optimization -```shell -pip install tensorflow_model_optimization -``` -## Run Command -```shell -python resnet_v2.py # to get the quantized ResNet-V2 model which will be saved into './trained_qat_model'. 
-``` - diff --git a/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/resnet_v2.py b/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/resnet_v2.py deleted file mode 100644 index c4e45ce5017..00000000000 --- a/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/resnet_v2.py +++ /dev/null @@ -1,353 +0,0 @@ -from __future__ import print_function -import tensorflow -from tensorflow.keras.layers import Dense, Conv2D, BatchNormalization, Activation -from tensorflow.keras.layers import AveragePooling2D, Input, Flatten -from tensorflow.keras.callbacks import LearningRateScheduler -from tensorflow.keras.callbacks import ReduceLROnPlateau -from tensorflow.keras.regularizers import l2 -from tensorflow.keras.models import Model -from tensorflow.keras.datasets import cifar10 -import numpy as np - - -def lr_schedule(epoch): - """Learning Rate Schedule - - Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs. - Called automatically every epoch as part of callbacks during training. - - # Arguments - epoch (int): The number of epochs - - # Returns - lr (float32): learning rate - """ - lr = 1e-3 - if epoch > 180: - lr *= 0.5e-3 - elif epoch > 160: - lr *= 1e-3 - elif epoch > 120: - lr *= 1e-2 - elif epoch > 80: - lr *= 1e-1 - print('Learning rate: ', lr) - return lr - - -def resnet_layer(inputs, - num_filters=16, - kernel_size=3, - strides=1, - activation='relu', - batch_normalization=True, - conv_first=True): - """2D Convolution-Batch Normalization-Activation stack builder - - # Arguments - inputs (tensor): input tensor from input image or previous layer - num_filters (int): Conv2D number of filters - kernel_size (int): Conv2D square kernel dimensions - strides (int): Conv2D square stride dimensions - activation (string): activation name - batch_normalization (bool): whether to include batch normalization - conv_first (bool): conv-bn-activation (True) or - bn-activation-conv (False) - - # Returns - x (tensor): tensor as input to the next layer - """ - conv = Conv2D(num_filters, - kernel_size=kernel_size, - strides=strides, - padding='same', - use_bias=True, - kernel_initializer='he_normal', - kernel_regularizer=l2(1e-4)) - - x = inputs - if conv_first: - x = conv(x) - if batch_normalization: - x = BatchNormalization()(x) - if activation is not None: - x = Activation(activation)(x) - else: - # if batch_normalization: - # x = BatchNormalization()(x) - if activation is not None: - x = Activation(activation)(x) - x = conv(x) - return x - -def resnet_v2(input_shape, depth, num_classes=10): - """ResNet Version 2 Model builder [b] - - Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D or also known as - bottleneck layer - First shortcut connection per layer is 1 x 1 Conv2D. - Second and onwards shortcut connection is identity. - At the beginning of each stage, the feature map size is halved (downsampled) - by a convolutional layer with strides=2, while the number of filter maps is - doubled. Within each stage, the layers have the same number filters and the - same filter map sizes. - Features maps sizes: - conv1 : 32x32, 16 - stage 0: 32x32, 64 - stage 1: 16x16, 128 - stage 2: 8x8, 256 - - # Arguments - input_shape (tensor): shape of input image tensor - depth (int): number of core convolutional layers - num_classes (int): number of classes (CIFAR10 has 10) - - # Returns - model (Model): Keras model instance - """ - if (depth - 2) % 9 != 0: - raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])') - # Start model definition. 
- num_filters_in = 16 - num_res_blocks = int((depth - 2) / 9) - - inputs = Input(shape=input_shape) - # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths - x = resnet_layer(inputs=inputs, - num_filters=num_filters_in, - conv_first=True) - - # Instantiate the stack of residual units - for stage in range(3): - for res_block in range(num_res_blocks): - activation = 'relu' - batch_normalization = True - strides = 1 - if stage == 0: - num_filters_out = num_filters_in * 4 - if res_block == 0: # first layer and first stage - activation = None - batch_normalization = False - else: - num_filters_out = num_filters_in * 2 - if res_block == 0: # first layer but not first stage - strides = 2 # downsample - - # bottleneck residual unit - y = resnet_layer(inputs=x, - num_filters=num_filters_in, - kernel_size=1, - strides=strides, - activation=activation, - batch_normalization=batch_normalization, - conv_first=False) - y = resnet_layer(inputs=y, - num_filters=num_filters_in, - conv_first=False) - - y = resnet_layer(inputs=y, - num_filters=num_filters_out, - kernel_size=1, - conv_first=False) - if res_block == 0: - # linear projection residual shortcut connection to match - # changed dims - x = resnet_layer(inputs=x, - num_filters=num_filters_out, - kernel_size=1, - strides=strides, - activation=None, - batch_normalization=False) - x = tensorflow.keras.layers.add([x, y]) - - num_filters_in = num_filters_out - - # Add classifier on top. - # v2 has BN-ReLU before Pooling - # x = BatchNormalization()(x) - x = Activation('relu')(x) - x = AveragePooling2D(pool_size=8)(x) - y = Flatten()(x) - outputs = Dense(num_classes, - activation='softmax', - kernel_initializer='he_normal')(y) - - # Instantiate model. - model = Model(inputs=inputs, outputs=outputs) - return model - - -# Training parameters -batch_size = 32 # orig paper trained all networks with batch_size=128 -epochs = 2 -num_classes = 10 - -# Subtracting pixel mean improves accuracy -subtract_pixel_mean = True - -# Model parameter -# ---------------------------------------------------------------------------- -# | | 200-epoch | Orig Paper| 200-epoch | Orig Paper| sec/epoch -# Model | n | ResNet v1 | ResNet v1 | ResNet v2 | ResNet v2 | GTX1080Ti -# |v1(v2)| %Accuracy | %Accuracy | %Accuracy | %Accuracy | v1 (v2) -# ---------------------------------------------------------------------------- -# ResNet20 | 3 (2)| 92.16 | 91.25 | ----- | ----- | 35 (---) -# ResNet32 | 5(NA)| 92.46 | 92.49 | NA | NA | 50 ( NA) -# ResNet44 | 7(NA)| 92.50 | 92.83 | NA | NA | 70 ( NA) -# ResNet56 | 9 (6)| 92.71 | 93.03 | 93.01 | NA | 90 (100) -# ResNet110 |18(12)| 92.65 | 93.39+-.16| 93.15 | 93.63 | 165(180) -# ResNet164 |27(18)| ----- | 94.07 | ----- | 94.54 | ---(---) -# ResNet1001| (111)| ----- | 92.39 | ----- | 95.08+-.14| ---(---) -# --------------------------------------------------------------------------- -n = 3 - -depth = n * 9 + 2 - -def train(): - # Load the CIFAR10 data. - (x_train, y_train), (x_test, y_test) = cifar10.load_data() - - # Input image dimensions. - input_shape = x_train.shape[1:] - # Normalize data. - x_train = x_train.astype('float32') / 255 - x_test = x_test.astype('float32') / 255 - - x_train_mean = np.mean(x_train, axis=0) - x_train -= x_train_mean - x_test -= x_train_mean - - print('x_train shape:', x_train.shape) - print(x_train.shape[0], 'train samples') - print(x_test.shape[0], 'test samples') - print('y_train shape:', y_train.shape) - - # Convert class vectors to binary class matrices. 
- y_train = tensorflow.keras.utils.to_categorical(y_train, num_classes) - y_test = tensorflow.keras.utils.to_categorical(y_test, num_classes) - - model = resnet_v2(input_shape=input_shape, depth=depth) - - model.compile(loss='categorical_crossentropy', - optimizer=tensorflow.keras.optimizers.Adam(learning_rate=0.01), - metrics=['accuracy']) - model.summary() - - lr_scheduler = LearningRateScheduler(lr_schedule) - - lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), - cooldown=0, - patience=5, - min_lr=0.5e-6) - - callbacks = [lr_reducer, lr_scheduler] - - # Run training, with or without data augmentation. - model.fit(x_train, y_train, - batch_size=batch_size, - epochs=epochs, - validation_data=(x_test, y_test), - shuffle=True, - callbacks=callbacks) - - - # Score trained model. - scores = model.evaluate(x_test, y_test, verbose=1) - print('Test loss:', scores[0]) - print('Test accuracy:', scores[1]) - model.save("baseline_model") - - -def q_func(compression_manager, model): - # Load the CIFAR10 data. - (x_train, y_train), (x_test, y_test) = cifar10.load_data() - - # Input image dimensions. - input_shape = x_train.shape[1:] - - # Normalize data. - x_train = x_train.astype('float32') / 255 - x_test = x_test.astype('float32') / 255 - - # If subtract pixel mean is enabled - x_train_mean = np.mean(x_train, axis=0) - x_train -= x_train_mean - x_test -= x_train_mean - - print('x_train shape:', x_train.shape) - print(x_train.shape[0], 'train samples') - print(x_test.shape[0], 'test samples') - print('y_train shape:', y_train.shape) - - # Convert class vectors to binary class matrices. - y_train = tensorflow.keras.utils.to_categorical(y_train, num_classes) - y_test = tensorflow.keras.utils.to_categorical(y_test, num_classes) - - model = tensorflow.keras.models.load_model("baseline_model") - - import tensorflow_model_optimization as tfmot - quantize_model = tfmot.quantization.keras.quantize_model - - # q_aware stands for for quantization aware. - q_aware_model = quantize_model(model) - - # `quantize_model` requires a recompile. - q_aware_model.compile(loss='categorical_crossentropy', - optimizer=tensorflow.keras.optimizers.Adam(learning_rate=0.001), - metrics=['accuracy']) - # q_aware_model.summary() - - train_images_subset = x_train[0:1000] # out of 60000 - train_labels_subset = y_train[0:1000] - - q_aware_model.fit(train_images_subset, train_labels_subset, - batch_size=500, epochs=1, - validation_data=(x_test, y_test)) - - _, q_aware_model_accuracy = q_aware_model.evaluate( - x_test, y_test, verbose=0) - - print('Quant test accuracy:', q_aware_model_accuracy) - - return q_aware_model - -class Dataset(object): - def __init__(self, batch_size=500): - (x_train, y_train), (x_test, y_test) = cifar10.load_data() - - x_train = x_train.astype('float32') / 255 - x_test = x_test.astype('float32') / 255 - - # If subtract pixel mean is enabled - x_train_mean = np.mean(x_train, axis=0) - x_train -= x_train_mean - x_test -= x_train_mean - - print('x_train shape:', x_train.shape) - print(x_train.shape[0], 'train samples') - print(x_test.shape[0], 'test samples') - print('y_train shape:', y_train.shape) - - # Convert class vectors to binary class matrices. 
- y_train = tensorflow.keras.utils.to_categorical(y_train, num_classes) - y_test = tensorflow.keras.utils.to_categorical(y_test, num_classes) - self.test_images = x_test - self.test_labels = y_test - - def __len__(self): - return len(self.test_images) - - def __getitem__(self, idx): - return self.test_images[idx], self.test_labels[idx] - -if __name__ == '__main__': - train() - - from neural_compressor.training import prepare_compression - from neural_compressor.config import QuantizationAwareTrainingConfig - conf = QuantizationAwareTrainingConfig(backend="tensorflow") - compression_manager = prepare_compression('./baseline_model', conf) - compression_manager.callbacks.on_train_begin() - q_aware_model = q_func(compression_manager, compression_manager.model) - q_aware_model.save("trained_qat_model") - compression_manager.callbacks.on_train_end() diff --git a/examples/tensorflow/image_recognition/tensorflow_models/distillation/README.md b/examples/tensorflow/image_recognition/tensorflow_models/distillation/README.md deleted file mode 100644 index 54feef51643..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/distillation/README.md +++ /dev/null @@ -1,6 +0,0 @@ -Details **TBD** -Please update conf.yaml with /PATH/TO/ImageNet -### Run pretraining -```shell -bash run_distillation.sh --topology=mobilenet --teacher=densenet201 --config=conf.yaml --output_model=path/to/output_model -``` diff --git a/examples/tensorflow/image_recognition/tensorflow_models/distillation/conf.yaml b/examples/tensorflow/image_recognition/tensorflow_models/distillation/conf.yaml deleted file mode 100644 index df01b53d927..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/distillation/conf.yaml +++ /dev/null @@ -1,70 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -version: 1.0 - -model: - name: imagenet_distillation - framework: tensorflow - -distillation: - train: - start_epoch: 0 - end_epoch: 90 - iteration: 1000 - frequency: 1 - dataloader: - batch_size: 64 - dataset: - ImageFolder: - root: /path/to/dataset - transform: - Resize: - size: 224 - interpolation: nearest - KerasRescale: - rescale: [127.5, 1] - optimizer: - SGD: - learning_rate: 0.001 - momentum: 0.1 - nesterov: True - weight_decay: 0.001 - criterion: - KnowledgeDistillationLoss: - temperature: 1.0 - loss_types: ['CE', 'CE'] - loss_weights: [0.5, 0.5] - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. 
- dataloader: - batch_size: 64 - dataset: - ImageFolder: - root: /path/to/dataset - transform: - Resize: - size: 224 - interpolation: nearest - KerasRescale: - rescale: [127.5, 1] -tuning: - accuracy_criterion: - relative: 0.01 # the tuning target of accuracy loss percentage: 1% - exit_policy: - timeout: 0 # tuning timeout (seconds) - random_seed: 9527 # random seed \ No newline at end of file diff --git a/examples/tensorflow/image_recognition/tensorflow_models/distillation/main.py b/examples/tensorflow/image_recognition/tensorflow_models/distillation/main.py deleted file mode 100644 index 152be10f535..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/distillation/main.py +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os -import logging -import random -import shutil -import warnings -import tensorflow as tf -from neural_compressor.utils import logger -model_names = ['mobilenet','densenet201'] - -parser = argparse.ArgumentParser(description='Tensorflow ImageNet Training') -parser.add_argument('-t', '--topology', metavar='ARCH', default='resnet18', - choices=model_names, - help='model architecture: ' + - ' | '.join(model_names) + - ' (default: resnet18)') -parser.add_argument('--teacher', metavar='ARCH', default='resnet50', - choices=model_names, - help='model architecture: ' + - ' | '.join(model_names) + - ' (default: resnet50)') -parser.add_argument('-b', '--batch-size', default=256, type=int, - metavar='N', - help='mini-batch size (default: 256), this is the total ' - 'batch size of all GPUs on the current node when ' - 'using Data Parallel or Distributed Data Parallel') -parser.add_argument('--resume', default='', type=str, metavar='PATH', - help='path to latest checkpoint (default: none)') -parser.add_argument('--distillation', dest='distillation', action='store_true', - help='distillation model on training dataset') -parser.add_argument('--pretrained', dest='pretrained', action='store_true', - help='use pre-trained model') -parser.add_argument('--seed', default=None, type=int, - help='seed for initializing training. 
') -parser.add_argument("--config", default=None, help="tuning config") -parser.add_argument("--output-model", default=None, help="output path", type=str) - -best_acc1 = 0 - - -def main(): - args = parser.parse_args() - - if args.seed is not None: - random.seed(args.seed) - - main_worker(args) - - -def main_worker(args): - global best_acc1 - - print("=> using pre-trained model '{}'".format(args.topology)) - model = tf.keras.applications.MobileNet(weights='imagenet') - - print("=> using pre-trained teacher model '{}'".format(args.teacher)) - teacher_model = tf.keras.applications.DenseNet201(weights='imagenet') - - if args.distillation: - from neural_compressor.experimental import Distillation - distiller = Distillation(args.config) - - distiller.model = model - distiller.teacher_model = teacher_model - model = distiller.fit() - model.save(args.output_model) - return - - - - -if __name__ == '__main__': - main() diff --git a/examples/tensorflow/image_recognition/tensorflow_models/distillation/run_distillation.sh b/examples/tensorflow/image_recognition/tensorflow_models/distillation/run_distillation.sh deleted file mode 100644 index 30f804218b2..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/distillation/run_distillation.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_distillation - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --topology=*) - topology=$(echo $var |cut -f2 -d=) - ;; - --teacher=*) - teacher=$(echo $var |cut -f2 -d=) - ;; - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - - esac - done - -} - -# run_tuning -function run_distillation { - python main.py \ - --topology=${topology} \ - --teacher=${teacher} \ - --distillation \ - --config=${config} \ - --pretrained \ - --output-model=${output_model} -} - -main "$@" diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/README.md b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/README.md deleted file mode 100644 index 1f1ca4254cd..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/README.md +++ /dev/null @@ -1,454 +0,0 @@ -Step-by-Step -============ - -This document list steps of reproducing Intel Optimized TensorFlow image recognition models tuning results via Neural Compressor. -This example can run on Intel CPUs and GPUs. - -> **Note**: -> Most of those models are both supported in Intel optimized TF 1.15.x and Intel optimized TF 2.x. -> [Version support](../../../../../../README.md#supported-frameworks) -# Prerequisite - -### 1. Installation -Recommend python 3.6 or higher version. - -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` - -### 2. Install Intel Tensorflow -```shell -pip install intel-tensorflow -``` -> Note: Supported Tensorflow [Version](../../../../../../README.md#supported-frameworks). - -### 3. Installation Dependency packages -```shell -cd examples/tensorflow/object_detection/tensorflow_models/quantization/ptq -pip install -r requirements.txt -``` - -### 4. Install Intel Extension for Tensorflow -#### Quantizing the model on Intel GPU -Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. 
-
-```shell
-pip install --upgrade intel-extension-for-tensorflow[gpu]
-```
-For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers)
-
-#### Quantizing the model on Intel CPU (Experimental)
-Intel Extension for Tensorflow for Intel CPUs is currently experimental. It's not mandatory for quantizing the model on Intel CPUs.
-
-```shell
-pip install --upgrade intel-extension-for-tensorflow[cpu]
-```
-
-### 5. Prepare Dataset
-
-  The TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format.
-  We also provide related scripts in the `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. Once the raw data is downloaded and the validation images are moved into the appropriate sub-directories based on the label (synset) of each image, the commands below convert them to the TF records format.
-
-  ```shell
-  cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq
-  # convert validation subset
-  bash prepare_dataset.sh --output_dir=./data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation
-  # convert train subset
-  bash prepare_dataset.sh --output_dir=./data --raw_dir=/PATH/TO/img_raw/train/ --subset=train
-  ```
-
-  > The EfficientNet-b0 model uses raw ImageNet images and the Caffe label file. Please download the Caffe labels.
-  ```shell
-  cd /PATH/TO/img_raw/val/
-  wget http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz
-  tar -xvf caffe_ilsvrc12.tar.gz
-  ```
-
-### 6. Prepare pre-trained model
-  In this version, Intel® Neural Compressor only supports PB files as input for the TensorFlow backend, so pre-trained PB files need to be prepared. For some models the pre-trained PB can be found in [IntelAI Models](https://github.com/IntelAI/models/tree/v1.6.0/benchmarks#tensorflow-use-cases); the download link is given in the README file of each model. For other models from the Google [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models) repo, the PB files can be obtained by converting the checkpoint files. Below is an example with Inception_v1 showing how to get the PB file from a checkpoint file.
-
-  1. Download the checkpoint file from [here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models)
-  ```shell
-  wget http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz
-  tar -xvf inception_v1_2016_08_28.tar.gz
-  ```
-
-  2. Export the inference graph
-  ```shell
-  git clone https://github.com/tensorflow/models
-  cd models/research/slim
-  python export_inference_graph.py \
-      --alsologtostderr \
-      --model_name=inception_v1 \
-      --output_file=/tmp/inception_v1_inf_graph.pb
-  ```
-  Make sure to use intel-tensorflow v1.15 and to `pip install tf_slim`. A small helper for inspecting the node names of the exported graph is sketched right below; it can be used in place of Netron in step 3.
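If Netron is inconvenient, the input/output node names can also be listed with a few lines of TensorFlow. This is a minimal sketch added for illustration, not part of the original example; the script name `list_nodes.py` and the graph path are placeholders.

```python
# list_nodes.py -- minimal sketch for printing the node names of an exported GraphDef.
# The script name and the graph path are placeholders; adjust them to your environment.
import sys
import tensorflow as tf

graph_def = tf.compat.v1.GraphDef()
with tf.io.gfile.GFile(sys.argv[1], "rb") as f:
    graph_def.ParseFromString(f.read())

# Print each node's name and op type; the first and last entries usually reveal
# the input/output tensors needed by freeze_graph.py and the yaml config.
for node in graph_def.node:
    print(node.name, node.op)
```

Usage: `python list_nodes.py /tmp/inception_v1_inf_graph.pb`.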
-  #### Install Intel Tensorflow 1.15 up2
-  Check your python version and use pip to install 1.15.0 up2 from the links below:
-  https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp36-cp36m-manylinux2010_x86_64.whl
-  https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp37-cp37m-manylinux2010_x86_64.whl
-  https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl
-  > Please note: The ImageNet dataset has 1001 classes, while the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need to add the `--labels_offset=1` flag to the inference graph exporting command.
-
-  3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer names of the inference graph PB (or use the node-listing sketch above). For Inception_v1 the output layer name is `InceptionV1/Logits/Predictions/Reshape_1`.
-
-  4. Freeze the exported graph with the tool `freeze_graph.py` from the [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo
-  ```shell
-  python freeze_graph.py \
-      --input_graph=/tmp/inception_v1_inf_graph.pb \
-      --input_checkpoint=./inception_v1.ckpt \
-      --input_binary=true \
-      --output_graph=./frozen_inception_v1.pb \
-      --output_node_names=InceptionV1/Logits/Predictions/Reshape_1
-  ```
-
-# Run
-
-> *Note*:
-> A model name marked with `*` comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models); please follow the step [Prepare pre-trained model](#6-prepare-pre-trained-model) to get the PB files.
-> The densenet series comes from [tensorflow-densenet](https://github.com/pudae/tensorflow-densenet); please also follow the step [Prepare pre-trained model](#6-prepare-pre-trained-model) to get the PB files, or use the OpenVINO downloader tool.
- ```shell
- git clone https://github.com/openvinotoolkit/open_model_zoo.git
- cd open_model_zoo/tools/downloader
- git checkout tags/2021.2
- pip install -r requirements.in
- python downloader.py --name densenet-{121|161|169}-tf -o /PATH/TO/MODEL
- ```
-
-### 1. ResNet50 V1.0
-
-  Download pre-trained PB
-  ```shell
-  wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet50_fp32_pretrained_model.pb
-  ```
-
-  ```shell
-  cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq
-  bash run_tuning.sh --config=resnet50_v1.yaml \
-      --input_model=/PATH/TO/resnet50_fp32_pretrained_model.pb \
-      --output_model=./nc_resnet50_v1.pb
-  ```
-
-### 2. ResNet50 V1.5
-
-  Download pre-trained PB
-  ```shell
-  wget https://zenodo.org/record/2535873/files/resnet50_v1.pb
-  ```
-
-  ```shell
-  cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq
-  bash run_tuning.sh --config=resnet50_v1_5.yaml \
-      --input_model=/PATH/TO/resnet50_v1.pb --output_model=./nc_resnet50_v15.pb
-  ```
-
-### 3. ResNet101
-
-  Download pre-trained PB
-  ```shell
-  wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet101_fp32_pretrained_model.pb
-  ```
-
-  ```shell
-  cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq
-  bash run_tuning.sh --config=resnet101.yaml \
-      --input_model=/PATH/TO/resnet101_fp32_pretrained_model.pb \
-      --output_model=./nc_resnet101.pb
-  ```
-
-### 4.
MobileNet V1 - - Download pre-trained PB - ```shell - wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/mobilenet_v1_1.0_224_frozen.pb - ``` - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq - bash run_tuning.sh --config=mobilenet_v1.yaml \ - --input_model=/PATH/TO/mobilenet_v1_1.0_224_frozen.pb \ - --output_model=./nc_mobilenetv1.pb - ``` - -### 5. MobileNet V2* - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq - bash run_tuning.sh --config=mobilenet_v2.yaml \ - --input_model=/PATH/TO/frozen_mobilenet_v2.pb \ - --output_model=./nc_mobilenetv2.pb - ``` - -### 6. Inception V1* - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq - bash run_tuning.sh --config=inception_v1.yaml \ - --input_model=/PATH/TO/frozen_inception_v1.pb \ - --output_model=./nc_inceptionv1.pb - ``` - -### 7. Inception V2* - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq - bash run_tuning.sh --config=inception_v2.yaml \ - --input_model=/PATH/TO/frozen_inception_v2.pb \ - --output_model=./nc_inceptionv2.pb - ``` - -### 8. Inception V3 - - Download pre-trained PB - ```shell - wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/inceptionv3_fp32_pretrained_model.pb - ``` - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq - bash run_tuning.sh --config=inception_v3.yaml \ - --input_model=/PATH/TO/inceptionv3_fp32_pretrained_model.pb \ - --output_model=./nc_inceptionv3.pb - ``` - -### 9. Inception V4 - - Download pre-trained PB - ```shell - wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/inceptionv4_fp32_pretrained_model.pb - ``` - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq - bash run_tuning.sh --config=inception_v4.yaml \ - --input_model=/PATH/TO/inceptionv4_fp32_pretrained_model.pb \ - --output_model=./nc_inceptionv4.pb - ``` - -### 10. Inception ResNet V2* - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq - bash run_tuning.sh --config=inception_resnet_v2.yaml \ - --input_model=/PATH/TO/frozen_inception_resnet_v2.pb \ - --output_model=./nc_irv2.pb - ``` - -### 11. VGG 16* - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq - bash run_tuning.sh --config=vgg16.yaml \ - --input_model=/PATH/TO/frozen_vgg16.pb --output_model=./nc_vgg16.pb - ``` - -### 12. VGG 19* - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq - bash run_tuning.sh --config=vgg19.yaml \ - --input_model=/PATH/TO/frozen_vgg19.pb --output_model=./nc_vgg19.pb - ``` - -### 13. ResNet v2 50 - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq - bash run_tuning.sh --config=resnet_v2_50.yaml \ - --input_model=/PATH/TO/frozen_resnet50v2_50.pb --output_model=./nc_resnetv2_50.pb - ``` - -### 14. ResNet v2 101 - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq - bash run_tuning.sh --config=resnet_v2_101.yaml \ - --input_model=/PATH/TO/frozen_resnetv2_101.pb --output_model=./nc_resnetv2_101.pb - ``` - -### 15. 
ResNet v2 152 - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq - bash run_tuning.sh --config=resnet_v2_152.yaml \ - --input_model=/PATH/TO/frozen_resnetv2_152.pb \ - --output_model=./nc_resnetv2_152.pb - ``` - -### 16. Densenet-121 - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq - bash run_tuning.sh --config=densenet121.yaml \ - --input_model=/PATH/TO/densenet121.pb --output_model=./nc_densenet121 - ``` - -### 17. Densenet-161 - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq - bash run_tuning.sh --config=densenet161.yaml \ - --input_model=/PATH/TO/densenet161.pb --output_model=./nc_densenet161 - ``` - -### 18. Densenet-169 - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq - bash run_tuning.sh --config=densenet169.yaml \ - --input_model=/PATH/TO/densenet169.pb --output_model=./nc_densenet169 - ``` - -### 19. Nasnet-mobile* - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq - bash run_tuning.sh --config=nasnet_mobile.yaml \ - --input_model=/PATH/TO/frozen_nasnet_mobile.pb --output_model=./nc_nasnet_mobile - ``` - -### 20. EfficientNet-b0(experiment) - - Download pre-trained checkpoint - ```shell - wget https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/ckpts/efficientnet-b0.tar.gz - tar -xvf efficientnet-b0.tar.gz - ``` - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq - bash run_tuning.sh --config=efficientnet-b0.yaml \ - --input_model=/PATH/TO/efficientnet-b0 \ - --output_model=./nc_efficientnet-b0.pb - ``` - -Examples of enabling Intel® Neural Compressor auto tuning on TensorFlow ResNet50 V1.5 -======================================================= - -This is a tutorial of how to enable a TensorFlow image recognition model with Intel® Neural Compressor. - -# User Code Analysis - -Intel® Neural Compressor supports two usages: - -1. User specifies fp32 "model", yaml configured calibration dataloader in calibration field and evaluation dataloader in evaluation field, metric in tuning.metric field of model-specific yaml config file. - -> *Note*: -> you should change the model-specific yaml file dataset path to your own dataset path - -2. User specifies fp32 "model", calibration dataset "q_dataloader" and a custom "eval_func" which encapsulates the evaluation dataset and metric by itself. - -As ResNet50 V1.5 is a typical image recognition model, use Top-K as metric which is built-in supported by Intel® Neural Compressor. So here we integrate Tensorflow [ResNet50 V1.5](https://github.com/IntelAI/models/tree/v1.6.0/models/image_recognition/tensorflow/resnet50v1_5/inference) in [IntelAI Models](https://github.com/IntelAI/models/tree/v1.6.0) with Intel® Neural Compressor by the first use case for simplicity. - -### Write Yaml config file - -In examples directory, there is a resnet50_v1_5.yaml for tuning the model on Intel CPUs. The 'framework' in the yaml is set to 'tensorflow'. If running this example on Intel GPUs, the 'framework' should be set to 'tensorflow_itex' and the device in yaml file should be set to 'gpu'. The resnet50_v1_5_itex.yaml is prepared for the GPU case. We could remove most of the items and only keep mandatory item for tuning. - - -```yaml -# resnet50_v1_5.yaml - -model: # mandatory. used to specify model specific information. - name: resnet50_v1_5 - framework: tensorflow # mandatory. 
supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - inputs: input_tensor - outputs: softmax_tensor - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 5, 10 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. - -``` - -Here we choose topk which is built-in metric and set accuracy criterion as tolerating 0.01 relative accuracy loss of baseline. The default tuning strategy is basic strategy. The timeout 0 means early stop as long as a tuning config meet accuracy target. - -### preparation - -There are three preparation steps in here: -1. Prepare environment -```shell -pip install intel-tensorflow==1.15.2 neural_compressor -``` -2. Get the model source code -```shell -git clone -b v1.6.0 https://github.com/IntelAI/models intelai_models -cd intelai_models/models/image_recognition/tensorflow/resnet50v1_5/inference -``` -3. Prepare the ImageNet dataset and pretrained PB file -```shell -wget https://zenodo.org/record/2535873/files/resnet50_v1.pb -``` -### code update - -After completed preparation steps, we just need to add below tuning part in `eval_classifier_optimized_graph` class. 
- -```python - def auto_tune(self): - """This is Intel® Neural Compressor tuning part to generate a quantized pb - - Returns: - graph: it will return a quantized pb - """ - from neural_compressor.experimental import Quantization, common - quantizer = Quantization(self.args.config) - quantizer.model = common.Model(self.args.input_graph) - q_model = quantizer.fit() - return q_model -``` - -Finally, add one line in `__main__` function of `eval_image_-classifier_inference.py` to use Intel® Neural Compressor by yourself as below. -```python -q_graph = evaluate_opt_graph.auto_tune() -``` -The quantizer.fit() function will return a best quantized model within timeout constrain. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet121.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet121.yaml deleted file mode 100644 index b9da893f6da..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet121.yaml +++ /dev/null @@ -1,89 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: densenet121 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 5, 10, 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - ResizeCropImagenet: - scale: 0.017 - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - weight: - granularity: per_channel - op_wise: { - 'densenet121/MaxPool2D/MaxPool': { - 'activation': {'dtype': ['fp32']} - }, - 'densenet121/transition_block[1-3]/AvgPool2D/AvgPool': { - 'activation': {'dtype': ['fp32']}, - } - } - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - scale: 0.017 - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - postprocess: - transform: - LabelShift: 1 - performance: # optional. used to benchmark performance of passing model. 
- configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - scale: 0.017 - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet121_itex.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet121_itex.yaml deleted file mode 100644 index 34492100da2..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet121_itex.yaml +++ /dev/null @@ -1,89 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: densenet121 - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 5, 10, 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - ResizeCropImagenet: - scale: 0.017 - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - weight: - granularity: per_channel - op_wise: { - 'densenet121/MaxPool2D/MaxPool': { - 'activation': {'dtype': ['fp32']} - }, - 'densenet121/transition_block[1-3]/AvgPool2D/AvgPool': { - 'activation': {'dtype': ['fp32']}, - } - } - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. 
- dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - scale: 0.017 - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - postprocess: - transform: - LabelShift: 1 - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - scale: 0.017 - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet161.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet161.yaml deleted file mode 100644 index 5312ed341fa..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet161.yaml +++ /dev/null @@ -1,89 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: densenet161 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - ResizeCropImagenet: - scale: 0.017 - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - weight: - granularity: per_channel - op_wise: { - 'densenet161/MaxPool2D/MaxPool': { - 'activation': {'dtype': ['fp32']} - }, - 'densenet161/transition_block[1-3]/AvgPool2D/AvgPool': { - 'activation': {'dtype': ['fp32']}, - } - } - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. 
- metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - scale: 0.017 - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - postprocess: - transform: - LabelShift: 1 - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - scale: 0.017 - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet161_itex.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet161_itex.yaml deleted file mode 100644 index 58f17d9f479..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet161_itex.yaml +++ /dev/null @@ -1,89 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: densenet161 - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - ResizeCropImagenet: - scale: 0.017 - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - weight: - granularity: per_channel - op_wise: { - 'densenet161/MaxPool2D/MaxPool': { - 'activation': {'dtype': ['fp32']} - }, - 'densenet161/transition_block[1-3]/AvgPool2D/AvgPool': { - 'activation': {'dtype': ['fp32']}, - } - } - -evaluation: # optional. 
required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - scale: 0.017 - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - postprocess: - transform: - LabelShift: 1 - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - scale: 0.017 - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet169.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet169.yaml deleted file mode 100644 index b63414d8acf..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet169.yaml +++ /dev/null @@ -1,89 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: densenet169 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - ResizeCropImagenet: - scale: 0.017 - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. 
- activation: - algorithm: minmax - weight: - granularity: per_channel - op_wise: { - 'densenet169/MaxPool2D/MaxPool': { - 'activation': {'dtype': ['fp32']} - }, - 'densenet169/transition_block[1-3]/AvgPool2D/AvgPool': { - 'activation': {'dtype': ['fp32']}, - } - } - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - scale: 0.017 - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - postprocess: - transform: - LabelShift: 1 - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - scale: 0.017 - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet169_itex.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet169_itex.yaml deleted file mode 100644 index 55360042c94..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/densenet169_itex.yaml +++ /dev/null @@ -1,89 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: densenet169 - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. 
- dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - ResizeCropImagenet: - scale: 0.017 - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - weight: - granularity: per_channel - op_wise: { - 'densenet169/MaxPool2D/MaxPool': { - 'activation': {'dtype': ['fp32']} - }, - 'densenet169/transition_block[1-3]/AvgPool2D/AvgPool': { - 'activation': {'dtype': ['fp32']}, - } - } - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - scale: 0.017 - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - postprocess: - transform: - LabelShift: 1 - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - scale: 0.017 - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/efficientnet-b0.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/efficientnet-b0.yaml deleted file mode 100644 index 35dd2e08edf..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/efficientnet-b0.yaml +++ /dev/null @@ -1,90 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. neural_compressor uses this model name and framework name to decide where to save tuning history and deploy yaml. - name: efficientnet-b0 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - inputs: truediv - outputs: Squeeze - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. 
tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 5, 10, 50, 100 # optional. default value is the size of whole dataset. used to set how many portions of calibration dataset is used. exclusive with iterations field. - dataloader: - dataset: - ImagenetRaw: - data_path: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - image_list: /path/to/calibration/label # data file, record image_names and their labels - transform: - PaddedCenterCrop: - size: 224 - crop_padding: 32 - Resize: - size: 224 - interpolation: bicubic - Normalize: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImagenetRaw: - data_path: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - image_list: /path/to/evaluation/label # data file, record image_names and their labels - transform: - PaddedCenterCrop: - size: 224 - crop_padding: 32 - Resize: - size: 224 - interpolation: bicubic - Normalize: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] - performance: # optional. used to benchmark performance of passing model. - iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImagenetRaw: - data_path: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - image_list: /path/to/evaluation/label # data file, record image_names and their labels - transform: - PaddedCenterCrop: - size: 224 - crop_padding: 32 - Resize: - size: 224 - interpolation: bicubic - Normalize: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/efficientnet-b0_itex.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/efficientnet-b0_itex.yaml deleted file mode 100644 index c986d701a38..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/efficientnet-b0_itex.yaml +++ /dev/null @@ -1,90 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. neural_compressor uses this model name and framework name to decide where to save tuning history and deploy yaml. 
- name: efficientnet-b0 - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - inputs: truediv - outputs: Squeeze - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 5, 10, 50, 100 # optional. default value is the size of whole dataset. used to set how many portions of calibration dataset is used. exclusive with iterations field. - dataloader: - dataset: - ImagenetRaw: - data_path: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - image_list: /path/to/calibration/label # data file, record image_names and their labels - transform: - PaddedCenterCrop: - size: 224 - crop_padding: 32 - Resize: - size: 224 - interpolation: bicubic - Normalize: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImagenetRaw: - data_path: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - image_list: /path/to/evaluation/label # data file, record image_names and their labels - transform: - PaddedCenterCrop: - size: 224 - crop_padding: 32 - Resize: - size: 224 - interpolation: bicubic - Normalize: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] - performance: # optional. used to benchmark performance of passing model. - iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImagenetRaw: - data_path: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - image_list: /path/to/evaluation/label # data file, record image_names and their labels - transform: - PaddedCenterCrop: - size: 224 - crop_padding: 32 - Resize: - size: 224 - interpolation: bicubic - Normalize: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/imagenet_prepare/build_imagenet_data.py b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/imagenet_prepare/build_imagenet_data.py deleted file mode 100644 index fea38a9fdfe..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/imagenet_prepare/build_imagenet_data.py +++ /dev/null @@ -1,567 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Converts ImageNet data to TFRecords file format with Example protos. - -The raw ImageNet data set is expected to reside in JPEG files located in the -following directory structure. - - data_dir/n01440764/ILSVRC2012_val_00000293.JPEG - data_dir/n01440764/ILSVRC2012_val_00000543.JPEG - ... - -where 'n01440764' is the unique synset label associated with -these images. - -The training data set consists of 1000 sub-directories (i.e. labels) -each containing 1200 JPEG images for a total of 1.2M JPEG images. - -The evaluation data set consists of 1000 sub-directories (i.e. labels) -each containing 50 JPEG images for a total of 50K JPEG images. - -This TensorFlow script converts the training and evaluation data into -a sharded data set consisting of 1024 and 128 TFRecord files, respectively. - - train_directory/train-00000-of-01024 - train_directory/train-00001-of-01024 - ... - train_directory/train-00127-of-01024 - -and - - validation_directory/validation-00000-of-00128 - validation_directory/validation-00001-of-00128 - ... - validation_directory/validation-00127-of-00128 - -Each validation TFRecord file contains ~390 records. Each training TFREcord -file contains ~1250 records. Each record within the TFRecord file is a -serialized Example proto. The Example proto contains the following fields: - - image/encoded: string containing JPEG encoded image in RGB colorspace - image/height: integer, image height in pixels - image/width: integer, image width in pixels - image/colorspace: string, specifying the colorspace, always 'RGB' - image/channels: integer, specifying the number of channels, always 3 - image/format: string, specifying the format, always'JPEG' - - image/filename: string containing the basename of the image file - e.g. 'n01440764_10026.JPEG' or 'ILSVRC2012_val_00000293.JPEG' - image/class/label: integer specifying the index in a classification layer. - The label ranges from [1, 1000] where 0 is not used. - image/class/synset: string specifying the unique ID of the label, - e.g. 'n01440764' - image/class/text: string specifying the human-readable version of the label - e.g. 'red fox, Vulpes vulpes' - -Note that the length of xmin is identical to the length of xmax, ymin and ymax -for each example. - -Running this script using 16 threads may take around ~2.5 hours on a HP Z420. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from datetime import datetime -import os -import random -import sys -import threading - -import numpy as np -from six.moves import xrange # pylint: disable=redefined-builtin -import tensorflow as tf -tf.compat.v1.disable_eager_execution() - - -tf.compat.v1.app.flags.DEFINE_string('raw_directory', None, - 'Raw data directory') - -tf.compat.v1.app.flags.DEFINE_string('output_directory', None, - 'Output data directory') - -tf.compat.v1.app.flags.DEFINE_integer('shards', 1, - 'Number of shards in TFRecord files.') - -tf.compat.v1.app.flags.DEFINE_string('subset', 'validation', - 'Subset of imagenet, can be validation/train') - -tf.compat.v1.app.flags.DEFINE_integer('num_threads', 1, - 'Number of threads to preprocess the images.') - -# The labels file contains a list of valid labels are held in this file. -# Assumes that the file contains entries as such: -# n01440764 -# n01443537 -# n01484850 -# where each line corresponds to a label expressed as a synset. We map -# each synset contained in the file to an integer (based on the alphabetical -# ordering). See below for details. -tf.compat.v1.app.flags.DEFINE_string('labels_file', - 'imagenet_lsvrc_2015_synsets.txt', - 'Labels file') - -# This file containing mapping from synset to human-readable label. -# Assumes each line of the file looks like: -# -# n02119247 black fox -# n02119359 silver fox -# n02119477 red fox, Vulpes fulva -# -# where each line corresponds to a unique mapping. Note that each line is -# formatted as \t. -tf.compat.v1.app.flags.DEFINE_string('imagenet_metadata_file', - 'imagenet_metadata.txt', - 'ImageNet metadata file') - -FLAGS = tf.compat.v1.app.flags.FLAGS - - -def _int64_feature(value): - """Wrapper for inserting int64 features into Example proto.""" - if not isinstance(value, list): - value = [value] - return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) - - -def _float_feature(value): - """Wrapper for inserting float features into Example proto.""" - if not isinstance(value, list): - value = [value] - return tf.train.Feature(float_list=tf.train.FloatList(value=value)) - - -def _bytes_feature(value): - """Wrapper for inserting bytes features into Example proto.""" - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - - -def _convert_to_example(filename, image_buffer, label, synset, human, - height, width): - """Build an Example proto for an example. 
- - Args: - filename: string, path to an image file, e.g., '/path/to/example.JPG' - image_buffer: string, JPEG encoding of RGB image - label: integer, identifier for the ground truth for the network - synset: string, unique WordNet ID specifying the label, e.g., 'n02323233' - human: string, human-readable label, e.g., 'red fox, Vulpes vulpes' - height: integer, image height in pixels - width: integer, image width in pixels - Returns: - Example proto - """ - - colorspace = b'RGB' - channels = 3 - image_format = b'JPEG' - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/height': _int64_feature(height), - 'image/width': _int64_feature(width), - 'image/colorspace': _bytes_feature(colorspace), - 'image/channels': _int64_feature(channels), - 'image/class/label': _int64_feature(label), - 'image/class/synset': _bytes_feature(bytes(synset,'utf-8')), - 'image/class/text': _bytes_feature(bytes(human,'utf-8')), - 'image/format': _bytes_feature(image_format), - 'image/filename': _bytes_feature(bytes(os.path.basename(filename),'utf-8')), - 'image/encoded': _bytes_feature(image_buffer)})) - return example - - -class ImageCoder(object): - """Helper class that provides TensorFlow image coding utilities.""" - - def __init__(self): - # Create a single Session to run all image coding calls. - self._sess = tf.compat.v1.Session() - - # Initializes function that converts PNG to JPEG data. - self._png_data = tf.compat.v1.placeholder(dtype=tf.string) - image = tf.image.decode_png(self._png_data, channels=3) - self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100) - - # Initializes function that converts CMYK JPEG data to RGB JPEG data. - self._cmyk_data = tf.compat.v1.placeholder(dtype=tf.string) - image = tf.image.decode_jpeg(self._cmyk_data, channels=0) - self._cmyk_to_rgb = tf.image.encode_jpeg(image, format='rgb', quality=100) - - # Initializes function that decodes RGB JPEG data. - self._decode_jpeg_data = tf.compat.v1.placeholder(dtype=tf.string) - self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3) - - def png_to_jpeg(self, image_data): - return self._sess.run(self._png_to_jpeg, - feed_dict={self._png_data: image_data}) - - def cmyk_to_rgb(self, image_data): - return self._sess.run(self._cmyk_to_rgb, - feed_dict={self._cmyk_data: image_data}) - - def decode_jpeg(self, image_data): - image = self._sess.run(self._decode_jpeg, - feed_dict={self._decode_jpeg_data: image_data}) - assert len(image.shape) == 3 - assert image.shape[2] == 3 - return image - - -def _is_png(filename): - """Determine if a file contains a PNG format image. - - Args: - filename: string, path of the image file. - - Returns: - boolean indicating if the image is a PNG. - """ - # File list from: - # https://groups.google.com/forum/embed/?place=forum/torch7#!topic/torch7/fOSTXHIESSU - return 'n02105855_2933.JPEG' in filename - - -def _is_cmyk(filename): - """Determine if file contains a CMYK JPEG format image. - - Args: - filename: string, path of the image file. - - Returns: - boolean indicating if the image is a JPEG encoded with CMYK color space. 
- """ - # File list from: - # https://github.com/cytsai/ilsvrc-cmyk-image-list - blacklist = ['n01739381_1309.JPEG', 'n02077923_14822.JPEG', - 'n02447366_23489.JPEG', 'n02492035_15739.JPEG', - 'n02747177_10752.JPEG', 'n03018349_4028.JPEG', - 'n03062245_4620.JPEG', 'n03347037_9675.JPEG', - 'n03467068_12171.JPEG', 'n03529860_11437.JPEG', - 'n03544143_17228.JPEG', 'n03633091_5218.JPEG', - 'n03710637_5125.JPEG', 'n03961711_5286.JPEG', - 'n04033995_2932.JPEG', 'n04258138_17003.JPEG', - 'n04264628_27969.JPEG', 'n04336792_7448.JPEG', - 'n04371774_5854.JPEG', 'n04596742_4225.JPEG', - 'n07583066_647.JPEG', 'n13037406_4650.JPEG'] - return filename.split('/')[-1] in blacklist - - -def _process_image(filename, coder): - """Process a single image file. - - Args: - filename: string, path to an image file e.g., '/path/to/example.JPG'. - coder: instance of ImageCoder to provide TensorFlow image coding utils. - Returns: - image_buffer: string, JPEG encoding of RGB image. - height: integer, image height in pixels. - width: integer, image width in pixels. - """ - # Read the image file. - image_data = tf.io.gfile.GFile(filename, 'rb').read() - - # Clean the dirty data. - if _is_png(filename): - # 1 image is a PNG. - print('Converting PNG to JPEG for %s' % filename) - image_data = coder.png_to_jpeg(image_data) - elif _is_cmyk(filename): - # 22 JPEG images are in CMYK colorspace. - print('Converting CMYK to RGB for %s' % filename) - image_data = coder.cmyk_to_rgb(image_data) - - # Decode the RGB JPEG. - image = coder.decode_jpeg(image_data) - - # Check that image converted to RGB - assert len(image.shape) == 3 - height = image.shape[0] - width = image.shape[1] - assert image.shape[2] == 3 - - return image_data, height, width - - -def _process_image_files_batch(coder, thread_index, ranges, name, filenames, - synsets, labels, humans, num_shards): - """Processes and saves list of images as TFRecord in 1 thread. - - Args: - coder: instance of ImageCoder to provide TensorFlow image coding utils. - thread_index: integer, unique batch to run index is within [0, len(ranges)). - ranges: list of pairs of integers specifying ranges of each batches to - analyze in parallel. - name: string, unique identifier specifying the data set - filenames: list of strings; each string is a path to an image file - synsets: list of strings; each string is a unique WordNet ID - labels: list of integer; each integer identifies the ground truth - humans: list of strings; each string is a human-readable label - num_shards: integer number of shards for this data set. - """ - # Each thread produces N shards where N = int(num_shards / num_threads). - # For instance, if num_shards = 128, and the num_threads = 2, then the first - # thread would produce shards [0, 64). - num_threads = len(ranges) - assert not num_shards % num_threads - num_shards_per_batch = int(num_shards / num_threads) - - shard_ranges = np.linspace(ranges[thread_index][0], - ranges[thread_index][1], - num_shards_per_batch + 1).astype(int) - num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0] - - counter = 0 - for s in xrange(num_shards_per_batch): - # Generate a sharded version of the file name, e.g. 
'train-00002-of-00010' - shard = thread_index * num_shards_per_batch + s - output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards) - output_file = os.path.join(FLAGS.output_directory, output_filename) - writer = tf.io.TFRecordWriter(output_file) - - shard_counter = 0 - files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int) # HERE - for i in files_in_shard: - filename = filenames[i] - label = labels[i] - synset = synsets[i] - human = humans[i] - - image_buffer, height, width = _process_image(filename, coder) - - example = _convert_to_example(filename, image_buffer, label, synset, human, height, width) - writer.write(example.SerializeToString()) - shard_counter += 1 - counter += 1 - - if not counter % 1000: - print('%s [thread %d]: Processed %d of %d images in thread batch.' % - (datetime.now(), thread_index, counter, num_files_in_thread)) - sys.stdout.flush() - - writer.close() - print('%s [thread %d]: Wrote %d images to %s' % - (datetime.now(), thread_index, shard_counter, output_file)) - sys.stdout.flush() - shard_counter = 0 - print('%s [thread %d]: Wrote %d images to %d shards.' % - (datetime.now(), thread_index, counter, num_files_in_thread)) - sys.stdout.flush() - - -def _process_image_files(name, filenames, synsets, labels, humans, num_shards): - """Process and save list of images as TFRecord of Example protos. - - Args: - name: string, unique identifier specifying the data set - filenames: list of strings; each string is a path to an image file - synsets: list of strings; each string is a unique WordNet ID - labels: list of integer; each integer identifies the ground truth - humans: list of strings; each string is a human-readable label - num_shards: integer number of shards for this data set. - """ - assert len(filenames) == len(synsets) - assert len(filenames) == len(labels) - assert len(filenames) == len(humans) - - # Break all images into batches with a [ranges[i][0], ranges[i][1]]. - spacing = np.linspace(0, len(filenames), FLAGS.num_threads + 1).astype(np.int32) - ranges = [] - threads = [] - for i in xrange(len(spacing) - 1): - ranges.append([spacing[i], spacing[i+1]]) - - # Launch a thread for each batch. - print('Launching %d threads for spacings: %s' % (FLAGS.num_threads, ranges)) - sys.stdout.flush() - - # Create a mechanism for monitoring when all threads are finished. - coord = tf.train.Coordinator() - - # Create a generic TensorFlow-based utility for converting all image codings. - coder = ImageCoder() - - threads = [] - for thread_index in xrange(len(ranges)): - args = (coder, thread_index, ranges, name, filenames, - synsets, labels, humans, num_shards) - t = threading.Thread(target=_process_image_files_batch, args=args) - t.start() - threads.append(t) - - # Wait for all the threads to terminate. - coord.join(threads) - print('%s: Finished writing all %d images in data set.' % - (datetime.now(), len(filenames))) - sys.stdout.flush() - - -def _find_image_files(data_dir, labels_file): - """Build a list of all images files and labels in the data set. - - Args: - data_dir: string, path to the root directory of images. - - Assumes that the ImageNet data set resides in JPEG files located in - the following directory structure. - - data_dir/n01440764/ILSVRC2012_val_00000293.JPEG - data_dir/n01440764/ILSVRC2012_val_00000543.JPEG - - where 'n01440764' is the unique synset label associated with these images. - - labels_file: string, path to the labels file. - - The list of valid labels are held in this file. 
Assumes that the file - contains entries as such: - n01440764 - n01443537 - n01484850 - where each line corresponds to a label expressed as a synset. We map - each synset contained in the file to an integer (based on the alphabetical - ordering) starting with the integer 1 corresponding to the synset - contained in the first line. - - The reason we start the integer labels at 1 is to reserve label 0 as an - unused background class. - - Returns: - filenames: list of strings; each string is a path to an image file. - synsets: list of strings; each string is a unique WordNet ID. - labels: list of integer; each integer identifies the ground truth. - """ - print('Determining list of input files and labels from %s.' % data_dir) - challenge_synsets = [l.strip() for l in - tf.compat.v1.gfile.FastGFile(labels_file, 'r').readlines()] - - labels = [] - filenames = [] - synsets = [] - - # Leave label index 0 empty as a background class. - label_index = 1 - - # Construct the list of JPEG files and labels. - for synset in challenge_synsets: - jpeg_file_path = '%s/%s/*.JPEG' % (data_dir, synset) - matching_files = tf.io.gfile.glob(jpeg_file_path) - - labels.extend([label_index] * len(matching_files)) - synsets.extend([synset] * len(matching_files)) - filenames.extend(matching_files) - - if not label_index % 100: - print('Finished finding files in %d of %d classes.' % ( - label_index, len(challenge_synsets))) - label_index += 1 - - # Shuffle the ordering of all image files in order to guarantee - # random ordering of the images with respect to label in the - # saved TFRecord files. Make the randomization repeatable. - shuffled_index = range(len(filenames)) - random.seed(12345) - - random.shuffle(list(range(len(shuffled_index)))) - - filenames = [filenames[i] for i in shuffled_index] - synsets = [synsets[i] for i in shuffled_index] - labels = [labels[i] for i in shuffled_index] - - print('Found %d JPEG files across %d labels inside %s.' % - (len(filenames), len(challenge_synsets), data_dir)) - return filenames, synsets, labels - - -def _find_human_readable_labels(synsets, synset_to_human): - """Build a list of human-readable labels. - - Args: - synsets: list of strings; each string is a unique WordNet ID. - synset_to_human: dict of synset to human labels, e.g., - 'n02119022' --> 'red fox, Vulpes vulpes' - - Returns: - List of human-readable strings corresponding to each synset. - """ - humans = [] - for s in synsets: - assert s in synset_to_human, ('Failed to find: %s' % s) - humans.append(synset_to_human[s]) - return humans - - -def _process_dataset(name, directory, num_shards, synset_to_human): - """Process a complete data set and save it as a TFRecord. - - Args: - name: string, unique identifier specifying the data set. - directory: string, root path to the data set. - num_shards: integer number of shards for this data set. - synset_to_human: dict of synset to human labels, e.g., - 'n02119022' --> 'red fox, Vulpes vulpes' - """ - filenames, synsets, labels = _find_image_files(directory, FLAGS.labels_file) - humans = _find_human_readable_labels(synsets, synset_to_human) - - _process_image_files(name, filenames, synsets, labels, - humans, num_shards) - - -def _build_synset_lookup(imagenet_metadata_file): - """Build lookup for synset to human-readable label. - - Args: - imagenet_metadata_file: string, path to file containing mapping from - synset to human-readable label. 
- - Assumes each line of the file looks like: - - n02119247 black fox - n02119359 silver fox - n02119477 red fox, Vulpes fulva - - where each line corresponds to a unique mapping. Note that each line is - formatted as \t. - - Returns: - Dictionary of synset to human labels, such as: - 'n02119022' --> 'red fox, Vulpes vulpes' - """ - lines = tf.compat.v1.gfile.FastGFile(imagenet_metadata_file, 'r').readlines() - synset_to_human = {} - for l in lines: - if l: - parts = l.strip().split('\t') - assert len(parts) == 2 - synset = parts[0] - human = parts[1] - synset_to_human[synset] = human - return synset_to_human - - -def main(unused_argv): - assert not FLAGS.shards % FLAGS.num_threads, ( - 'Please make the FLAGS.num_threads commensurate with FLAGS.shards') - - print('Saving results to %s' % FLAGS.output_directory) - - # Build a map from synset to human-readable label. - synset_to_human = _build_synset_lookup(FLAGS.imagenet_metadata_file) - - if(FLAGS.raw_directory != None): - _process_dataset(FLAGS.subset, FLAGS.raw_directory,FLAGS.shards, synset_to_human) - -if __name__ == '__main__': - tf.compat.v1.app.run() diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/imagenet_prepare/download_and_convert_imagenet.sh b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/imagenet_prepare/download_and_convert_imagenet.sh deleted file mode 100644 index f9baa85ab07..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/imagenet_prepare/download_and_convert_imagenet.sh +++ /dev/null @@ -1,100 +0,0 @@ -#!/bin/bash -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# Script to download and preprocess ImageNet Challenge 2012 -# training and validation data set. -# -# The final output of this script are sharded TFRecord files containing -# serialized Example protocol buffers. See build_imagenet_data.py for -# details of how the Example protocol buffers contain the ImageNet data. -# -# The final output of this script appears as such: -# -# data_dir/train-00000-of-01024 -# data_dir/train-00001-of-01024 -# ... -# data_dir/train-00127-of-01024 -# -# and -# -# data_dir/validation-00000-of-00128 -# data_dir/validation-00001-of-00128 -# ... -# data_dir/validation-00127-of-00128 -# -# Note that this script may take several hours to run to completion. The -# conversion of the ImageNet data to TFRecords alone takes 2-3 hours depending -# on the speed of your machine. Please be patient. -# -# **IMPORTANT** -# To download the raw images, the user must create an account with image-net.org -# and generate a username and access_key. The latter two are required for -# downloading the raw images. -# - -set -e - -if [ -z "$1" ]; then - echo "usage download_and_convert_imagenet.sh [data dir]" - exit -fi - -# Create the output and temporary directories. 
-DATA_DIR="${1%/}" -SCRATCH_DIR="${DATA_DIR}/raw-data/" -mkdir -p "${DATA_DIR}" -mkdir -p "${SCRATCH_DIR}" -WORK_DIR="$0.runfiles/__main__" - -# Download the ImageNet data. -LABELS_FILE="${WORK_DIR}/datasets/imagenet_lsvrc_2015_synsets.txt" -DOWNLOAD_SCRIPT="${WORK_DIR}/datasets/download_imagenet.sh" -"${DOWNLOAD_SCRIPT}" "${SCRATCH_DIR}" "${LABELS_FILE}" - -# Note the locations of the train and validation data. -TRAIN_DIRECTORY="${SCRATCH_DIR}train/" -VALIDATION_DIRECTORY="${SCRATCH_DIR}validation/" - -# Preprocess the validation data by moving the images into the appropriate -# sub-directory based on the label (synset) of the image. -echo "Organizing the validation data into sub-directories." -PREPROCESS_VAL_SCRIPT="${WORK_DIR}/datasets/preprocess_imagenet_validation_data.py" -VAL_LABELS_FILE="${WORK_DIR}/datasets/imagenet_2012_validation_synset_labels.txt" - -"${PREPROCESS_VAL_SCRIPT}" "${VALIDATION_DIRECTORY}" "${VAL_LABELS_FILE}" - -# Convert the XML files for bounding box annotations into a single CSV. -echo "Extracting bounding box information from XML." -BOUNDING_BOX_SCRIPT="${WORK_DIR}/datasets/process_bounding_boxes.py" -BOUNDING_BOX_FILE="${SCRATCH_DIR}/imagenet_2012_bounding_boxes.csv" -BOUNDING_BOX_DIR="${SCRATCH_DIR}bounding_boxes/" - -"${BOUNDING_BOX_SCRIPT}" "${BOUNDING_BOX_DIR}" "${LABELS_FILE}" \ - | sort >"${BOUNDING_BOX_FILE}" -echo "Finished downloading and preprocessing the ImageNet data." - -# Build the TFRecords version of the ImageNet data. -BUILD_SCRIPT="${WORK_DIR}/build_imagenet_data" -OUTPUT_DIRECTORY="${DATA_DIR}" -IMAGENET_METADATA_FILE="${WORK_DIR}/datasets/imagenet_metadata.txt" - -"${BUILD_SCRIPT}" \ - --train_directory="${TRAIN_DIRECTORY}" \ - --validation_directory="${VALIDATION_DIRECTORY}" \ - --output_directory="${OUTPUT_DIRECTORY}" \ - --imagenet_metadata_file="${IMAGENET_METADATA_FILE}" \ - --labels_file="${LABELS_FILE}" \ - --bounding_box_file="${BOUNDING_BOX_FILE}" diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/imagenet_prepare/download_imagenet.sh b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/imagenet_prepare/download_imagenet.sh deleted file mode 100644 index c780e179f93..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/imagenet_prepare/download_imagenet.sh +++ /dev/null @@ -1,99 +0,0 @@ -#!/bin/bash -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# Script to download ImageNet Challenge 2012 training and validation data set. -# -# Downloads and decompresses raw images and bounding boxes. -# -# **IMPORTANT** -# To download the raw images, the user must create an account with image-net.org -# and generate a username and access_key. The latter two are required for -# downloading the raw images. 
-# -# usage: -# ./download_imagenet.sh [dirname] -set -e - -if [ "x$IMAGENET_ACCESS_KEY" == x -o "x$IMAGENET_USERNAME" == x ]; then - cat < str: - """ - Returns the model download url - Returns: url string - - """ - pass - - @property - @abc.abstractmethod - def package_name(self) -> str: - """ - Returns the downloaded package path - Returns: path string - """ - pass - - def get_pretrained_model(self, destination): - """ - Obtains a ready to use pretrained model file. - Args: - destination: path to where the file should be stored - """ - print("Downloading the model from: {0}".format(self.model_url)) - os.system("curl -o {0} {1}".format(self.package_name, self.model_url)) - if tarfile.is_tarfile(self.package_name): - with tarfile.open(self.package_name) as tar: - if not os.path.exists(destination): - os.makedirs(destination) - print("Extracting the model package to {0}".format(destination)) - tar.extractall(destination) - - -class InceptionV1(Model): - """ Concrete implementation of the Model base class for Inception_v1""" - # TODO This will download the ckpt file, need to add handling for - # https://github.com/tensorflow/models/tree/master/research/slim#exporting-the-inference-graph - - @property - def model_url(self) -> str: - """ - Gets model download url - Returns: model url - - """ - return "http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz" - - @property - def package_name(self) -> str: - """ - Gets the package name - Returns: package name - - """ - return "inception_v1.tar.gz" - - -class InceptionV2(Model): - """ Concrete implementation of the Model base class for Inception_v2""" - # TODO This will download the ckpt file, need to add handling for - # https://github.com/tensorflow/models/tree/master/research/slim#exporting-the-inference-graph - - @property - def model_url(self) -> str: - """ - Gets model download url - Returns: model url - - """ - return "http://download.tensorflow.org/models/inception_v2_2016_08_28.tar.gz" - - @property - def package_name(self) -> str: - """ - Gets the package name - Returns: package name - - """ - return "inception_v2.tar.gz" - - -class InceptionV3(Model): - """ Concrete implementation of the Model base class for Inception_v3""" - - @property - def model_url(self) -> str: - """ - Gets model download url - Returns: model url - - """ - return "https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/inceptionv3_fp32_pretrained_model.pb" - - @property - def package_name(self) -> str: - """ - Gets the package name - Returns: package name - - """ - return "inception_v3.pb" - - -class InceptionV4(Model): - """ Concrete implementation of the Model base class for Inception_v4""" - - @property - def model_url(self) -> str: - """ - Gets model download url - Returns: model url - - """ - return "https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/inceptionv4_fp32_pretrained_model.pb" - - @property - def package_name(self) -> str: - """ - Gets the package name - Returns: package name - - """ - return "inception_v4.pb" - - -class Resnet50V1(Model): - """ Concrete implementation of the Model base class for resnet50_V1""" - - @property - def model_url(self) -> str: - """ - Gets model download url - Returns: model url - - """ - return "https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet50_fp32_pretrained_model.pb" - - @property - def package_name(self) -> str: - """ - Gets the package name - Returns: package name - - """ - return "resnet_v1_50.pb" - - -class Resnet101V1(Model): - """ 
Concrete implementation of the Model base class for resnet101_V1""" - - @property - def model_url(self) -> str: - """ - Gets model download url - Returns: model url - - """ - return "https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet101_fp32_pretrained_model.pb" - - @property - def package_name(self) -> str: - """ - Gets the package name - Returns: package name - - """ - return "resnet_v1_101.pb" - - -class ResnetV250(Model): - """ Concrete implementation of the Model base class for Resnet v2 50 """ - def __init__(self): - raise NotImplementedError("Resnet_V2_50 is not supported yet") - - @property - def model_url(self) -> str: - """ - Gets model download url - Returns: model url - - """ - pass - - @property - def package_name(self) -> str: - """ - Gets the package name - Returns: package name - - """ - pass - - -class ResnetV2101(Model): - """ Concrete implementation of the Model base class for Resnet V2 101""" - def __init__(self): - raise NotImplementedError("Resnet_V2_101 is not supported yet") - - @property - def model_url(self) -> str: - """ - Gets model download url - Returns: model url - - """ - pass - - @property - def package_name(self) -> str: - """ - Gets the package name - Returns: package name - - """ - pass - - -class ResnetV2152(Model): - """ Concrete implementation of the Model base class for Resnet v2 151""" - def __init__(self): - raise NotImplementedError("Resnet_V2_152 is not supported yet") - - @property - def model_url(self) -> str: - """ - Gets model download url - Returns: model url - - """ - pass - - @property - def package_name(self) -> str: - """ - Gets the package name - Returns: package name - - """ - pass - - -class MobilenetV1(Model): - """ Concrete implementation of the Model base class for Mobilenetv1""" - - @property - def model_url(self) -> str: - """ - Gets model download url - Returns: model url - - """ - return "http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224.tgz" - - @property - def package_name(self) -> str: - """ - Gets the package name - Returns: package name - - """ - return "mobilenet_v1_1.0_224.tgz" - - -class MobilenetV2(Model): - """ Concrete implementation of the Model base class for Mobilenetv2""" - @property - def model_url(self) -> str: - """ - Gets model download url - Returns: model url - - """ - return "https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz" - - @property - def package_name(self) -> str: - """ - Gets the package name - Returns: package name - - """ - return "mobilenet_v2_1.0_224.tgz" - - -class MobilenetV3(Model): - """ Concrete implementation of the Model base class for Mobilenetv3""" - - @property - def model_url(self) -> str: - """ - Gets model download url - Returns: model url - - """ - return "https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large_224_1.0_float.tgz" - - @property - def package_name(self) -> str: - """ - Gets the package name - Returns: package name - - """ - return "v3-large_224_1.0_float.tgz" - - -def get_model(model: SupportedModels) -> Model: - """ - Factory method that returns the requested model object - Args: - model: model from SupportedModels enumeration - - Returns: Concrete object inheriting the Model base class - - """ - model_map = { - SupportedModels.inception_v1: InceptionV1(), - SupportedModels.inception_v2: InceptionV2(), - SupportedModels.inception_v3: InceptionV3(), - SupportedModels.inception_v4: InceptionV4(), - SupportedModels.mobilenet_v1: MobilenetV1(), - 
SupportedModels.mobilenet_v2: MobilenetV2(), - SupportedModels.mobilenet_v3: MobilenetV3(), - SupportedModels.resnet50_v1: Resnet50V1(), - SupportedModels.resnet101_v1: Resnet101V1() - } - return model_map.get(model) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Prepare pre-trained model for COCO object detection') - parser.add_argument('--model_name', type=str, default='inception_v1', - help='model to download, default is inception_v1', - choices=["inception_v1", "inception_v2", "inception_v3", "inception_v4", - "mobilenet_v1", "mobilenet_v2", "mobilenet_v3", - "resnet50_v1", "resnet101_v1", - "resnet_v2_50", "resnet_v2_101", "resnet_v2_152"]) - parser.add_argument('--model_path', type=str, default='{0}/model'.format(os.getcwd()), - help='directory to put models, default is {0}/model'.format(os.getcwd())) - - args = parser.parse_args() - model_name = args.model_name - model_path = args.model_path - try: - model = get_model(SupportedModels(model_name)) - model.get_pretrained_model(model_path) - except AttributeError: - print("The model {0} is not supported. Supported models: {1}" - .format(model_name, SupportedModels.__members__.keys())) diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/requirements.txt b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/requirements.txt deleted file mode 100644 index 16ea87a7151..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -intel-tensorflow -neural-compressor diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet101.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet101.yaml deleted file mode 100644 index 20d1fbaad94..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet101.yaml +++ /dev/null @@ -1,77 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: resnet_v1_101 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. 
required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - postprocess: - transform: - LabelShift: 1 - performance: # optional. used to benchmark performance of passing model. - iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet101_itex.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet101_itex.yaml deleted file mode 100644 index 27f43a3020a..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet101_itex.yaml +++ /dev/null @@ -1,77 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: resnet_v1_101 - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. 
- metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - postprocess: - transform: - LabelShift: 1 - performance: # optional. used to benchmark performance of passing model. - iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet50_v1.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet50_v1.yaml deleted file mode 100644 index 3e59160457a..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet50_v1.yaml +++ /dev/null @@ -1,71 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: resnet50_v1 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - performance: # optional. 
used to benchmark performance of passing model. - iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet50_v1_5.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet50_v1_5.yaml deleted file mode 100644 index 6857e013035..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet50_v1_5.yaml +++ /dev/null @@ -1,76 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: # mandatory. used to specify model specific information. - name: resnet50_v1_5 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - outputs: softmax_tensor - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - performance: # optional. used to benchmark performance of passing model. 
- configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/calibration/dataset - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet50_v1_5_itex.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet50_v1_5_itex.yaml deleted file mode 100644 index 49e9a7f674c..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet50_v1_5_itex.yaml +++ /dev/null @@ -1,74 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: resnet50_v1_5 - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - outputs: softmax_tensor - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - performance: # optional. used to benchmark performance of passing model. 
- configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet50_v1_itex.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet50_v1_itex.yaml deleted file mode 100644 index fdadaf7d412..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet50_v1_itex.yaml +++ /dev/null @@ -1,71 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: resnet50_v1 - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - performance: # optional. used to benchmark performance of passing model. 
- iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet_v2_101.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet_v2_101.yaml deleted file mode 100644 index bbdaeed4511..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet_v2_101.yaml +++ /dev/null @@ -1,70 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: resnet_v2_101 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. 
this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet_v2_101_itex.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet_v2_101_itex.yaml deleted file mode 100644 index 69ed4cc2a73..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet_v2_101_itex.yaml +++ /dev/null @@ -1,70 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: resnet_v2_101 - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. 
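Note: the constraints encoded in the yaml files removed above (calibration sampling size, the minmax activation algorithm, a 1% relative accuracy criterion, timeout 0) map naturally onto Neural Compressor's code-based configuration classes. The following is a minimal, hypothetical sketch of that mapping, assuming the 2.x-style `PostTrainingQuantConfig`/`TuningCriterion`/`AccuracyCriterion` signatures; the model path and the trivial `eval_func` are placeholders, not files from this repository.

```python
# Sketch only: expresses the same constraints the deleted yaml encoded.
from neural_compressor import quantization
from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion
from neural_compressor.data import DataLoader, Datasets

# Random data stands in for the ImageRecord calibration set used by the real examples.
calib_dataset = Datasets("tensorflow")["dummy"](shape=(100, 224, 224, 3), label=True)
calib_dataloader = DataLoader(framework="tensorflow", dataset=calib_dataset, batch_size=10)

conf = PostTrainingQuantConfig(
    device="cpu",
    calibration_sampling_size=[50, 100],                                  # sampling_size: 50, 100
    accuracy_criterion=AccuracyCriterion(criterion="relative",
                                         tolerable_loss=0.01),            # relative: 0.01
    tuning_criterion=TuningCriterion(strategy="basic", timeout=0),        # exit_policy.timeout: 0
)

q_model = quantization.fit(
    model="/path/to/fp32_model.pb",       # placeholder path
    conf=conf,
    calib_dataloader=calib_dataloader,
    eval_func=lambda model: 1.0,          # placeholder accuracy function
)
if q_model is not None:
    q_model.save("/path/to/int8_model")
```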
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet_v2_152.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet_v2_152.yaml deleted file mode 100644 index 398e8115237..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet_v2_152.yaml +++ /dev/null @@ -1,70 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: resnet_v2_152 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. 
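The `evaluation.accuracy` block deleted above wires a built-in top-1 metric and an ImageRecord dataloader into the tuner. The same role can be played by a user-supplied `eval_func` that owns both the data and the metric; below is a small, framework-agnostic top-1 sketch in which `predict_fn`, the toy batches, and the final lambda are illustrative placeholders rather than code from this repository.

```python
import numpy as np

def top1_accuracy(batches, predict_fn):
    """Top-1 accuracy of predict_fn over an iterable of (images, labels) batches."""
    correct = total = 0
    for images, labels in batches:
        preds = np.argmax(predict_fn(images), axis=-1)
        labels = np.asarray(labels).reshape(-1)
        correct += int(np.sum(preds == labels))
        total += labels.size
    return correct / max(total, 1)

# Toy self-check: fake logits that always predict class 0 against all-zero labels.
fake_batches = [(np.zeros((4, 224, 224, 3)), np.zeros(4, dtype=np.int64))] * 2
fake_predict = lambda images: np.tile([1.0, 0.0], (len(images), 1))
print(top1_accuracy(fake_batches, fake_predict))  # prints 1.0

# An eval_func handed to the tuner could then be as simple as:
# eval_func = lambda model: top1_accuracy(eval_batches, make_predict_fn(model))
```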
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet_v2_152_itex.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet_v2_152_itex.yaml deleted file mode 100644 index 44e1e3dff49..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet_v2_152_itex.yaml +++ /dev/null @@ -1,70 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: resnet_v2_152 - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. 
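Each of these yaml files also carries an `evaluation.performance` block (100 iterations, 4 cores per instance, 7 instances) for benchmarking. Below is a hypothetical sketch of the same measurement through the code-based `BenchmarkConfig`, assuming the 2.x benchmark API; the model path is a placeholder and random data stands in for the ImageRecord evaluation set.

```python
# Sketch only: mirrors iteration / cores_per_instance / num_of_instance from the yaml.
from neural_compressor.benchmark import fit
from neural_compressor.config import BenchmarkConfig
from neural_compressor.data import DataLoader, Datasets

dataset = Datasets("tensorflow")["dummy"](shape=(100, 224, 224, 3), label=True)
b_dataloader = DataLoader(framework="tensorflow", dataset=dataset, batch_size=1)

conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7)
fit("/path/to/model.pb", conf, b_dataloader=b_dataloader)  # placeholder model path
```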
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet_v2_50.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet_v2_50.yaml deleted file mode 100644 index f3f996c85cd..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet_v2_50.yaml +++ /dev/null @@ -1,70 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: resnet_v2_50 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. 
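The recurring `dataloader` blocks in these configs (an ImageRecord dataset plus a BilinearImagenet or ResizeCropImagenet transform) can also be constructed in code. This sketch assumes the built-in dataset and transform are still registered under the names used in the yaml and that `TRANSFORMS`, `Datasets`, and `DataLoader` are importable from `neural_compressor.data`; the dataset path is a placeholder.

```python
# Sketch only: code counterpart of the yaml dataloader block, under the stated assumptions.
from neural_compressor.data import DataLoader, Datasets, TRANSFORMS

preprocess = TRANSFORMS("tensorflow", "preprocess")["BilinearImagenet"](height=224, width=224)
dataset = Datasets("tensorflow")["ImageRecord"](root="/path/to/evaluation/dataset",  # placeholder
                                                transform=preprocess)
dataloader = DataLoader(framework="tensorflow", dataset=dataset, batch_size=32)

for images, labels in dataloader:  # each batch can feed calibration or evaluation
    print(images.shape)
    break
```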
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet_v2_50_itex.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet_v2_50_itex.yaml deleted file mode 100644 index 28efda5dee8..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/resnet_v2_50_itex.yaml +++ /dev/null @@ -1,70 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: resnet_v2_50 - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 224 - width: 224 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. 
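The `_itex` variants of these yaml files differ from the CPU ones only in `framework: tensorflow_itex` and `device: gpu`. With a code-based config, that choice would be carried by the device/backend arguments instead; a short hypothetical sketch, assuming `PostTrainingQuantConfig` accepts an `itex` backend in 2.x.

```python
# Sketch only: GPU + Intel Extension for TensorFlow counterpart of the "_itex" yaml files.
from neural_compressor.config import PostTrainingQuantConfig

itex_gpu_conf = PostTrainingQuantConfig(device="gpu", backend="itex",
                                        calibration_sampling_size=[50, 100])
# itex_gpu_conf would then be passed to quantization.fit in place of the CPU config.
```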
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/run_benchmark.sh b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/run_benchmark.sh deleted file mode 100644 index 2304baaea20..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/run_benchmark.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_benchmark -function run_benchmark { - - python main.py \ - --input-graph ${input_model} \ - --config ${config} \ - --mode ${mode} \ - --benchmark -} - -main "$@" diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/run_tuning.sh b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/run_tuning.sh deleted file mode 100644 index 5ecbd6b31ea..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/run_tuning.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_tuning - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - python main.py \ - --input-graph ${input_model} \ - --output-graph ${output_model} \ - --config ${config} \ - --tune -} - -main "$@" diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/README.md b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/README.md deleted file mode 100644 index fc625d75b72..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/README.md +++ /dev/null @@ -1,294 +0,0 @@ -Step-by-Step -============ - -This document lists the steps to reproduce the Intel Optimized TensorFlow slim model tuning results. - -> **Note**: -> Slim models are only supported in Intel optimized TF 1.15.x. We use 1.15.2 as an example. - -# Prerequisite - -### 1. Installation - Python 3.6 or a higher version is recommended. - - ```shell - pip install -r requirements.txt - - ``` - -### 2. Prepare Dataset - - The TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in the `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image, we can use the below command to convert it to the TF records format.
- - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq - # convert validation subset - bash prepare_dataset.sh --output_dir=./data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation - # convert train subset - bash prepare_dataset.sh --output_dir=./data --raw_dir=/PATH/TO/img_raw/train/ --subset=train - ``` - -### 3. Prepare pre-trained model - This tool supports slim ckpt files as input for the TensorFlow backend, so we can directly download the ckpt model. The demonstrated models are in Google [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models). We will give an example with Inception_v1: - - Download the checkpoint file from [here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models) - ```shell - wget http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz - tar -xvf inception_v1_2016_08_28.tar.gz - ``` -> **Note**: -> slim models need the tf_slim module by default. To run user-specific slim nets, the model should define model_func and arg_scope and register them with TFSlimNetsFactory's register API; inception_v4.py is an example model and is registered in main.py - ```python - factory = TFSlimNetsFactory() - input_shape = [None, 299, 299, 3] - factory.register('inception_v4', inception_v4, input_shape, inception_v4_arg_scope) - - ``` -> tf_slim default supported nets are: ['alexnet_v2', 'overfeat', 'vgg_a', 'vgg_16', 'vgg_19', 'inception_v1', 'inception_v2', 'inception_v3','resnet_v1_50', 'resnet_v1_101', 'resnet_v1_152', 'resnet_v1_200','resnet_v2_50', 'resnet_v2_101', 'resnet_v2_152', 'resnet_v2_200'] -> make sure your input_graph name matches one of the default nets, e.g. vgg_16.ckpt will map to the vgg_16 net while vgg16 will throw a not-found error. - -# Run -## tune - ./run_tuning.sh --config=model.yaml --input_model=/path/to/input_model.ckpt --output_model=/path/to/save/nc_tuned.pb - -## benchmark - ./run_benchmark.sh --config=model.yaml --input_model=/path/to/nc_tuned.pb - -### 1. resnet_v1_50 - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim - bash run_tuning.sh --config=resnet_v1_50.yaml \ - --input_model=/PATH/TO/resnet_v1_50.ckpt \ - --output_model=./nc_resnet_v1_50.pb - ``` - -### 2. resnet_v1_101 - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim - bash run_tuning.sh --config=../resnet101.yaml \ - --input_model=/PATH/TO/resnet_v1_101.ckpt \ - --output_model=./nc_resnet_v1_101.pb - ``` - -### 3. resnet_v1_152 - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim - bash run_tuning.sh --config=resnet_v1_152.yaml \ - --input_model=/PATH/TO/resnet_v1_152.ckpt \ - --output_model=./nc_resnet_v1_152.pb - - ``` - -### 4. resnet_v2_50 - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim - bash run_tuning.sh --config=../resnet_v2_50.yaml \ - --input_model=/PATH/TO/resnet_v2_50.ckpt \ - --output_model=./nc_resnet_v2_50.pb - - ``` - -### 5. resnet_v2_101 - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim - bash run_tuning.sh --config=../resnet_v2_101.yaml \ - --input_model=/PATH/TO/resnet_v2_101.ckpt \ - --output_model=./nc_resnet_v2_101.pb - - ``` - -### 6.
resnet_v2_152 - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim - bash run_tuning.sh --config=../resnet_v2_152.yaml \ - --input_model=/PATH/TO/resnet_v2_152.ckpt \ - --output_model=./nc_resnet_v2_152.pb - - ``` - -### 7. inception_v1 - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim - bash run_tuning.sh --config=../inception_v1.yaml \ - --input_model=/PATH/TO/inception_v1.ckpt \ - --output_model=./nc_inception_v1.pb - - ``` - -### 8. inception_v2 - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim - bash run_tuning.sh --config=../inception_v2.yaml \ - --input_model=/PATH/TO/inception_v2.ckpt \ - --output_model=./nc_inception_v2.pb - ``` - -### 9. inception_v3 - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim - bash run_tuning.sh --config=inception_v3.yaml \ - --input_model=/PATH/TO/inception_v3.ckpt \ - --output_model=./nc_inception_v3.pb - ``` - -### 10. inception_v4 - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim - bash run_tuning.sh --config=../inception_v4.yaml \ - --input_model=/PATH/TO/inception_v4.ckpt \ - --output_model=./nc_inception_v4.pb - ``` - -### 11. vgg16 - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim - bash run_tuning.sh --config=../vgg16.yaml \ - --input_model=/PATH/TO/vgg_16.ckpt \ - --output_model=./nc_vgg_16.pb - ``` - -### 12. vgg19 - - ```shell - cd examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim - bash run_tuning.sh --config=../vgg19.yaml \ - --input_model=/PATH/TO/vgg_19.ckpt \ - --output_model=./nc_vgg_19.pb - ``` - -Examples of enabling Intel® Neural Compressor auto tuning on TensorFlow Inception V1 -======================================================= - -This is a tutorial on how to enable a TensorFlow slim model with Intel® Neural Compressor. - -# User Code Analysis - -Intel® Neural Compressor supports two usages: - -1. User specifies the fp32 "model", a yaml-configured calibration dataloader in the calibration field, an evaluation dataloader in the evaluation field, and a metric in the tuning.metric field of the model-specific yaml config file. - -> *Note*: -> you should change the dataset path in the model-specific yaml file to your own dataset path - -2. User specifies the fp32 "model", a calibration dataset "q_dataloader" and a custom "eval_func" which encapsulates the evaluation dataset and metric by itself. - -As Inception V1 is a typical image recognition model, we use Top-K as the metric, which is built into Intel® Neural Compressor. It's easy to directly use method 1, i.e. to configure a yaml file. - -### Write Yaml config file - -In the examples directory, there is a template.yaml. We could remove most of the items and only keep the mandatory items for tuning. - - -```yaml -# inceptionv1.yaml - -model: # mandatory. used to specify model specific information. - name: inceptionv1 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - inputs: input - outputs: InceptionV1/Logits/Predictions/Reshape_1 - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 5, 10 # optional. default value is 100.
used to set how many samples should be used in calibration. - dataloader: - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. - -``` - -Here we choose the built-in topk metric and set the accuracy target as tolerating a 0.01 relative accuracy loss from the baseline. The default tuning strategy is the basic strategy. The timeout 0 means early stop as soon as a tuning config meets the accuracy target. - -### prepare - -There are two preparation steps here: -1. Prepare environment -```shell -pip install intel-tensorflow==1.15.2 neural_compressor -``` -2. Prepare the ImageNet dataset and pretrained ckpt file -```shell -wget http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz -``` - -### code update - -This tool supports tuning and benchmarking the model. In the tune phase, make sure to use get_slim_graph to get the slim graph and pass it to the tool - -```python - - from neural_compressor.experimental import Quantization - from neural_compressor.adaptor.tf_utils.util import get_slim_graph - quantizer = Quantization(args.config) - slim_graph = get_slim_graph(args.input_graph, model_func, arg_scope, images, **kwargs) - q_model = quantizer(slim_graph) - save(q_model, args.output_graph) -``` - -when in the benchmark phase: - -```python - - from neural_compressor.experimental import Benchmark - evaluator = Benchmark(args.config) - results = evaluator(model=args.input_graph) -``` diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/inception_utils.py b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/inception_utils.py deleted file mode 100644 index af12859456f..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/inception_utils.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains common code shared by all inception models. - -Usage of arg scope: - with slim.arg_scope(inception_arg_scope()): - logits, end_points = inception.inception_v3(images, num_classes, - is_training=is_training) - -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow.compat.v1 as tf -import tf_slim as slim - - -def inception_arg_scope( - weight_decay=0.00004, - use_batch_norm=True, - batch_norm_decay=0.9997, - batch_norm_epsilon=0.001, - activation_fn=tf.nn.relu, - batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS, - batch_norm_scale=False): - """Defines the default arg scope for inception models. - - Args: - weight_decay: The weight decay to use for regularizing the model. - use_batch_norm: "If `True`, batch_norm is applied after each convolution. - batch_norm_decay: Decay for batch norm moving average. - batch_norm_epsilon: Small float added to variance to avoid dividing by zero - in batch norm. - activation_fn: Activation function for conv2d. - batch_norm_updates_collections: Collection for the update ops for - batch norm. - batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the - activations in the batch normalization layer. - - Returns: - An `arg_scope` to use for the inception models. - """ - batch_norm_params = { - # Decay for the moving averages. - 'decay': batch_norm_decay, - # epsilon to prevent 0s in variance. - 'epsilon': batch_norm_epsilon, - # collection containing update_ops. - 'updates_collections': batch_norm_updates_collections, - # use fused batch norm if possible. - 'fused': None, - 'scale': batch_norm_scale, - } - if use_batch_norm: - normalizer_fn = slim.batch_norm - normalizer_params = batch_norm_params - else: - normalizer_fn = None - normalizer_params = {} - # Set weight_decay for weights in Conv and FC layers. - with slim.arg_scope([slim.conv2d, slim.fully_connected], - weights_regularizer=slim.l2_regularizer(weight_decay)): - with slim.arg_scope( - [slim.conv2d], - weights_initializer=slim.variance_scaling_initializer(), - activation_fn=activation_fn, - normalizer_fn=normalizer_fn, - normalizer_params=normalizer_params) as sc: - return sc diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/inception_v3.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/inception_v3.yaml deleted file mode 100644 index 219a99a7bbf..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/inception_v3.yaml +++ /dev/null @@ -1,76 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: inception_v3 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - BilinearImagenet: - height: 299 - width: 299 - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - op_wise: { - 'v0/cg/conv0/conv2d/Conv2D': { - 'activation': {'dtype': ['fp32']}, - } - } - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 299 - width: 299 - performance: # optional. used to benchmark performance of passing model. - iteration: 100 - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - BilinearImagenet: - height: 299 - width: 299 - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/inception_v4.py b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/inception_v4.py deleted file mode 100644 index 5d43124ff9d..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/inception_v4.py +++ /dev/null @@ -1,344 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains the definition of the Inception V4 architecture. - -As described in http://arxiv.org/abs/1602.07261. - - Inception-v4, Inception-ResNet and the Impact of Residual Connections - on Learning - Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow.compat.v1 as tf -import tf_slim as slim -import inception_utils - - -def block_inception_a(inputs, scope=None, reuse=None): - """Builds Inception-A block for Inception v4 network.""" - # By default use stride=1 and SAME padding - with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], - stride=1, padding='SAME'): - with tf.variable_scope( - scope, 'BlockInceptionA', [inputs], reuse=reuse): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(inputs, 96, [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, 96, [3, 3], scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3') - branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0c_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d(branch_3, 96, [1, 1], scope='Conv2d_0b_1x1') - return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - - -def block_reduction_a(inputs, scope=None, reuse=None): - """Builds Reduction-A block for Inception v4 network.""" - # By default use stride=1 and SAME padding - with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], - stride=1, padding='SAME'): - with tf.variable_scope( - scope, 'BlockReductionA', [inputs], reuse=reuse): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(inputs, 384, [3, 3], stride=2, padding='VALID', - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, 224, [3, 3], scope='Conv2d_0b_3x3') - branch_1 = slim.conv2d(branch_1, 256, [3, 3], stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID', - scope='MaxPool_1a_3x3') - return tf.concat(axis=3, values=[branch_0, branch_1, branch_2]) - - -def block_inception_b(inputs, scope=None, reuse=None): - """Builds Inception-B block for Inception v4 network.""" - # By default use stride=1 and SAME padding - with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], - stride=1, padding='SAME'): - with tf.variable_scope( - scope, 'BlockInceptionB', [inputs], reuse=reuse): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = 
slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, 224, [1, 7], scope='Conv2d_0b_1x7') - branch_1 = slim.conv2d(branch_1, 256, [7, 1], scope='Conv2d_0c_7x1') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, 192, [7, 1], scope='Conv2d_0b_7x1') - branch_2 = slim.conv2d(branch_2, 224, [1, 7], scope='Conv2d_0c_1x7') - branch_2 = slim.conv2d(branch_2, 224, [7, 1], scope='Conv2d_0d_7x1') - branch_2 = slim.conv2d(branch_2, 256, [1, 7], scope='Conv2d_0e_1x7') - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1') - return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - - -def block_reduction_b(inputs, scope=None, reuse=None): - """Builds Reduction-B block for Inception v4 network.""" - # By default use stride=1 and SAME padding - with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], - stride=1, padding='SAME'): - with tf.variable_scope( - scope, 'BlockReductionB', [inputs], reuse=reuse): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1') - branch_0 = slim.conv2d(branch_0, 192, [3, 3], stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, 256, [1, 7], scope='Conv2d_0b_1x7') - branch_1 = slim.conv2d(branch_1, 320, [7, 1], scope='Conv2d_0c_7x1') - branch_1 = slim.conv2d(branch_1, 320, [3, 3], stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID', - scope='MaxPool_1a_3x3') - return tf.concat(axis=3, values=[branch_0, branch_1, branch_2]) - - -def block_inception_c(inputs, scope=None, reuse=None): - """Builds Inception-C block for Inception v4 network.""" - # By default use stride=1 and SAME padding - with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], - stride=1, padding='SAME'): - with tf.variable_scope( - scope, 'BlockInceptionC', [inputs], reuse=reuse): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1') - branch_1 = tf.concat(axis=3, values=[ - slim.conv2d(branch_1, 256, [1, 3], scope='Conv2d_0b_1x3'), - slim.conv2d(branch_1, 256, [3, 1], scope='Conv2d_0c_3x1')]) - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, 448, [3, 1], scope='Conv2d_0b_3x1') - branch_2 = slim.conv2d(branch_2, 512, [1, 3], scope='Conv2d_0c_1x3') - branch_2 = tf.concat(axis=3, values=[ - slim.conv2d(branch_2, 256, [1, 3], scope='Conv2d_0d_1x3'), - slim.conv2d(branch_2, 256, [3, 1], scope='Conv2d_0e_3x1')]) - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d(branch_3, 256, [1, 1], scope='Conv2d_0b_1x1') - return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - - -def inception_v4_base(inputs, final_endpoint='Mixed_7d', scope=None): - """Creates the Inception V4 network up to the given final endpoint. - - Args: - inputs: a 4-D tensor of size [batch_size, height, width, 3]. 
- final_endpoint: specifies the endpoint to construct the network up to. - It can be one of [ 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', - 'Mixed_3a', 'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d', - 'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e', - 'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c', - 'Mixed_7d'] - scope: Optional variable_scope. - - Returns: - logits: the logits outputs of the model. - end_points: the set of end_points from the inception model. - - Raises: - ValueError: if final_endpoint is not set to one of the predefined values, - """ - end_points = {} - - def add_and_check_final(name, net): - end_points[name] = net - return name == final_endpoint - - with tf.variable_scope(scope, 'InceptionV4', [inputs]): - with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], - stride=1, padding='SAME'): - # 299 x 299 x 3 - net = slim.conv2d(inputs, 32, [3, 3], stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - if add_and_check_final('Conv2d_1a_3x3', net): return net, end_points - # 149 x 149 x 32 - net = slim.conv2d(net, 32, [3, 3], padding='VALID', - scope='Conv2d_2a_3x3') - if add_and_check_final('Conv2d_2a_3x3', net): return net, end_points - # 147 x 147 x 32 - net = slim.conv2d(net, 64, [3, 3], scope='Conv2d_2b_3x3') - if add_and_check_final('Conv2d_2b_3x3', net): return net, end_points - # 147 x 147 x 64 - with tf.variable_scope('Mixed_3a'): - with tf.variable_scope('Branch_0'): - branch_0 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', - scope='MaxPool_0a_3x3') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, 96, [3, 3], stride=2, padding='VALID', - scope='Conv2d_0a_3x3') - net = tf.concat(axis=3, values=[branch_0, branch_1]) - if add_and_check_final('Mixed_3a', net): return net, end_points - - # 73 x 73 x 160 - with tf.variable_scope('Mixed_4a'): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1') - branch_0 = slim.conv2d(branch_0, 96, [3, 3], padding='VALID', - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, 64, [1, 7], scope='Conv2d_0b_1x7') - branch_1 = slim.conv2d(branch_1, 64, [7, 1], scope='Conv2d_0c_7x1') - branch_1 = slim.conv2d(branch_1, 96, [3, 3], padding='VALID', - scope='Conv2d_1a_3x3') - net = tf.concat(axis=3, values=[branch_0, branch_1]) - if add_and_check_final('Mixed_4a', net): return net, end_points - - # 71 x 71 x 192 - with tf.variable_scope('Mixed_5a'): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, 192, [3, 3], stride=2, padding='VALID', - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - branch_1 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', - scope='MaxPool_1a_3x3') - net = tf.concat(axis=3, values=[branch_0, branch_1]) - if add_and_check_final('Mixed_5a', net): return net, end_points - - # 35 x 35 x 384 - # 4 x Inception-A blocks - for idx in range(4): - block_scope = 'Mixed_5' + chr(ord('b') + idx) - net = block_inception_a(net, block_scope) - if add_and_check_final(block_scope, net): return net, end_points - - # 35 x 35 x 384 - # Reduction-A block - net = block_reduction_a(net, 'Mixed_6a') - if add_and_check_final('Mixed_6a', net): return net, end_points - - # 17 x 17 x 1024 - # 7 x Inception-B blocks - for idx in range(7): - block_scope = 'Mixed_6' + chr(ord('b') + idx) - net = block_inception_b(net, block_scope) - if 
add_and_check_final(block_scope, net): return net, end_points - - # 17 x 17 x 1024 - # Reduction-B block - net = block_reduction_b(net, 'Mixed_7a') - if add_and_check_final('Mixed_7a', net): return net, end_points - - # 8 x 8 x 1536 - # 3 x Inception-C blocks - for idx in range(3): - block_scope = 'Mixed_7' + chr(ord('b') + idx) - net = block_inception_c(net, block_scope) - if add_and_check_final(block_scope, net): return net, end_points - raise ValueError('Unknown final endpoint %s' % final_endpoint) - - -def inception_v4(inputs, num_classes=1001, is_training=True, - dropout_keep_prob=0.8, - reuse=None, - scope='InceptionV4', - create_aux_logits=True): - """Creates the Inception V4 model. - - Args: - inputs: a 4-D tensor of size [batch_size, height, width, 3]. - num_classes: number of predicted classes. If 0 or None, the logits layer - is omitted and the input features to the logits layer (before dropout) - are returned instead. - is_training: whether is training or not. - dropout_keep_prob: float, the fraction to keep before final layer. - reuse: whether or not the network and its variables should be reused. To be - able to reuse 'scope' must be given. - scope: Optional variable_scope. - create_aux_logits: Whether to include the auxiliary logits. - - Returns: - net: a Tensor with the logits (pre-softmax activations) if num_classes - is a non-zero integer, or the non-dropped input to the logits layer - if num_classes is 0 or None. - end_points: the set of end_points from the inception model. - """ - end_points = {} - with tf.variable_scope( - scope, 'InceptionV4', [inputs], reuse=reuse) as scope: - with slim.arg_scope([slim.batch_norm, slim.dropout], - is_training=is_training): - net, end_points = inception_v4_base(inputs, scope=scope) - - with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], - stride=1, padding='SAME'): - # Auxiliary Head logits - if create_aux_logits and num_classes: - with tf.variable_scope('AuxLogits'): - # 17 x 17 x 1024 - aux_logits = end_points['Mixed_6h'] - aux_logits = slim.avg_pool2d(aux_logits, [5, 5], stride=3, - padding='VALID', - scope='AvgPool_1a_5x5') - aux_logits = slim.conv2d(aux_logits, 128, [1, 1], - scope='Conv2d_1b_1x1') - aux_logits = slim.conv2d(aux_logits, 768, - aux_logits.get_shape()[1:3], - padding='VALID', scope='Conv2d_2a') - aux_logits = slim.flatten(aux_logits) - aux_logits = slim.fully_connected(aux_logits, num_classes, - activation_fn=None, - scope='Aux_logits') - end_points['AuxLogits'] = aux_logits - - # Final pooling and prediction - # TODO(sguada,arnoegw): Consider adding a parameter global_pool which - # can be set to False to disable pooling here (as in resnet_*()). 
- with tf.variable_scope('Logits'): - # 8 x 8 x 1536 - kernel_size = net.get_shape()[1:3] - if kernel_size.is_fully_defined(): - net = slim.avg_pool2d(net, kernel_size, padding='VALID', - scope='AvgPool_1a') - else: - net = tf.reduce_mean( - input_tensor=net, - axis=[1, 2], - keepdims=True, - name='global_pool') - end_points['global_pool'] = net - if not num_classes: - return net, end_points - # 1 x 1 x 1536 - net = slim.dropout(net, dropout_keep_prob, scope='Dropout_1b') - net = slim.flatten(net, scope='PreLogitsFlatten') - end_points['PreLogitsFlatten'] = net - # 1536 - logits = slim.fully_connected(net, num_classes, activation_fn=None, - scope='Logits') - end_points['Logits'] = logits - end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions') - return logits, end_points -inception_v4.default_image_size = 299 - - -inception_v4_arg_scope = inception_utils.inception_arg_scope diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/main.py b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/main.py deleted file mode 100644 index aaf3e99c52c..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/main.py +++ /dev/null @@ -1,81 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2020 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# - -import numpy as np -from argparse import ArgumentParser -import tensorflow as tf -from neural_compressor.model.nets_factory import TFSlimNetsFactory -import copy - -tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) -tf.compat.v1.disable_eager_execution() -from inception_v4 import inception_v4, inception_v4_arg_scope - -def save(model, path): - from tensorflow.python.platform import gfile - f = gfile.GFile(path, 'wb') - try: - f.write(model.as_graph_def().SerializeToString()) - except AttributeError as no_model: - print("None of the quantized models fits the \ - accuracy criteria: {0}".format(no_model)) - except Exception as exc: - print("Unexpected error while saving the model: {0}".format(exc)) - -def main(_): - arg_parser = ArgumentParser(description='Parse args') - - arg_parser.add_argument("--input-graph", - help='Specify the slim model', - dest='input_graph') - - arg_parser.add_argument("--output-graph", - help='Specify tune result model save dir', - dest='output_graph') - - arg_parser.add_argument("--config", default=None, help="tuning config") - - arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') - - arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') - - arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') - - args = arg_parser.parse_args() - - factory = TFSlimNetsFactory() - # user specific model can register to slim net factory - input_shape = [None, 299, 299, 3] - factory.register('inception_v4', inception_v4, input_shape, inception_v4_arg_scope) - - if args.tune: - from neural_compressor.experimental import Quantization - quantizer = Quantization(args.config) - quantizer.model = args.input_graph - q_model = quantizer.fit() - q_model.save(args.output_graph) - - if args.benchmark: - from neural_compressor.experimental import Benchmark - evaluator = Benchmark(args.config) - evaluator.model = args.input_graph - evaluator(args.mode) - -if __name__ == '__main__': - tf.compat.v1.app.run() diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/requirements.txt b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/requirements.txt deleted file mode 100644 index 6c690dfa4d7..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -intel-tensorflow==1.15.2 -neural-compressor -tf_slim diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/resnet_v1_152.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/resnet_v1_152.yaml deleted file mode 100644 index b96faa54cd3..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/resnet_v1_152.yaml +++ /dev/null @@ -1,76 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -model: # mandatory. used to specify model specific information. - name: resnet_v1_152 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - postprocess: - transform: - LabelShift: 1 - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/resnet_v1_50.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/resnet_v1_50.yaml deleted file mode 100644 index d92354e4b49..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/resnet_v1_50.yaml +++ /dev/null @@ -1,76 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: resnet_v1_50 - framework: tensorflow # mandatory. 
supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. - dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - postprocess: - transform: - LabelShift: 1 - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. 
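(The slim YAML files removed above only carried tuning knobs: a 50/100-sample calibration sweep, a 1% relative accuracy criterion, and a zero tuning timeout. A minimal sketch of the equivalent in-code setup under the 2.x API follows; `PostTrainingQuantConfig` and `calibration_sampling_size` appear elsewhere in this patch, while `TuningCriterion`, `AccuracyCriterion`, and their keyword names are assumptions here.)

```python
# Sketch only: maps the deleted resnet_v1_50.yaml knobs onto in-code config objects.
# TuningCriterion/AccuracyCriterion and their keyword arguments are assumed names;
# only PostTrainingQuantConfig and calibration_sampling_size are shown in this patch.
from neural_compressor.config import (
    PostTrainingQuantConfig,
    TuningCriterion,
    AccuracyCriterion,
)

accuracy_criterion = AccuracyCriterion(criterion="relative", tolerable_loss=0.01)  # was: relative: 0.01
tuning_criterion = TuningCriterion(timeout=0)                                      # was: exit_policy.timeout: 0
config = PostTrainingQuantConfig(
    device="cpu",                           # was: device: cpu
    calibration_sampling_size=[50, 100],    # was: calibration.sampling_size: 50, 100
    accuracy_criterion=accuracy_criterion,
    tuning_criterion=tuning_criterion,
)
```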
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/run_benchmark.sh b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/run_benchmark.sh deleted file mode 100644 index 2304baaea20..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/run_benchmark.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_benchmark { - - python main.py \ - --input-graph ${input_model} \ - --config ${config} \ - --mode ${mode} \ - --benchmark -} - -main "$@" diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/run_tuning.sh b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/run_tuning.sh deleted file mode 100644 index 5ecbd6b31ea..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/slim/run_tuning.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_tuning - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - python main.py \ - --input-graph ${input_model} \ - --output-graph ${output_model} \ - --config ${config} \ - --tune -} - -main "$@" diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/vgg16.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/vgg16.yaml deleted file mode 100644 index 928903f8b20..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/vgg16.yaml +++ /dev/null @@ -1,76 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: vgg_16 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. 
- dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - postprocess: - transform: - LabelShift: 1 - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/vgg16_itex.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/vgg16_itex.yaml deleted file mode 100644 index 716cb757642..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/vgg16_itex.yaml +++ /dev/null @@ -1,76 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: vgg_16 - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. 
- dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - postprocess: - transform: - LabelShift: 1 - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/vgg19.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/vgg19.yaml deleted file mode 100644 index 62453705a94..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/vgg19.yaml +++ /dev/null @@ -1,78 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: # mandatory. used to specify model specific information. - name: vgg_19 - framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. 
- dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - postprocess: - transform: - LabelShift: 1 - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/vgg19_itex.yaml b/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/vgg19_itex.yaml deleted file mode 100644 index 9b7b37ebbf8..00000000000 --- a/examples/tensorflow/image_recognition/tensorflow_models/quantization/ptq/vgg19_itex.yaml +++ /dev/null @@ -1,76 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: # mandatory. used to specify model specific information. - name: vgg_19 - framework: tensorflow_itex # mandatory. supported values are tensorflow, tensorflow_itex, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - calibration: - sampling_size: 50, 100 # optional. default value is 100. used to set how many samples should be used in calibration. 
- dataloader: - batch_size: 10 - dataset: - ImageRecord: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. - activation: - algorithm: minmax - -evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization. - metric: - topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. - dataloader: - batch_size: 32 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - postprocess: - transform: - LabelShift: 1 - performance: # optional. used to benchmark performance of passing model. - configs: - cores_per_instance: 4 - num_of_instance: 7 - dataloader: - batch_size: 1 - dataset: - ImageRecord: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed - transform: - ResizeCropImagenet: - height: 224 - width: 224 - mean_value: [123.68, 116.78, 103.94] - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/README.md b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/README.md index d978b0ed335..495d87691ba 100644 --- a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/README.md +++ b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/README.md @@ -12,9 +12,9 @@ This example can run on Intel CPUs and GPUs. # Install Intel® Neural Compressor pip install neural-compressor ``` -### 2. Install Intel Tensorflow +### 2. Install Tensorflow ```shell -pip install intel-tensorflow +pip install tensorflow ``` ### 3. Install Intel Extension for Tensorflow @@ -25,7 +25,8 @@ Intel Extension for Tensorflow is mandatory to be installed for quantizing the m ```shell pip install --upgrade intel-extension-for-tensorflow[gpu] ``` -For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers) +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers). #### Quantizing the model on Intel CPU(Experimental) Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. 
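(For the optional CPU case mentioned above, a sketch of the install command, assuming the package follows the same pip extras naming as the GPU wheel shown earlier:)

```shell
pip install --upgrade intel-extension-for-tensorflow[cpu]
```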
@@ -62,97 +63,44 @@ bash prepare_model.sh --dataset_location=./data --output_dir=./model Make sure the data and model have been generated successfully which located at ./data and ./model respectively. And your output_model will be located at ./output_model like the command below ```shell - python run_classifier.py \ - --task_name=MRPC \ - --data_dir=data/MRPC \ - --vocab_file=model/vocab.txt \ - --bert_config_file=model/bert_config.json \ - --init_checkpoint=model/model.ckpt-343 \ - --max_seq_length=128 \ - --train_batch_size=32 \ - --learning_rate=2e-5 \ - --num_train_epochs=3.0 \ - --output_dir=model \ - --output_model=output_model \ - --config=mrpc.yaml \ - --tune + bash run_tuning.sh --input_model=./model --dataset_location=./data --output_model=output_model ``` If you want the model without iterator inside the graph, you can add --strip_iterator like: ```shell - python run_classifier.py \ - --task_name=MRPC \ - --data_dir=data/MRPC \ - --vocab_file=model/vocab.txt \ - --bert_config_file=model/bert_config.json \ - --init_checkpoint=model/model.ckpt-343 \ - --max_seq_length=128 \ - --train_batch_size=32 \ - --learning_rate=2e-5 \ - --num_train_epochs=3.0 \ - --output_dir=model \ - --output_model=output_model \ - --config=mrpc.yaml \ - --tune \ - --strip_iterator + bash run_tuning.sh --input_model=./model --dataset_location=./data --output_model=output_model --strip_iterator + ``` +To run benchmark: + ```shell + bash run_benchmark.sh --input_model=./output_model.pb --init_checkpoint=./model --dataset_location=./data --batch_size=8 --mode=performance + ``` + ```shell + bash run_benchmark.sh --input_model=./output_model.pb --init_checkpoint=./model --dataset_location=./data --batch_size=8 --mode=accuracy ``` - -python run_classifier.py --task_name=MRPC --data_dir=data/MRPC --vocab_file=model/vocab.txt --bert_config_file=model/bert_config.json --init_checkpoint=model/model.ckpt-343 --max_seq_length=128 --train_batch_size=32 --learning_rate=2e-5 --num_train_epochs=3.0 --output_dir=model --output_model=output_model --config=bert.yaml --tune Details of enabling Intel® Neural Compressor on bert model for Tensorflow. ========================= This is a tutorial of how to enable bert model with Intel® Neural Compressor. ## User Code Analysis -1. User specifies fp32 *model*, calibration dataset *q_dataloader*, evaluation dataset *eval_dataloader* and metric in tuning.metric field of model-specific yaml config file. +1. User specifies fp32 *model*, calibration dataset *q_dataloader*, evaluation dataset *eval_dataloader* and metric. 2. User specifies fp32 *model*, calibration dataset *q_dataloader* and a custom *eval_func* which encapsulates the evaluation dataset and metric by itself. -For bert, we applied the first one as we already have write dataset and metric for bert mrpc task. - -### Write Yaml config file -In examples directory, there is a mrpc.yaml for tuning the model on Intel CPUs. The 'framework' in the yaml is set to 'tensorflow'. If running this example on Intel GPUs, the 'framework' should be set to 'tensorflow_itex' and the device in yaml file should be set to 'gpu'. The mrpc_itex.yaml is prepared for the GPU case. We could remove most of items and only keep mandatory item for tuning. We also implement a calibration dataloader and have evaluation field for creation of evaluation function at internal neural_compressor. - -```yaml -model: - name: bert - framework: tensorflow - inputs: input_file, batch_size - outputs: loss/Softmax:0, IteratorGetNext:3 - -device: cpu # optional. 
default value is cpu, other value is gpu. - -evaluation: - accuracy: {} - performance: - iteration: 20 - warmup: 5 - configs: - num_of_instance: 1 - cores_per_instance: 28 - kmp_blocktime: 1 - -quantization: - calibration: - sampling_size: 500 - model_wise: - weight: - granularity: per_channel - op_wise: { - 'loss/MatMul': { - 'activation': {'dtype': ['fp32']}, - 'weight': {'dtype': ['fp32']}, - } - } -tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - max_trials: 100 - random_seed: 9527 +For bert, we applied the first one as we already have write dataset and metric for bert mrpc task. +### Quantization Config +The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + inputs=["input_file", "batch_size"], + outputs=["loss/Softmax:0", "IteratorGetNext:3"], + ... + ) ``` -Here we set the input tensor and output tensors name into *inputs* and *outputs* field. In this case we calibrate and quantize the model, and use our calibration dataloader initialized from a 'Dataset' object. +Here we set the input tensor and output tensors name into *inputs* and *outputs* args. In this case we calibrate and quantize the model, and use our calibration dataloader initialized from a 'Dataset' object. ### Code update @@ -160,31 +108,32 @@ After prepare step is done, we add tune and benchmark code to generate quantized #### Tune ```python - from neural_compressor.experimental import Quantization, common - quantizer = Quantization(FLAGS.config) - dataset = Dataset(eval_file, FLAGS.eval_batch_size) - quantizer.model = common.Model(estimator, input_fn=estimator_input_fn) - quantizer.calib_dataloader = common.DataLoader(dataset, collate_fn=collate_fn) - quantizer.eval_dataloader = common.DataLoader(dataset, collate_fn=collate_fn) - quantizer.metric = Accuracy() - q_model = quantizer.fit() - q_model.save(FLAGS.output_model) + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.data.dataloaders.dataloader import DataLoader + config = PostTrainingQuantConfig( + inputs=["input_file", "batch_size"], + outputs=["loss/Softmax:0", "IteratorGetNext:3"], + calibration_sampling_size=[500],) + calib_dataloader=DataLoader(dataset, collate_fn=collate_fn, framework='tensorflow') + eval_dataloader=DataLoader(dataset, collate_fn=collate_fn, framework='tensorflow') + q_model = quantization.fit(model=model, conf=config, calib_dataloader=calib_dataloader, + eval_dataloader=eval_dataloader, eval_metric=Accuracy()) + + if FLAGS.strip_iterator: + q_model.graph_def = strip_iterator(q_model.graph_def) + q_model.save(FLAGS.output_model) ``` #### Benchmark ```python - from neural_compressor.experimental import Benchmark, common - from neural_compressor.model.tensorflow_model import get_model_type - evaluator = Benchmark(FLAGS.config) - dataset = Dataset(eval_file, FLAGS.eval_batch_size) - evaluator.b_dataloader = common.DataLoader(\ - dataset, batch_size=FLAGS.eval_batch_size, collate_fn=collate_fn) - model_type = get_model_type(FLAGS.input_model) - evaluator.metric = Accuracy() - if model_type == 'frozen_pb': - evaluator.model = FLAGS.input_model - else: - evaluator.model = common.Model(estimator, input_fn=estimator_input_fn) - evaluator(FLAGS.mode) -``` -The Intel® Neural Compressor 
quantizer.fit() function will return a best quantized model under time constraint. - + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + if FLAGS.mode == 'performance': + conf = BenchmarkConfig(cores_per_instance=28, num_of_instance=1) + fit(model, conf, b_func=evaluate) + else: + accuracy = evaluate(model.graph_def) + print('Batch size = %d' % FLAGS.eval_batch_size) + print("Accuracy: %.5f" % accuracy) + +The Intel® Neural Compressor quantization.fit() function will return a best quantized model under time constraint. diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/modeling.py b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/modeling.py index 4fde829d811..093cfd818f6 100644 --- a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/modeling.py +++ b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/modeling.py @@ -1,5 +1,5 @@ # coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors. +# Copyright 2022 The Google AI Language Team Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/mrpc.yaml b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/mrpc.yaml deleted file mode 100644 index 8e2a4141f91..00000000000 --- a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/mrpc.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: - name: bert - framework: tensorflow - inputs: input_file, batch_size - outputs: loss/Softmax:0, IteratorGetNext:3 - -device: cpu # optional. default value is cpu, other value is gpu. - -evaluation: - accuracy: {} - performance: - iteration: 20 - warmup: 5 - configs: - num_of_instance: 1 - cores_per_instance: 28 - kmp_blocktime: 1 - -quantization: - calibration: - sampling_size: 500 - model_wise: - weight: - granularity: per_channel -tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - max_trials: 100 - random_seed: 9527 diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/mrpc_itex.yaml b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/mrpc_itex.yaml deleted file mode 100644 index 000daa59cdd..00000000000 --- a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/mrpc_itex.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -model: - name: bert - framework: tensorflow_itex - inputs: input_file, batch_size - outputs: loss/Softmax:0, IteratorGetNext:3 - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -evaluation: - accuracy: {} - performance: - iteration: 20 - warmup: 5 - configs: - num_of_instance: 1 - cores_per_instance: 28 - kmp_blocktime: 1 - -quantization: - calibration: - sampling_size: 500 - model_wise: - weight: - granularity: per_channel -tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - max_trials: 100 - random_seed: 9527 diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/optimization.py b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/optimization.py index f5a52d9c8d4..ff20634e0a3 100644 --- a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/optimization.py +++ b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/optimization.py @@ -1,5 +1,5 @@ # coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors. +# Copyright 2022 The Google AI Language Team Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/prepare_dataset.py b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/prepare_dataset.py index d55b09093f8..1e5fa953519 100644 --- a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/prepare_dataset.py +++ b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/prepare_dataset.py @@ -1,3 +1,21 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + ''' Script for downloading all GLUE data. Note: for legal reasons, we are unable to host MRPC. 
You can either use the version hosted by the SentEval team, which is already tokenized, diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_benchmark.sh b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_benchmark.sh index 46bfcc6cf71..6fa60de7e06 100644 --- a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_benchmark.sh +++ b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_benchmark.sh @@ -14,9 +14,6 @@ function init_params { for var in "$@" do case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; --input_model=*) input_model=$(echo $var |cut -f2 -d=) ;; @@ -32,10 +29,6 @@ function init_params { --dataset_location=*) dataset_location=$(echo $var |cut -f2 -d=) ;; - *) - echo "Error: No such parameter: ${var}" - exit 1 - ;; esac done @@ -57,7 +50,6 @@ function run_benchmark { --num_train_epochs=3.0 \ --output_dir=$init_checkpoint \ --input_model=$input_model \ - --config=$config \ --mode=$mode \ --benchmark \ diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_classifier.py b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_classifier.py index 5620ab3775a..a1f35265d9a 100644 --- a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_classifier.py +++ b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_classifier.py @@ -1,5 +1,5 @@ # coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors. +# Copyright 2022 The Google AI Language Team Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ import collections import csv import os +import numpy as np import modeling as modeling import optimization as optimization import time @@ -40,10 +41,6 @@ "tune", False, "neural_compressor tune the model.") -flags.DEFINE_string( - "config", None, - "neural_compressor config for the model.") - flags.DEFINE_string( "input_model", None, "neural_compressor input model path.") @@ -221,7 +218,6 @@ class InputExample(object): def __init__(self, guid, text_a, text_b=None, label=None): """Constructs a InputExample. - Args: guid: Unique id for the example. text_a: string. The untokenized text of the first sequence. For single @@ -239,12 +235,10 @@ def __init__(self, guid, text_a, text_b=None, label=None): class PaddingInputExample(object): """Fake example so the num input examples is a multiple of the batch size. - When running eval/predict on the TPU, we need to pad the number of examples to be a multiple of the batch size, because the TPU requires a fixed batch size. The alternative is to drop the last batch, which is bad because it means the entire output data won't be generated. - We use this class instead of `None` because treating `None` as padding battches could cause silent errors. 
""" @@ -855,273 +849,331 @@ def input_fn(params): def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer, output_file): - """Convert a set of `InputExample`s to a list of `InputFeatures`.""" - writer = tf.compat.v1.python_io.TFRecordWriter(output_file) + """Convert a set of `InputExample`s to a list of `InputFeatures`.""" + writer = tf.compat.v1.python_io.TFRecordWriter(output_file) - for (ex_index, example) in enumerate(examples): - if ex_index % 10000 == 0: - tf.compat.v1.logging.info("Writing example %d of %d" % (ex_index, len(examples))) + for (ex_index, example) in enumerate(examples): + if ex_index % 10000 == 0: + tf.compat.v1.logging.info("Writing example %d of %d" % (ex_index, len(examples))) - feature = convert_single_example(ex_index, example, label_list, - max_seq_length, tokenizer) + feature = convert_single_example(ex_index, example, label_list, + max_seq_length, tokenizer) - def create_int_feature(values): - f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values))) - return f + def create_int_feature(values): + f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values))) + return f - features = collections.OrderedDict() - features["input_ids"] = create_int_feature(feature.input_ids) - features["input_mask"] = create_int_feature(feature.input_mask) - features["segment_ids"] = create_int_feature(feature.segment_ids) - features["label_ids"] = create_int_feature([feature.label_id]) - features["is_real_example"] = create_int_feature( - [int(feature.is_real_example)]) + features = collections.OrderedDict() + features["input_ids"] = create_int_feature(feature.input_ids) + features["input_mask"] = create_int_feature(feature.input_mask) + features["segment_ids"] = create_int_feature(feature.segment_ids) + features["label_ids"] = create_int_feature([feature.label_id]) + features["is_real_example"] = create_int_feature( + [int(feature.is_real_example)]) - tf_example = tf.train.Example(features=tf.train.Features(feature=features)) - writer.write(tf_example.SerializeToString()) - writer.close() + tf_example = tf.train.Example(features=tf.train.Features(feature=features)) + writer.write(tf_example.SerializeToString()) + writer.close() def main(_): - tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) + tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) + + processors = { + "cola": ColaProcessor, + "mnli": MnliProcessor, + "mrpc": MrpcProcessor, + "xnli": XnliProcessor, + } + + tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case, + FLAGS.init_checkpoint) + + bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file) + + if FLAGS.max_seq_length > bert_config.max_position_embeddings: + raise ValueError( + "Cannot use sequence length %d because the BERT model " + "was only trained up to sequence length %d" % + (FLAGS.max_seq_length, bert_config.max_position_embeddings)) + + tf.compat.v1.gfile.MakeDirs(FLAGS.output_dir) + + task_name = FLAGS.task_name.lower() + + if task_name not in processors: + raise ValueError("Task not found: %s" % (task_name)) + + processor = processors[task_name]() + + label_list = processor.get_labels() + + tokenizer = tokenization.FullTokenizer( + vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) + + tpu_cluster_resolver = None + if FLAGS.use_tpu and FLAGS.tpu_name: + tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver( + FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project) + + is_per_host = 
tf.compat.v1.estimator.tpu.InputPipelineConfig.PER_HOST_V2 + session_config = tf.compat.v1.ConfigProto(inter_op_parallelism_threads=FLAGS.num_inter_threads, + intra_op_parallelism_threads=FLAGS.num_intra_threads) + run_config = tf.compat.v1.estimator.tpu.RunConfig( + cluster=tpu_cluster_resolver, + master=FLAGS.master, + model_dir=FLAGS.output_dir, + save_checkpoints_steps=FLAGS.save_checkpoints_steps, + tpu_config=tf.compat.v1.estimator.tpu.TPUConfig( + iterations_per_loop=FLAGS.iterations_per_loop, + num_shards=FLAGS.num_tpu_cores, + per_host_input_for_training=is_per_host), + session_config=session_config) + + train_examples = None + num_train_steps = None + num_warmup_steps = None + if FLAGS.do_train: + train_examples = processor.get_train_examples(FLAGS.data_dir) + num_train_steps = int( + len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs) + num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion) + + model_fn = model_fn_builder( + bert_config=bert_config, + num_labels=len(label_list), + init_checkpoint=FLAGS.init_checkpoint, + learning_rate=FLAGS.learning_rate, + num_train_steps=num_train_steps, + num_warmup_steps=num_warmup_steps, + use_tpu=FLAGS.use_tpu, + use_one_hot_embeddings=FLAGS.use_tpu,) + + + # If TPU is not available, this will fall back to normal Estimator on CPU + # or GPU. + estimator = tf.compat.v1.estimator.tpu.TPUEstimator( + use_tpu=FLAGS.use_tpu, + model_fn=model_fn, + config=run_config, + train_batch_size=FLAGS.train_batch_size, + eval_batch_size=FLAGS.eval_batch_size, + predict_batch_size=FLAGS.predict_batch_size) + + if FLAGS.do_train: + train_file = os.path.join(FLAGS.output_dir, "train.tf_record") + file_based_convert_examples_to_features( + train_examples, label_list, FLAGS.max_seq_length, tokenizer, train_file) + tf.compat.v1.logging.info("***** Running training *****") + tf.compat.v1.logging.info(" Num examples = %d", len(train_examples)) + tf.compat.v1.logging.info(" Batch size = %d", FLAGS.train_batch_size) + tf.compat.v1.logging.info(" Num steps = %d", num_train_steps) + train_input_fn = file_based_input_fn_builder( + input_file=train_file, + seq_length=FLAGS.max_seq_length, + is_training=True, + drop_remainder=True) + estimator.train(input_fn=train_input_fn, max_steps=num_train_steps) + + if FLAGS.do_eval: + eval_examples = processor.get_dev_examples(FLAGS.data_dir) + num_actual_eval_examples = len(eval_examples) + if FLAGS.use_tpu: + # TPU requires a fixed batch size for all batches, therefore the number + # of examples must be a multiple of the batch size, or else examples + # will get dropped. So we pad with fake examples which are ignored + # later on. These do NOT count towards the metric (all tf.metrics + # support a per-instance weight, and these get a weight of 0.0). + while len(eval_examples) % FLAGS.eval_batch_size != 0: + eval_examples.append(PaddingInputExample()) + + eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record") + file_based_convert_examples_to_features( + eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file) + + tf.compat.v1.logging.info("***** Running evaluation *****") + tf.compat.v1.logging.info(" Num examples = %d (%d actual, %d padding)", + len(eval_examples), num_actual_eval_examples, + len(eval_examples) - num_actual_eval_examples) + tf.compat.v1.logging.info(" Batch size = %d", FLAGS.eval_batch_size) + + # This tells the estimator to run through the entire set. + eval_steps = None + # However, if running eval on the TPU, you will need to specify the + # number of steps. 
+ if FLAGS.use_tpu: + assert len(eval_examples) % FLAGS.eval_batch_size == 0 + eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size) + 1 + + eval_drop_remainder = True if FLAGS.use_tpu else False + eval_input_fn = file_based_input_fn_builder( + input_file=eval_file, + seq_length=FLAGS.max_seq_length, + is_training=False, + drop_remainder=eval_drop_remainder) + + start = time.time() + result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps, hooks=[LoggerHook()]) + end = time.time() - start + result['global_step'] = str(eval_steps) + result['latency_total'] = str(end) + result['latency_per_step'] = str(end/eval_steps) + if FLAGS.eval_batch_size != 1: + result['samples_per_sec'] = str(FLAGS.eval_batch_size/(end/eval_steps)) + + output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt") + with tf.compat.v1.gfile.GFile(output_eval_file, "w") as writer: + tf.compat.v1.logging.info("***** Eval results *****") + for key in sorted(result.keys()): + tf.compat.v1.logging.info(" %s = %s", key, str(result[key])) + writer.write("%s = %s\n" % (key, str(result[key]))) + + # BELOW IS Neural Compressor TUNING AND BENCHMARK CODE + + class Dataset(object): + def __init__(self, file_name, batch_size): + self.file_name = file_name + self.batch_size = batch_size + + def __getitem__(self, idx): + return (self.file_name, self.batch_size), 0 + + def __len__(self): + return 1 + + def collate_fn(batch): + """Puts each data field into a pd frame with outer dimension batch size""" + elem = batch[0] + return elem + + from neural_compressor.metric import METRICS + class Accuracy(object): + def __init__(self): + self.metric = METRICS('tensorflow')['Accuracy']() + + # it's ugly that the label is in the iterator + def update(self, preds, label): + logits, labels = preds + self.metric.update(logits, labels) + + def reset(self): + self.metric.reset() + + def result(self): + return self.metric.result() - processors = { - "cola": ColaProcessor, - "mnli": MnliProcessor, - "mrpc": MrpcProcessor, - "xnli": XnliProcessor, - } - - tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case, - FLAGS.init_checkpoint) - - bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file) - - if FLAGS.max_seq_length > bert_config.max_position_embeddings: - raise ValueError( - "Cannot use sequence length %d because the BERT model " - "was only trained up to sequence length %d" % - (FLAGS.max_seq_length, bert_config.max_position_embeddings)) - - tf.compat.v1.gfile.MakeDirs(FLAGS.output_dir) - - task_name = FLAGS.task_name.lower() - - if task_name not in processors: - raise ValueError("Task not found: %s" % (task_name)) - - processor = processors[task_name]() - - label_list = processor.get_labels() - - tokenizer = tokenization.FullTokenizer( - vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) - - tpu_cluster_resolver = None - if FLAGS.use_tpu and FLAGS.tpu_name: - tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver( - FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project) - - is_per_host = tf.compat.v1.estimator.tpu.InputPipelineConfig.PER_HOST_V2 - session_config = tf.compat.v1.ConfigProto(inter_op_parallelism_threads=FLAGS.num_inter_threads, - intra_op_parallelism_threads=FLAGS.num_intra_threads) - run_config = tf.compat.v1.estimator.tpu.RunConfig( - cluster=tpu_cluster_resolver, - master=FLAGS.master, - model_dir=FLAGS.output_dir, - save_checkpoints_steps=FLAGS.save_checkpoints_steps, - tpu_config=tf.compat.v1.estimator.tpu.TPUConfig( - 
iterations_per_loop=FLAGS.iterations_per_loop, - num_shards=FLAGS.num_tpu_cores, - per_host_input_for_training=is_per_host), - session_config=session_config) - - train_examples = None - num_train_steps = None - num_warmup_steps = None - if FLAGS.do_train: - train_examples = processor.get_train_examples(FLAGS.data_dir) - num_train_steps = int( - len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs) - num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion) - - model_fn = model_fn_builder( - bert_config=bert_config, - num_labels=len(label_list), - init_checkpoint=FLAGS.init_checkpoint, - learning_rate=FLAGS.learning_rate, - num_train_steps=num_train_steps, - num_warmup_steps=num_warmup_steps, - use_tpu=FLAGS.use_tpu, - use_one_hot_embeddings=FLAGS.use_tpu,) - - - # If TPU is not available, this will fall back to normal Estimator on CPU - # or GPU. - estimator = tf.compat.v1.estimator.tpu.TPUEstimator( - use_tpu=FLAGS.use_tpu, - model_fn=model_fn, - config=run_config, - train_batch_size=FLAGS.train_batch_size, - eval_batch_size=FLAGS.eval_batch_size, - predict_batch_size=FLAGS.predict_batch_size) - - if FLAGS.do_train: - train_file = os.path.join(FLAGS.output_dir, "train.tf_record") - file_based_convert_examples_to_features( - train_examples, label_list, FLAGS.max_seq_length, tokenizer, train_file) - tf.compat.v1.logging.info("***** Running training *****") - tf.compat.v1.logging.info(" Num examples = %d", len(train_examples)) - tf.compat.v1.logging.info(" Batch size = %d", FLAGS.train_batch_size) - tf.compat.v1.logging.info(" Num steps = %d", num_train_steps) - train_input_fn = file_based_input_fn_builder( - input_file=train_file, - seq_length=FLAGS.max_seq_length, - is_training=True, - drop_remainder=True) - estimator.train(input_fn=train_input_fn, max_steps=num_train_steps) - - if FLAGS.do_eval: eval_examples = processor.get_dev_examples(FLAGS.data_dir) - num_actual_eval_examples = len(eval_examples) - if FLAGS.use_tpu: - # TPU requires a fixed batch size for all batches, therefore the number - # of examples must be a multiple of the batch size, or else examples - # will get dropped. So we pad with fake examples which are ignored - # later on. These do NOT count towards the metric (all tf.metrics - # support a per-instance weight, and these get a weight of 0.0). - while len(eval_examples) % FLAGS.eval_batch_size != 0: - eval_examples.append(PaddingInputExample()) - eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record") - file_based_convert_examples_to_features( - eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file) - - tf.compat.v1.logging.info("***** Running evaluation *****") - tf.compat.v1.logging.info(" Num examples = %d (%d actual, %d padding)", - len(eval_examples), num_actual_eval_examples, - len(eval_examples) - num_actual_eval_examples) - tf.compat.v1.logging.info(" Batch size = %d", FLAGS.eval_batch_size) - - # This tells the estimator to run through the entire set. - eval_steps = None - # However, if running eval on the TPU, you will need to specify the - # number of steps. 
- if FLAGS.use_tpu: - assert len(eval_examples) % FLAGS.eval_batch_size == 0 - eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size) + 1 - - eval_drop_remainder = True if FLAGS.use_tpu else False - eval_input_fn = file_based_input_fn_builder( + dataset = Dataset(eval_file, FLAGS.eval_batch_size) + from neural_compressor.model.model import Model + from neural_compressor.model.base_model import BaseModel + + def evaluate(model): + """Custom evaluate function to estimate the accuracy of the bert model. + + Args: + model (tf.Graph_def): The input model graph + + Returns: + accuracy (float): evaluation result, the larger is better. + """ + from neural_compressor.adaptor.tf_utils.util import iterator_sess_run + from neural_compressor.objective import Performance + if not isinstance(model, BaseModel): + model = Model(model) + model.input_tensor_names = ["input_file", "batch_size"] + model.output_tensor_names = ["loss/Softmax:0", "IteratorGetNext:3"] + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + iteration = -1 + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = 100 + metric = Accuracy() + measurer = Performance() + + def eval_func(dataloader): + warmup = 5 + for idx, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + assert len(input_tensor) == len(inputs), \ + 'inputs len must equal with input_tensor' + feed_dict = dict(zip(input_tensor, inputs)) + predictions = iterator_sess_run(model.sess, model.iter_op, \ + feed_dict, output_tensor, iteration, measurer) + metric.update(predictions, labels) + if idx + 1 == iteration: + break + + latency_list = measurer.result_list() + latency = np.array(latency_list[warmup:]).mean() / FLAGS.eval_batch_size + return latency + + from neural_compressor.data.dataloaders.default_dataloader import DefaultDataLoader + dataloader = DefaultDataLoader(dataset, collate_fn=collate_fn, batch_size=FLAGS.eval_batch_size) + latency = eval_func(dataloader) + if FLAGS.benchmark and FLAGS.mode == 'performance': + print("Batch size = {}".format(FLAGS.eval_batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. 
/ latency)) + acc = metric.result() + return acc + + convert_examples_to_features( + examples=eval_examples, + label_list=label_list, + max_seq_length=FLAGS.max_seq_length, + tokenizer=tokenizer, + output_file=eval_file) + + estimator_input_fn = input_fn_builder( input_file=eval_file, seq_length=FLAGS.max_seq_length, is_training=False, - drop_remainder=eval_drop_remainder) - - start = time.time() - result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps, hooks=[LoggerHook()]) - end = time.time() - start - result['global_step'] = str(eval_steps) - result['latency_total'] = str(end) - result['latency_per_step'] = str(end/eval_steps) - if FLAGS.eval_batch_size != 1: - result['samples_per_sec'] = str(FLAGS.eval_batch_size/(end/eval_steps)) - - output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt") - with tf.compat.v1.gfile.GFile(output_eval_file, "w") as writer: - tf.compat.v1.logging.info("***** Eval results *****") - for key in sorted(result.keys()): - tf.compat.v1.logging.info(" %s = %s", key, str(result[key])) - writer.write("%s = %s\n" % (key, str(result[key]))) - - # BELOW IS Neural Compressor TUNING AND BENCHMARK CODE - - class Dataset(object): - def __init__(self, file_name, batch_size): - self.file_name = file_name - self.batch_size = batch_size - - def __getitem__(self, idx): - return (self.file_name, self.batch_size), 0 - - def __len__(self): - return 1 - - def collate_fn(batch): - """Puts each data field into a pd frame with outer dimension batch size""" - elem = batch[0] - return elem - - from neural_compressor.metric import METRICS - class Accuracy(object): - def __init__(self): - self.metric = METRICS('tensorflow')['Accuracy']() - - # it's ugly that the label is in the iterator - def update(self, preds, label): - logits, labels = preds - self.metric.update(logits, labels) - - def reset(self): - self.metric.reset() - - def result(self): - return self.metric.result() - - if FLAGS.tune: - - eval_examples = processor.get_dev_examples(FLAGS.data_dir) - eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record") - - convert_examples_to_features( - examples=eval_examples, - label_list=label_list, - max_seq_length=FLAGS.max_seq_length, - tokenizer=tokenizer, - output_file=eval_file) - - estimator_input_fn = input_fn_builder( - input_file=eval_file, - seq_length=FLAGS.max_seq_length, - is_training=False, - drop_remainder=False) - - from neural_compressor.experimental import Quantization, common - quantizer = Quantization(FLAGS.config) - dataset = Dataset(eval_file, FLAGS.eval_batch_size) - quantizer.model = common.Model(estimator, input_fn=estimator_input_fn) - quantizer.calib_dataloader = common.DataLoader(dataset, collate_fn=collate_fn) - quantizer.eval_dataloader = common.DataLoader(dataset, collate_fn=collate_fn) - quantizer.metric = Accuracy() - q_model = quantizer.fit() - if FLAGS.strip_iterator: - q_model.graph_def = strip_iterator(q_model.graph_def) - q_model.save(FLAGS.output_model) - - if FLAGS.benchmark: - eval_examples = processor.get_dev_examples(FLAGS.data_dir) - eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record") - - from neural_compressor.experimental import Benchmark, common - evaluator = Benchmark(FLAGS.config) - dataset = Dataset(eval_file, FLAGS.eval_batch_size) - evaluator.b_dataloader = common.DataLoader(\ - dataset, batch_size=FLAGS.eval_batch_size, collate_fn=collate_fn) - evaluator.metric = Accuracy() - - - from neural_compressor.model.tensorflow_model import get_model_type - model_type = get_model_type(FLAGS.input_model) - 
if model_type == 'frozen_pb': - evaluator.model = FLAGS.input_model - else: - estimator_input_fn = input_fn_builder( - input_file=eval_file, - seq_length=FLAGS.max_seq_length, - is_training=False, - drop_remainder=False) - evaluator.model = common.Model(estimator, input_fn=estimator_input_fn) - evaluator(FLAGS.mode) - + drop_remainder=False) + + from neural_compressor.model.tensorflow_model import get_model_type + try: + model_type = get_model_type(FLAGS.input_model) + except: + model_type = None + if model_type == 'frozen_pb': + model = FLAGS.input_model + else: + model = Model(estimator, input_fn=estimator_input_fn) + + if isinstance(model, BaseModel): + model = model.graph_def + + if FLAGS.tune: + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.data.dataloaders.dataloader import DataLoader + config = PostTrainingQuantConfig( + inputs=["input_file", "batch_size"], + outputs=["loss/Softmax:0", "IteratorGetNext:3"], + calibration_sampling_size=[500],) + calib_dataloader=DataLoader(dataset=dataset, collate_fn=collate_fn, framework='tensorflow') + eval_dataloader=DataLoader(dataset=dataset, collate_fn=collate_fn, framework='tensorflow') + q_model = quantization.fit(model=model, conf=config, calib_dataloader=calib_dataloader, + eval_dataloader=eval_dataloader, eval_metric=Accuracy()) + + if FLAGS.strip_iterator: + q_model.graph_def = strip_iterator(q_model.graph_def) + q_model.save(FLAGS.output_model) + + if FLAGS.benchmark: + assert FLAGS.mode == 'performance' or FLAGS.mode == 'accuracy', \ + "Benchmark only supports performance or accuracy mode." + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + + if FLAGS.mode == 'performance': + conf = BenchmarkConfig(cores_per_instance=28, num_of_instance=1) + fit(model, conf, b_func=evaluate) + else: + accuracy = evaluate(model) + print('Batch size = %d' % FLAGS.eval_batch_size) + print("Accuracy: %.5f" % accuracy) if __name__ == "__main__": flags.mark_flag_as_required("data_dir") diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_tuning.sh b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_tuning.sh index 2d695de1cfe..a2b86f65355 100644 --- a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_tuning.sh +++ b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_tuning.sh @@ -11,13 +11,9 @@ function main { # init params function init_params { - for var in "$@" do case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; --input_model=*) input_model=$(echo "$var" |cut -f2 -d=) ;; @@ -46,7 +42,6 @@ function run_tuning { --num_train_epochs=3.0 \ --output_dir=$input_model \ --output_model=$output_model \ - --config=$config \ --tune \ } diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/tokenization.py b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/tokenization.py index 76e0651c6de..0f4b22ebe5a 100644 --- a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/tokenization.py +++ b/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/tokenization.py @@ -1,5 +1,5 @@ # coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors. +# Copyright 2022 The Google AI Language Team Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
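For readers tracking this migration: the hunks above drop the YAML-driven `Quantization`/`Benchmark` flow from `run_classifier.py` and `run_tuning.sh` in favor of the 2.x config-object API. The following is a minimal sketch of that new flow, not part of the patch itself; it reuses the `Dataset` instance, `collate_fn`, `Accuracy` metric, and `evaluate` function that the diff adds to `run_classifier.py`, and the FP32 model path and int8 output path are placeholders.

```python
# Sketch only: mirrors the API calls introduced in the diff above.
# `dataset`, `collate_fn`, `Accuracy`, and `evaluate` are the helpers defined
# in the modified run_classifier.py; the model paths below are placeholders.
from neural_compressor import quantization
from neural_compressor.benchmark import fit as benchmark_fit
from neural_compressor.config import BenchmarkConfig, PostTrainingQuantConfig
from neural_compressor.data.dataloaders.dataloader import DataLoader

config = PostTrainingQuantConfig(
    inputs=["input_file", "batch_size"],              # graph input tensor names
    outputs=["loss/Softmax:0", "IteratorGetNext:3"],  # graph output tensor names
    calibration_sampling_size=[500],
)
calib_dataloader = DataLoader(dataset=dataset, collate_fn=collate_fn,
                              framework="tensorflow")

# Post-training quantization: returns the best int8 model found under the
# default accuracy criterion.
q_model = quantization.fit(
    model="./bert_base_mrpc_fp32.pb",   # placeholder path to the FP32 graph
    conf=config,
    calib_dataloader=calib_dataloader,
    eval_dataloader=calib_dataloader,   # same dataloader reused for evaluation
    eval_metric=Accuracy(),
)
q_model.save("./bert_base_mrpc_int8")

# Performance benchmark driven by the user-supplied evaluate() function.
bench_conf = BenchmarkConfig(cores_per_instance=28, num_of_instance=1)
benchmark_fit(q_model.graph_def, bench_conf, b_func=evaluate)
```

The per-example script keeps the extra logic shown in the hunks (frozen-pb vs. estimator input handling, `--strip_iterator`, accuracy-only mode); the sketch above only distills the quantize-then-benchmark shape of the new API.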
diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/README.md b/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/README.md deleted file mode 100644 index 02e51563c92..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/README.md +++ /dev/null @@ -1,201 +0,0 @@ -Step-by-Step -============ - -This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor tuning zoo result of bert large model on squad v1.1 task. -This example can run on Intel CPUs and GPUs. - - -## Prerequisite - -### 1. Installation -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` -### 2. Install Intel Tensorflow -```shell -pip install intel-tensorflow -``` - -### 3. Install Intel Extension for Tensorflow - -#### Quantizing the model on Intel GPU -Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. - -```shell -pip install --upgrade intel-extension-for-tensorflow[gpu] -``` -For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers) - -#### Quantizing the model on Intel CPU(Experimental) -Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. - -```shell -pip install --upgrade intel-extension-for-tensorflow[cpu] -``` - -### 4. Prepare Dataset -```shell -wget https://storage.googleapis.com/bert_models/2019_05_30/wwm_uncased_L-24_H-1024_A-16.zip -``` - -```shell -unzip wwm_uncased_L-24_H-1024_A-16.zip -``` - -```shell -wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json -P wwm_uncased_L-24_H-1024_A-16 -``` -wwm_uncased_L-24_H-1024_A-16 folder will be located on your data path. - -#### Automatic dataset download -Run the `prepare_dataset.sh` script located in `examples/tensorflow/nlp/bert_large_squad/quantization/ptq`. - -Usage: -```shell -cd examples/tensorflow/nlp/bert_large_squad/quantization/ptq -bash prepare_dataset.sh --output_dir=./data -``` - -Then create the tf_record file and you need to config the tf_record path in yaml file. -```shell -python create_tf_record.py --vocab_file=data/vocab.txt --predict_file=data/dev-v1.1.json --output_file=./eval.tf_record -``` - -### 5. Prepare Pretrained model - -#### Manual approach - -```shell -wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/bert_large_checkpoints.zip -unzip bert_large_checkpoints.zip -``` -#### Automatic model download -Run the `prepare_model.sh` script located in `examples/tensorflow/nlp/bert_large_squad/quantization/ptq`. - -Usage: -```shell -cd examples/tensorflow/nlp/bert_large_squad/quantization/ptq -bash prepare_model.sh --output_dir=./model -``` - -## Prepare frozen pb from checkpoint - ```shell - python freeze_estimator_to_pb.py --input_model=./model --output_model=./bert_fp32.pb - ``` -## Run Command - Please make sure below command should be executed with the same Tensorflow runtime version as above step. 
- - ```shell - python tune_squad.py --config=./bert.yaml --input_model=./bert_fp32.pb --output_model=./int8.pb --tune - ``` - -Now the tool will generate an int8 model with iterator inside the graph if you want the tuned int8 model to be raw input with 3 inputs you can use command like below: - - ```shell - python tune_squad.py --config=./bert.yaml --input_model=./bert_fp32.pb --output_model=./int8.pb --tune --strip_iterator - ``` - - -Details of enabling Intel® Neural Compressor on bert model for Tensorflow. -========================= - -This is a tutorial of how to enable bert model with Intel® Neural Compressor. -## User Code Analysis -1. User specifies fp32 *model*, calibration dataset *q_dataloader*, evaluation dataset *eval_dataloader* and metric in tuning.metric field of model-specific yaml config file. - -2. User specifies fp32 *model*, calibration dataset *q_dataloader* and a custom *eval_func* which encapsulates the evaluation dataset and metric by itself. - -For bert, we applied the first one as we already have built-in dataset and metric for bert squad task. - -### Write Yaml config file -In examples directory, there is a bert.yaml for tuning the model on Intel CPUs. The 'framework' in the yaml is set to 'tensorflow'. If running this example on Intel GPUs, the 'framework' should be set to 'tensorflow_itex' and the device in yaml file should be set to 'gpu'. The bert_itex.yaml is prepared for the GPU case. We could remove most of items and only keep mandatory item for tuning. We also implement a calibration dataloader and have evaluation field for creation of evaluation function at internal neural_compressor. - -```yaml -model: - name: bert - framework: tensorflow - inputs: input_file, batch_size - outputs: IteratorGetNext:3, unstack:0, unstack:1 - -device: cpu # optional. default value is cpu, other value is gpu. - -evaluation: - accuracy: - metric: - SquadF1: - dataloader: - dataset: - bert: - root: eval.tf_record - label_file: dev-v1.1.json - batch_size: 64 - postprocess: - transform: - SquadV1PostTransform: - label_file: dev-v1.1.json - vocab_file: vocab.txt - performance: - iteration: 50 - configs: - num_of_instance: 7 - cores_per_instance: 4 - dataloader: - dataset: - bert: - root: /path/to/eval.tf_record - label_file: /path/to/dev-v1.1.json - batch_size: 64 - -quantization: - calibration: - sampling_size: 500 - model_wise: - weight: - granularity: per_channel - op_wise: { - 'MatMul': { - 'activation': {'dtype': ['fp32']}, - 'weight': {'dtype': ['fp32']}, - } - } -tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - max_trials: 100 - random_seed: 9527 - -``` -Here we set the input tensor and output tensors name into *inputs* and *outputs* field. In this case we calibrate and quantize the model, and use our calibration dataloader initialized from a 'Dataset' object. - -### Code update - -After prepare step is done, we add tune and benchmark code to generate quantized model and benchmark. 
- -#### Tune -```python - from neural_compressor.quantization import Quantization - quantizer = Quantization('./bert.yaml') - quantizer.model = FLAGS.input_model - q_model = quantizer.fit() - q_model.save(FLAGS.output_model) - -``` -#### Benchmark -```python - from neural_compressor.experimental import Benchmark - evaluator = Benchmark('./bert.yaml') - evaluator.model = FLAGS.input_model - results = evaluator() - for mode, result in results.items(): - acc, batch_size, result_list = result - latency = np.array(result_list).mean() / batch_size - print('\n{} mode benchmark result:'.format(mode)) - print('Accuracy is {:.3f}'.format(acc)) - print('Batch size = {}'.format(batch_size)) - print('Latency: {:.3f} ms'.format(latency * 1000)) - print('Throughput: {:.3f} images/sec'.format(1./ latency)) -``` -The Intel® Neural Compressor quantizer.fit() function will return a best quantized model under time constraint. diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/bert.yaml b/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/bert.yaml deleted file mode 100644 index 8db7ebc194f..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/bert.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: - name: bert - framework: tensorflow - inputs: input_file, batch_size - outputs: IteratorGetNext:3, unstack:0, unstack:1 - -device: cpu # optional. default value is cpu, other value is gpu. - -evaluation: - accuracy: - metric: - SquadF1: - dataloader: - dataset: - bert: - root: /path/to/eval.tf_record - label_file: /path/to/dev-v1.1.json - batch_size: 64 - postprocess: - transform: - SquadV1: - label_file: /path/to/dev-v1.1.json - vocab_file: /path/to/vocab.txt - performance: - iteration: 10 - configs: - num_of_instance: 4 - cores_per_instance: 7 - dataloader: - dataset: - bert: - root: /path/to/eval.tf_record - label_file: /path/to/dev-v1.1.json - batch_size: 64 - -quantization: - calibration: - sampling_size: 500 - dataloader: - dataset: - bert: - root: /path/to/eval.tf_record - label_file: /path/to/dev-v1.1.json - batch_size: 64 - model_wise: - weight: - granularity: per_channel -tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - max_trials: 100 - random_seed: 9527 diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/bert_itex.yaml b/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/bert_itex.yaml deleted file mode 100644 index e4198c3a3d4..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/bert_itex.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: - name: bert - framework: tensorflow_itex - inputs: input_file, batch_size - outputs: IteratorGetNext:3, unstack:0, unstack:1 - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -evaluation: - accuracy: - metric: - SquadF1: - dataloader: - dataset: - bert: - root: /path/to/eval.tf_record - label_file: /path/to/dev-v1.1.json - batch_size: 64 - postprocess: - transform: - SquadV1: - label_file: /path/to/dev-v1.1.json - vocab_file: /path/to/vocab.txt - performance: - iteration: 10 - configs: - num_of_instance: 4 - cores_per_instance: 7 - dataloader: - dataset: - bert: - root: /path/to/eval.tf_record - label_file: /path/to/dev-v1.1.json - batch_size: 64 - -quantization: - calibration: - sampling_size: 500 - dataloader: - dataset: - bert: - root: /path/to/eval.tf_record - label_file: /path/to/dev-v1.1.json - batch_size: 64 - model_wise: - weight: - granularity: per_channel -tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - max_trials: 100 - random_seed: 9527 diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/create_pretraining_data.py b/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/create_pretraining_data.py deleted file mode 100644 index 3b3dbd4bc59..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/create_pretraining_data.py +++ /dev/null @@ -1,472 +0,0 @@ -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Create masked LM/next sentence masked_lm TF examples for BERT.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import random -import tokenization -import tensorflow as tf - -from absl import app -#from absl import flags -from absl import logging -flags = tf.compat.v1.flags - -FLAGS = flags.FLAGS - -flags.DEFINE_string("input_file", None, - "Input raw text file (or comma-separated list of files).") - -flags.DEFINE_string( - "output_file", None, - "Output TF example file (or comma-separated list of files).") - -flags.DEFINE_string("vocab_file", None, - "The vocabulary file that the BERT model was trained on.") - -flags.DEFINE_bool( - "do_lower_case", True, - "Whether to lower case the input text. 
Should be True for uncased " - "models and False for cased models.") - -flags.DEFINE_bool( - "do_whole_word_mask", False, - "Whether to use whole word masking rather than per-WordPiece masking.") - -flags.DEFINE_integer("max_seq_length", 128, "Maximum sequence length.") - -flags.DEFINE_integer("max_predictions_per_seq", 20, - "Maximum number of masked LM predictions per sequence.") - -flags.DEFINE_integer("random_seed", 12345, "Random seed for data generation.") - -flags.DEFINE_integer( - "dupe_factor", 10, - "Number of times to duplicate the input data (with different masks).") - -flags.DEFINE_float("masked_lm_prob", 0.15, "Masked LM probability.") - -flags.DEFINE_float( - "short_seq_prob", 0.1, - "Probability of creating sequences which are shorter than the " - "maximum length.") - - -class TrainingInstance(object): - """A single training instance (sentence pair).""" - - def __init__(self, tokens, segment_ids, masked_lm_positions, masked_lm_labels, - is_random_next): - self.tokens = tokens - self.segment_ids = segment_ids - self.is_random_next = is_random_next - self.masked_lm_positions = masked_lm_positions - self.masked_lm_labels = masked_lm_labels - - def __str__(self): - s = "" - s += "tokens: %s\n" % (" ".join( - [tokenization.printable_text(x) for x in self.tokens])) - s += "segment_ids: %s\n" % (" ".join([str(x) for x in self.segment_ids])) - s += "is_random_next: %s\n" % self.is_random_next - s += "masked_lm_positions: %s\n" % (" ".join( - [str(x) for x in self.masked_lm_positions])) - s += "masked_lm_labels: %s\n" % (" ".join( - [tokenization.printable_text(x) for x in self.masked_lm_labels])) - s += "\n" - return s - - def __repr__(self): - return self.__str__() - - -def write_instance_to_example_files(instances, tokenizer, max_seq_length, - max_predictions_per_seq, output_files): - """Create TF example files from `TrainingInstance`s.""" - writers = [] - for output_file in output_files: - writers.append(tf.io.TFRecordWriter(output_file)) - - writer_index = 0 - - total_written = 0 - for (inst_index, instance) in enumerate(instances): - input_ids = tokenizer.convert_tokens_to_ids(instance.tokens) - input_mask = [1] * len(input_ids) - segment_ids = list(instance.segment_ids) - assert len(input_ids) <= max_seq_length - - while len(input_ids) < max_seq_length: - input_ids.append(0) - input_mask.append(0) - segment_ids.append(0) - - assert len(input_ids) == max_seq_length - assert len(input_mask) == max_seq_length - assert len(segment_ids) == max_seq_length - - masked_lm_positions = list(instance.masked_lm_positions) - masked_lm_ids = tokenizer.convert_tokens_to_ids(instance.masked_lm_labels) - masked_lm_weights = [1.0] * len(masked_lm_ids) - - while len(masked_lm_positions) < max_predictions_per_seq: - masked_lm_positions.append(0) - masked_lm_ids.append(0) - masked_lm_weights.append(0.0) - - next_sentence_label = 1 if instance.is_random_next else 0 - - features = collections.OrderedDict() - features["input_ids"] = create_int_feature(input_ids) - features["input_mask"] = create_int_feature(input_mask) - features["segment_ids"] = create_int_feature(segment_ids) - features["masked_lm_positions"] = create_int_feature(masked_lm_positions) - features["masked_lm_ids"] = create_int_feature(masked_lm_ids) - features["masked_lm_weights"] = create_float_feature(masked_lm_weights) - features["next_sentence_labels"] = create_int_feature([next_sentence_label]) - - tf_example = tf.train.Example(features=tf.train.Features(feature=features)) - - 
writers[writer_index].write(tf_example.SerializeToString()) - writer_index = (writer_index + 1) % len(writers) - - total_written += 1 - - if inst_index < 20: - tf.compat.v1.logging.info("*** Example ***") - tf.compat.v1.logging.info("tokens: %s" % " ".join( - [tokenization.printable_text(x) for x in instance.tokens])) - - for feature_name in features.keys(): - feature = features[feature_name] - values = [] - if feature.int64_list.value: - values = feature.int64_list.value - elif feature.float_list.value: - values = feature.float_list.value - tf.compat.v1.logging.info( - "%s: %s" % (feature_name, " ".join([str(x) for x in values]))) - - for writer in writers: - writer.close() - - tf.compat.v1.logging.info("Wrote %d total instances", total_written) - - -def create_int_feature(values): - feature = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values))) - return feature - - -def create_float_feature(values): - feature = tf.train.Feature(float_list=tf.train.FloatList(value=list(values))) - return feature - - -def create_training_instances(input_files, tokenizer, max_seq_length, - dupe_factor, short_seq_prob, masked_lm_prob, - max_predictions_per_seq, rng): - """Create `TrainingInstance`s from raw text.""" - all_documents = [[]] - - # Input file format: - # (1) One sentence per line. These should ideally be actual sentences, not - # entire paragraphs or arbitrary spans of text. (Because we use the - # sentence boundaries for the "next sentence prediction" task). - # (2) Blank lines between documents. Document boundaries are needed so - # that the "next sentence prediction" task doesn't span between documents. - for input_file in input_files: - with tf.io.gfile.GFile(input_file, "r") as reader: - while True: - line = tokenization.convert_to_unicode(reader.readline()) - if not line: - break - line = line.strip() - - # Empty lines are used as document delimiters - if not line: - all_documents.append([]) - tokens = tokenizer.tokenize(line) - if tokens: - all_documents[-1].append(tokens) - - # Remove empty documents - all_documents = [x for x in all_documents if x] - rng.shuffle(all_documents) - - vocab_words = list(tokenizer.vocab.keys()) - instances = [] - for _ in range(dupe_factor): - for document_index in range(len(all_documents)): - instances.extend( - create_instances_from_document( - all_documents, document_index, max_seq_length, short_seq_prob, - masked_lm_prob, max_predictions_per_seq, vocab_words, rng)) - - rng.shuffle(instances) - return instances - - -def create_instances_from_document( - all_documents, document_index, max_seq_length, short_seq_prob, - masked_lm_prob, max_predictions_per_seq, vocab_words, rng): - """Creates `TrainingInstance`s for a single document.""" - document = all_documents[document_index] - - # Account for [CLS], [SEP], [SEP] - max_num_tokens = max_seq_length - 3 - - # We *usually* want to fill up the entire sequence since we are padding - # to `max_seq_length` anyways, so short sequences are generally wasted - # computation. However, we *sometimes* - # (i.e., short_seq_prob == 0.1 == 10% of the time) want to use shorter - # sequences to minimize the mismatch between pre-training and fine-tuning. - # The `target_seq_length` is just a rough target however, whereas - # `max_seq_length` is a hard limit. 
- target_seq_length = max_num_tokens - if rng.random() < short_seq_prob: - target_seq_length = rng.randint(2, max_num_tokens) - - # We DON'T just concatenate all of the tokens from a document into a long - # sequence and choose an arbitrary split point because this would make the - # next sentence prediction task too easy. Instead, we split the input into - # segments "A" and "B" based on the actual "sentences" provided by the user - # input. - instances = [] - current_chunk = [] - current_length = 0 - i = 0 - while i < len(document): - segment = document[i] - current_chunk.append(segment) - current_length += len(segment) - if i == len(document) - 1 or current_length >= target_seq_length: - if current_chunk: - # `a_end` is how many segments from `current_chunk` go into the `A` - # (first) sentence. - a_end = 1 - if len(current_chunk) >= 2: - a_end = rng.randint(1, len(current_chunk) - 1) - - tokens_a = [] - for j in range(a_end): - tokens_a.extend(current_chunk[j]) - - tokens_b = [] - # Random next - is_random_next = False - if len(current_chunk) == 1 or rng.random() < 0.5: - is_random_next = True - target_b_length = target_seq_length - len(tokens_a) - - # This should rarely go for more than one iteration for large - # corpora. However, just to be careful, we try to make sure that - # the random document is not the same as the document - # we're processing. - for _ in range(10): - random_document_index = rng.randint(0, len(all_documents) - 1) - if random_document_index != document_index: - break - - random_document = all_documents[random_document_index] - random_start = rng.randint(0, len(random_document) - 1) - for j in range(random_start, len(random_document)): - tokens_b.extend(random_document[j]) - if len(tokens_b) >= target_b_length: - break - # We didn't actually use these segments so we "put them back" so - # they don't go to waste. - num_unused_segments = len(current_chunk) - a_end - i -= num_unused_segments - # Actual next - else: - is_random_next = False - for j in range(a_end, len(current_chunk)): - tokens_b.extend(current_chunk[j]) - truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng) - - assert len(tokens_a) >= 1 - assert len(tokens_b) >= 1 - - tokens = [] - segment_ids = [] - tokens.append("[CLS]") - segment_ids.append(0) - for token in tokens_a: - tokens.append(token) - segment_ids.append(0) - - tokens.append("[SEP]") - segment_ids.append(0) - - for token in tokens_b: - tokens.append(token) - segment_ids.append(1) - tokens.append("[SEP]") - segment_ids.append(1) - - (tokens, masked_lm_positions, - masked_lm_labels) = create_masked_lm_predictions( - tokens, masked_lm_prob, max_predictions_per_seq, vocab_words, rng) - instance = TrainingInstance( - tokens=tokens, - segment_ids=segment_ids, - is_random_next=is_random_next, - masked_lm_positions=masked_lm_positions, - masked_lm_labels=masked_lm_labels) - instances.append(instance) - current_chunk = [] - current_length = 0 - i += 1 - - return instances - - -MaskedLmInstance = collections.namedtuple("MaskedLmInstance", - ["index", "label"]) - - -def create_masked_lm_predictions(tokens, masked_lm_prob, - max_predictions_per_seq, vocab_words, rng): - """Creates the predictions for the masked LM objective.""" - - cand_indexes = [] - for (i, token) in enumerate(tokens): - if token == "[CLS]" or token == "[SEP]": - continue - # Whole Word Masking means that if we mask all of the wordpieces - # corresponding to an original word. 
When a word has been split into - # WordPieces, the first token does not have any marker and any subsequence - # tokens are prefixed with ##. So whenever we see the ## token, we - # append it to the previous set of word indexes. - # - # Note that Whole Word Masking does *not* change the training code - # at all -- we still predict each WordPiece independently, softmaxed - # over the entire vocabulary. - if (FLAGS.do_whole_word_mask and len(cand_indexes) >= 1 and - token.startswith("##")): - cand_indexes[-1].append(i) - else: - cand_indexes.append([i]) - - rng.shuffle(cand_indexes) - - output_tokens = list(tokens) - - num_to_predict = min(max_predictions_per_seq, - max(1, int(round(len(tokens) * masked_lm_prob)))) - - masked_lms = [] - covered_indexes = set() - for index_set in cand_indexes: - if len(masked_lms) >= num_to_predict: - break - # If adding a whole-word mask would exceed the maximum number of - # predictions, then just skip this candidate. - if len(masked_lms) + len(index_set) > num_to_predict: - continue - is_any_index_covered = False - for index in index_set: - if index in covered_indexes: - is_any_index_covered = True - break - if is_any_index_covered: - continue - for index in index_set: - covered_indexes.add(index) - - masked_token = None - # 80% of the time, replace with [MASK] - if rng.random() < 0.8: - masked_token = "[MASK]" - else: - # 10% of the time, keep original - if rng.random() < 0.5: - masked_token = tokens[index] - # 10% of the time, replace with random word - else: - masked_token = vocab_words[rng.randint(0, len(vocab_words) - 1)] - - output_tokens[index] = masked_token - - masked_lms.append(MaskedLmInstance(index=index, label=tokens[index])) - assert len(masked_lms) <= num_to_predict - masked_lms = sorted(masked_lms, key=lambda x: x.index) - - masked_lm_positions = [] - masked_lm_labels = [] - for p in masked_lms: - masked_lm_positions.append(p.index) - masked_lm_labels.append(p.label) - - return (output_tokens, masked_lm_positions, masked_lm_labels) - - -def truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng): - """Truncates a pair of sequences to a maximum sequence length.""" - while True: - total_length = len(tokens_a) + len(tokens_b) - if total_length <= max_num_tokens: - break - - trunc_tokens = tokens_a if len(tokens_a) > len(tokens_b) else tokens_b - assert len(trunc_tokens) >= 1 - - # We want to sometimes truncate from the front and sometimes from the - # back to add more randomness and avoid biases. 
- if rng.random() < 0.5: - del trunc_tokens[0] - else: - trunc_tokens.pop() - - -def main(_): - tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) - - tokenizer = tokenization.FullTokenizer( - vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) - - input_files = [] - for input_pattern in FLAGS.input_file.split(","): - input_files.extend(tf.io.gfile.glob(input_pattern)) - - tf.compat.v1.logging.info("*** Reading from input files ***") - for input_file in input_files: - tf.compat.v1.logging.info(" %s", input_file) - - rng = random.Random(FLAGS.random_seed) - instances = create_training_instances( - input_files, tokenizer, FLAGS.max_seq_length, FLAGS.dupe_factor, - FLAGS.short_seq_prob, FLAGS.masked_lm_prob, FLAGS.max_predictions_per_seq, - rng) - - output_files = FLAGS.output_file.split(",") - tf.compat.v1.logging.info("*** Writing to output files ***") - for output_file in output_files: - tf.compat.v1.logging.info(" %s", output_file) - - write_instance_to_example_files(instances, tokenizer, FLAGS.max_seq_length, - FLAGS.max_predictions_per_seq, output_files) - - -if __name__ == "__main__": - flags.mark_flag_as_required("input_file") - flags.mark_flag_as_required("output_file") - flags.mark_flag_as_required("vocab_file") - tf.compat.v1.app.run() diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/create_tf_record.py b/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/create_tf_record.py deleted file mode 100644 index 9dcc7a48994..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/create_tf_record.py +++ /dev/null @@ -1,508 +0,0 @@ -#!/usr/bin/env python - -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""generate bert dataset""" - -import collections -import json -import os -import tokenization -import six -import tensorflow as tf - -from absl import app -#from absl import flags -from absl import logging - -flags = tf.compat.v1.flags -FLAGS = flags.FLAGS - -## Required parameters -flags.DEFINE_string("vocab_file", None, - "The vocabulary file that the BERT model was trained on.") - -flags.DEFINE_string( - "predict_file", None, - "SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json") - -flags.DEFINE_string( - "output_file", None, "The output tf_record for usage.") - -class SquadExample(object): - """A single training/test example for simple sequence classification. - - For examples without an answer, the start and end position are -1. 
- """ - - def __init__(self, - qas_id, - question_text, - doc_tokens, - orig_answer_text=None, - start_position=None, - end_position=None, - is_impossible=False): - self.qas_id = qas_id - self.question_text = question_text - self.doc_tokens = doc_tokens - self.orig_answer_text = orig_answer_text - self.start_position = start_position - self.end_position = end_position - self.is_impossible = is_impossible - #self.startpb = 0 - - def __str__(self): - return self.__repr__() - - def __repr__(self): - s = "" - s += "qas_id: %s" % (tokenization.printable_text(self.qas_id)) - s += ", question_text: %s" % ( - tokenization.printable_text(self.question_text)) - s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens)) - if self.start_position: - s += ", start_position: %d" % (self.start_position) - if self.start_position: - s += ", end_position: %d" % (self.end_position) - if self.start_position: - s += ", is_impossible: %r" % (self.is_impossible) - return s - - -class InputFeatures(object): - """A single set of features of data.""" - - def __init__(self, - unique_id, - example_index, - doc_span_index, - tokens, - token_to_orig_map, - token_is_max_context, - input_ids, - input_mask, - segment_ids, - start_position=None, - end_position=None, - is_impossible=None): - self.unique_id = unique_id - self.example_index = example_index - self.doc_span_index = doc_span_index - self.tokens = tokens - self.token_to_orig_map = token_to_orig_map - self.token_is_max_context = token_is_max_context - self.input_ids = input_ids - self.input_mask = input_mask - self.segment_ids = segment_ids - self.start_position = start_position - self.end_position = end_position - self.is_impossible = is_impossible - - -def read_squad_examples(input_file, is_training): - """Read a SQuAD json file into a list of SquadExample.""" - with tf.io.gfile.GFile(input_file, "r") as reader: - input_data = json.load(reader)["data"] - - def is_whitespace(c): - if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F: - return True - return False - - examples = [] - for entry in input_data: - for paragraph in entry["paragraphs"]: - paragraph_text = paragraph["context"] - doc_tokens = [] - char_to_word_offset = [] - prev_is_whitespace = True - for c in paragraph_text: - if is_whitespace(c): - prev_is_whitespace = True - else: - if prev_is_whitespace: - doc_tokens.append(c) - else: - doc_tokens[-1] += c - prev_is_whitespace = False - char_to_word_offset.append(len(doc_tokens) - 1) - - for qa in paragraph["qas"]: - qas_id = qa["id"] - question_text = qa["question"] - start_position = None - end_position = None - orig_answer_text = None - is_impossible = False - if is_training: - - if FLAGS.version_2_with_negative: - is_impossible = qa["is_impossible"] - if (len(qa["answers"]) != 1) and (not is_impossible): - raise ValueError( - "For training, each question should have exactly 1 answer.") - if not is_impossible: - answer = qa["answers"][0] - orig_answer_text = answer["text"] - answer_offset = answer["answer_start"] - answer_length = len(orig_answer_text) - start_position = char_to_word_offset[answer_offset] - end_position = char_to_word_offset[answer_offset + answer_length - - 1] - # Only add answers where the text can be exactly recovered from the - # document. If this CAN'T happen it's likely due to weird Unicode - # stuff so we will just skip the example. - # - # Note that this means for training mode, every example is NOT - # guaranteed to be preserved. 
- actual_text = " ".join( - doc_tokens[start_position:(end_position + 1)]) - cleaned_answer_text = " ".join( - tokenization.whitespace_tokenize(orig_answer_text)) - if actual_text.find(cleaned_answer_text) == -1: - tf.compat.v1.logging.warning("Could not find answer: '%s' vs. '%s'", - actual_text, cleaned_answer_text) - continue - else: - start_position = -1 - end_position = -1 - orig_answer_text = "" - - example = SquadExample( - qas_id=qas_id, - question_text=question_text, - doc_tokens=doc_tokens, - orig_answer_text=orig_answer_text, - start_position=start_position, - end_position=end_position, - is_impossible=is_impossible) - examples.append(example) - - return examples - - -def convert_examples_to_features(examples, tokenizer, max_seq_length, - doc_stride, max_query_length, is_training, - output_fn): - """Loads a data file into a list of `InputBatch`s.""" - - unique_id = 1000000000 - - for (example_index, example) in enumerate(examples): - query_tokens = tokenizer.tokenize(example.question_text) - - if len(query_tokens) > max_query_length: - query_tokens = query_tokens[0:max_query_length] - - tok_to_orig_index = [] - orig_to_tok_index = [] - all_doc_tokens = [] - for (i, token) in enumerate(example.doc_tokens): - orig_to_tok_index.append(len(all_doc_tokens)) - sub_tokens = tokenizer.tokenize(token) - for sub_token in sub_tokens: - tok_to_orig_index.append(i) - all_doc_tokens.append(sub_token) - - tok_start_position = None - tok_end_position = None - if is_training and example.is_impossible: - tok_start_position = -1 - tok_end_position = -1 - if is_training and not example.is_impossible: - tok_start_position = orig_to_tok_index[example.start_position] - if example.end_position < len(example.doc_tokens) - 1: - tok_end_position = orig_to_tok_index[example.end_position + 1] - 1 - else: - tok_end_position = len(all_doc_tokens) - 1 - (tok_start_position, tok_end_position) = _improve_answer_span( - all_doc_tokens, tok_start_position, tok_end_position, tokenizer, - example.orig_answer_text) - - # The -3 accounts for [CLS], [SEP] and [SEP] - max_tokens_for_doc = max_seq_length - len(query_tokens) - 3 - - # We can have documents that are longer than the maximum sequence length. - # To deal with this we do a sliding window approach, where we take chunks - # of the up to our max length with a stride of `doc_stride`. 
- _DocSpan = collections.namedtuple( # pylint: disable=invalid-name - "DocSpan", ["start", "length"]) - doc_spans = [] - start_offset = 0 - while start_offset < len(all_doc_tokens): - length = len(all_doc_tokens) - start_offset - if length > max_tokens_for_doc: - length = max_tokens_for_doc - doc_spans.append(_DocSpan(start=start_offset, length=length)) - if start_offset + length == len(all_doc_tokens): - break - start_offset += min(length, doc_stride) - - for (doc_span_index, doc_span) in enumerate(doc_spans): - tokens = [] - token_to_orig_map = {} - token_is_max_context = {} - segment_ids = [] - tokens.append("[CLS]") - segment_ids.append(0) - for token in query_tokens: - tokens.append(token) - segment_ids.append(0) - tokens.append("[SEP]") - segment_ids.append(0) - - for i in range(doc_span.length): - split_token_index = doc_span.start + i - token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index] - - is_max_context = _check_is_max_context(doc_spans, doc_span_index, - split_token_index) - token_is_max_context[len(tokens)] = is_max_context - tokens.append(all_doc_tokens[split_token_index]) - segment_ids.append(1) - tokens.append("[SEP]") - segment_ids.append(1) - - input_ids = tokenizer.convert_tokens_to_ids(tokens) - - # The mask has 1 for real tokens and 0 for padding tokens. Only real - # tokens are attended to. - input_mask = [1] * len(input_ids) - - # Zero-pad up to the sequence length. - while len(input_ids) < max_seq_length: - input_ids.append(0) - input_mask.append(0) - segment_ids.append(0) - - assert len(input_ids) == max_seq_length - assert len(input_mask) == max_seq_length - assert len(segment_ids) == max_seq_length - - start_position = None - end_position = None - if is_training and not example.is_impossible: - # For training, if our document chunk does not contain an annotation - # we throw it out, since there is nothing to predict. 
- doc_start = doc_span.start - doc_end = doc_span.start + doc_span.length - 1 - out_of_span = False - if not (tok_start_position >= doc_start and - tok_end_position <= doc_end): - out_of_span = True - if out_of_span: - start_position = 0 - end_position = 0 - else: - doc_offset = len(query_tokens) + 2 - start_position = tok_start_position - doc_start + doc_offset - end_position = tok_end_position - doc_start + doc_offset - - if is_training and example.is_impossible: - start_position = 0 - end_position = 0 - - if example_index < 1: - tf.compat.v1.logging.info("*** Example ***") - tf.compat.v1.logging.info("unique_id: %s" % (unique_id)) - tf.compat.v1.logging.info("example_index: %s" % (example_index)) - tf.compat.v1.logging.info("doc_span_index: %s" % (doc_span_index)) - tf.compat.v1.logging.info("tokens: %s" % " ".join( - [tokenization.printable_text(x) for x in tokens])) - tf.compat.v1.logging.info("token_to_orig_map: %s" % " ".join( - ["%d:%d" % (x, y) for (x, y) in six.iteritems(token_to_orig_map)])) - tf.compat.v1.logging.info("token_is_max_context: %s" % " ".join([ - "%d:%s" % (x, y) for (x, y) in six.iteritems(token_is_max_context) - ])) - tf.compat.v1.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids])) - tf.compat.v1.logging.info( - "input_mask: %s" % " ".join([str(x) for x in input_mask])) - tf.compat.v1.logging.info( - "segment_ids: %s" % " ".join([str(x) for x in segment_ids])) - if is_training and example.is_impossible: - tf.compat.v1.logging.info("impossible example") - if is_training and not example.is_impossible: - answer_text = " ".join(tokens[start_position:(end_position + 1)]) - tf.compat.v1.logging.info("start_position: %d" % (start_position)) - tf.compat.v1.logging.info("end_position: %d" % (end_position)) - tf.compat.v1.logging.info( - "answer: %s" % (tokenization.printable_text(answer_text))) - - feature = InputFeatures( - unique_id=unique_id, - example_index=example_index, - doc_span_index=doc_span_index, - tokens=tokens, - token_to_orig_map=token_to_orig_map, - token_is_max_context=token_is_max_context, - input_ids=input_ids, - input_mask=input_mask, - segment_ids=segment_ids, - start_position=start_position, - end_position=end_position, - is_impossible=example.is_impossible) - - # Run callback - output_fn(feature) - - unique_id += 1 - - -def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer, - orig_answer_text): - """Returns tokenized answer spans that better match the annotated answer.""" - - # The SQuAD annotations are character based. We first project them to - # whitespace-tokenized words. But then after WordPiece tokenization, we can - # often find a "better match". For example: - # - # Question: What year was John Smith born? - # Context: The leader was John Smith (1895-1943). - # Answer: 1895 - # - # The original whitespace-tokenized answer will be "(1895-1943).". However - # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match - # the exact answer, 1895. - # - # However, this is not always possible. Consider the following: - # - # Question: What country is the top exporter of electornics? - # Context: The Japanese electronics industry is the lagest in the world. - # Answer: Japan - # - # In this case, the annotator chose "Japan" as a character sub-span of - # the word "Japanese". Since our WordPiece tokenizer does not split - # "Japanese", we just use "Japanese" as the annotation. This is fairly rare - # in SQuAD, but does happen. 
- tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text)) - - for new_start in range(input_start, input_end + 1): - for new_end in range(input_end, new_start - 1, -1): - text_span = " ".join(doc_tokens[new_start:(new_end + 1)]) - if text_span == tok_answer_text: - return (new_start, new_end) - - return (input_start, input_end) - - -def _check_is_max_context(doc_spans, cur_span_index, position): - """Check if this is the 'max context' doc span for the token.""" - - # Because of the sliding window approach taken to scoring documents, a single - # token can appear in multiple documents. E.g. - # Doc: the man went to the store and bought a gallon of milk - # Span A: the man went to the - # Span B: to the store and bought - # Span C: and bought a gallon of - # ... - # - # Now the word 'bought' will have two scores from spans B and C. We only - # want to consider the score with "maximum context", which we define as - # the *minimum* of its left and right context (the *sum* of left and - # right context will always be the same, of course). - # - # In the example the maximum context for 'bought' would be span C since - # it has 1 left context and 3 right context, while span B has 4 left context - # and 0 right context. - best_score = None - best_span_index = None - for (span_index, doc_span) in enumerate(doc_spans): - end = doc_span.start + doc_span.length - 1 - if position < doc_span.start: - continue - if position > end: - continue - num_left_context = position - doc_span.start - num_right_context = end - position - score = min(num_left_context, num_right_context) + 0.01 * doc_span.length - if best_score is None or score > best_score: - best_score = score - best_span_index = span_index - - return cur_span_index == best_span_index - -class FeatureWriter(object): - """Writes InputFeature to TF example file.""" - - def __init__(self, filename, is_training): - self.is_training = is_training - self.num_features = 0 - self.filename = filename - self._writer = tf.io.TFRecordWriter(self.filename) - - def process_feature(self, feature): - """Write a InputFeature to the TFRecordWriter as a tf.train.Example.""" - self.num_features += 1 - - def create_int_feature(values): - feature = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(values))) - return feature - - features = collections.OrderedDict() - features["unique_ids"] = create_int_feature([feature.unique_id]) - features["input_ids"] = create_int_feature(feature.input_ids) - features["input_mask"] = create_int_feature(feature.input_mask) - features["segment_ids"] = create_int_feature(feature.segment_ids) - - if self.is_training: - features["start_positions"] = create_int_feature([feature.start_position]) - features["end_positions"] = create_int_feature([feature.end_position]) - impossible = 0 - if feature.is_impossible: - impossible = 1 - features["is_impossible"] = create_int_feature([impossible]) - - tf_example = tf.train.Example(features=tf.train.Features(feature=features)) - self._writer.write(tf_example.SerializeToString()) - - def close(self): - self._writer.close() - - def rm_tmp_file(self): - os.remove(self.filename) - -def main(_): - tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) - - tokenizer = tokenization.FullTokenizer( - vocab_file=FLAGS.vocab_file, do_lower_case=True) - - eval_examples = read_squad_examples( - input_file=FLAGS.predict_file, is_training=False) - - eval_writer = FeatureWriter( - filename=FLAGS.output_file, is_training=False) - - eval_features = [] - def append_feature(feature): - 
eval_features.append(feature) - eval_writer.process_feature(feature) - convert_examples_to_features( - examples=eval_examples, - tokenizer=tokenizer, - max_seq_length=384, - doc_stride=128, - max_query_length=64, - is_training=False, - output_fn=append_feature) - - -if __name__ == "__main__": - flags.mark_flag_as_required("vocab_file") - flags.mark_flag_as_required("predict_file") - flags.mark_flag_as_required("output_file") - tf.compat.v1.app.run() diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/evaluate_squad.py b/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/evaluate_squad.py deleted file mode 100644 index cdb87d2b12d..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/evaluate_squad.py +++ /dev/null @@ -1,98 +0,0 @@ -""" Official evaluation script for v1.1 of the SQuAD dataset. -https://github.com/allenai/bi-att-flow/blob/master/squad/evaluate-v1.1.py """ -from __future__ import print_function -from collections import Counter -import string -import re -import argparse -import json -import sys - - -def normalize_answer(s): - """Lower text and remove punctuation, articles and extra whitespace.""" - def remove_articles(text): - return re.sub(r'\b(a|an|the)\b', ' ', text) - - def white_space_fix(text): - return ' '.join(text.split()) - - def remove_punc(text): - exclude = set(string.punctuation) - return ''.join(ch for ch in text if ch not in exclude) - - def lower(text): - return text.lower() - - return white_space_fix(remove_articles(remove_punc(lower(s)))) - - -def f1_score(prediction, ground_truth): - prediction_tokens = normalize_answer(prediction).split() - ground_truth_tokens = normalize_answer(ground_truth).split() - common = Counter(prediction_tokens) & Counter(ground_truth_tokens) - num_same = sum(common.values()) - if num_same == 0: - return 0 - precision = 1.0 * num_same / len(prediction_tokens) - recall = 1.0 * num_same / len(ground_truth_tokens) - f1 = (2 * precision * recall) / (precision + recall) - return f1 - - -def exact_match_score(prediction, ground_truth): - return (normalize_answer(prediction) == normalize_answer(ground_truth)) - - -def metric_max_over_ground_truths(metric_fn, prediction, ground_truths): - scores_for_ground_truths = [] - for ground_truth in ground_truths: - score = metric_fn(prediction, ground_truth) - scores_for_ground_truths.append(score) - return max(scores_for_ground_truths) - - -def evaluate(dataset, predictions): - f1 = exact_match = total = 0 - for article in dataset: - for paragraph in article['paragraphs']: - for qa in paragraph['qas']: - total += 1 - if qa['id'] not in predictions: - message = 'Unanswered question ' + qa['id'] + \ - ' will receive score 0.' 
- print(message, file=sys.stderr) - continue - ground_truths = list(map(lambda x: x['text'], qa['answers'])) - prediction = predictions[qa['id']] - exact_match += metric_max_over_ground_truths( - exact_match_score, prediction, ground_truths) - f1 += metric_max_over_ground_truths( - f1_score, prediction, ground_truths) - - exact_match = 100.0 * exact_match / total - f1 = 100.0 * f1 / total - - return {'exact_match': exact_match, 'f1': f1} - - -if __name__ == '__main__': - expected_version = '1.1' - parser = argparse.ArgumentParser( - description='Evaluation for SQuAD ' + expected_version) - parser.add_argument('dataset_file', help='Dataset file') - parser.add_argument('prediction_file', help='Prediction File') - args = parser.parse_args() - - with open(args.dataset_file) as dataset_file: - dataset_json = json.load(dataset_file) - if (dataset_json['version'] != expected_version): - print('Evaluation expects v-' + expected_version + - ', but got dataset with v-' + dataset_json['version'], - file=sys.stderr) - dataset = dataset_json['data'] - - with open(args.prediction_file) as prediction_file: - predictions = json.load(prediction_file) - print(json.dumps(evaluate(dataset, predictions))) - diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/export_classifier.py b/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/export_classifier.py deleted file mode 100644 index f85e3c7e805..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/export_classifier.py +++ /dev/null @@ -1,163 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import tensorflow as tf -from absl import app -from absl import logging -tf.compat.v1.disable_v2_behavior() - -tf.compat.v1.flags.DEFINE_bool("saved_model", - False, - "whether export saved model or not") -FLAGS = tf.compat.v1.flags.FLAGS - -# We just import classifier here for `create_model` and some processors such as -# MNLI or MRPC. Because of the flags defined in `run_classifier.py`, we need not -# to define the flags again. 
-from run_classifier import create_model_top -from run_classifier import ColaProcessor -from run_classifier import MnliProcessor -from run_classifier import MrpcProcessor -from run_classifier import XnliProcessor -from modeling import BertConfig - -class ClassifierExporter: - def __init__(self, - output_dir: str, - task_name: str, - bert_config: str, - max_seq_length: int): - - processors = { - "cola": ColaProcessor, - "mnli": MnliProcessor, - "mrpc": MrpcProcessor, - "xnli": XnliProcessor - } - - task_name = task_name.lower() - if task_name not in processors: - raise ValueError("Task not found: %s" % (task_name)) - - processor = processors[task_name]() - label_list = processor.get_labels() - num_labels = len(label_list) - - # create model for CPU/dGPU, not TPU - use_one_hot_embeddings = False - - bert_config = BertConfig.from_json_file(bert_config) - if FLAGS.precision: - bert_config.precision = FLAGS.precision - - self.session = tf.compat.v1.Session() - - placeholder = tf.compat.v1.placeholder - input_shape = [None, max_seq_length] - self.label_ids = placeholder(tf.int32, [None], name='label_ids') - self.input_ids = placeholder(tf.int32, input_shape, name='input_ids') - self.input_mask = placeholder(tf.int32, input_shape, name='input_mask') - self.segment_ids = placeholder(tf.int32, input_shape, name='segment_ids') - - self.loss, self.per_example_loss, self.logits, self.probabilities = \ - create_model_top(bert_config, False, # is training - self.input_ids, self.input_mask, self.segment_ids, - self.label_ids, num_labels, use_one_hot_embeddings, - None) # frozen graph path - - latest_model = tf.train.latest_checkpoint(FLAGS.output_dir) - saver = tf.compat.v1.train.Saver() - saver.restore(self.session, latest_model) - - self.output_dir = output_dir - self.dest_dir = os.path.join(self.output_dir, "frozen") - if not os.path.exists(self.dest_dir): - os.mkdir(self.dest_dir) - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, exc_tb): - self.session.close() - - def export(self, saved_model: bool): - if saved_model: - self.export_saved_model() - - self.export_frozen_graph() - - def export_saved_model(self, - signature_def_name="eval", - tag=tf.compat.v1.saved_model.tag_constants.SERVING): - build_tensor_info = tf.compat.v1.saved_model.build_tensor_info - signature_def_utils = tf.compat.v1.saved_model.signature_def_utils - inputs = { - 'label_ids': build_tensor_info(self.label_ids), - 'input_ids': build_tensor_info(self.input_ids), - 'input_mask': build_tensor_info(self.input_mask), - 'segment_ids': build_tensor_info(self.segment_ids) - } - - outputs = { - "loss": build_tensor_info(self.loss), - "per_example_loss": build_tensor_info(self.per_example_loss), - "logits": build_tensor_info(self.logits), - "probabilities": build_tensor_info(self.probabilities) - } - - signature = signature_def_utils.build_signature_def(inputs, outputs) - signature_def_map = {signature_def_name: signature} - - builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(self.dest_dir) - builder.add_meta_graph_and_variables(self.session, [tag], signature_def_map) - builder.save() - - def export_frozen_graph(self, frozen_graph_name="frozen_graph.pb"): - # we should disable v2 behavior, at the same time, the bn norm has some op name difference - # should be handled. Otherwise, it will throw exception when do import graph def. 
- # https://www.bountysource.com/issues/36614355-unable-to-import-frozen-graph-with-batchnorm - graph_def = self.session.graph.as_graph_def() - for node in graph_def.node: - if node.op == 'RefEnter': - node.op = 'Enter' - for index in range(len(node.input)): - if 'moving_' in node.input[index]: - node.input[index] = node.input[index] + '/read' - if node.op == 'RefSwitch': - node.op = 'Switch' - for index in range(len(node.input)): - if 'moving_' in node.input[index]: - node.input[index] = node.input[index] + '/read' - elif node.op == 'AssignSub': - node.op = 'Sub' - if 'use_locking' in node.attr: del node.attr['use_locking'] - elif node.op == 'AssignAdd': - node.op = 'Add' - if 'use_locking' in node.attr: del node.attr['use_locking'] - - outputs_name = ['loss/Mean', 'loss/Sum', 'loss/BiasAdd', 'loss/Softmax'] - graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(self.session, - graph_def, - outputs_name) - - path = os.path.join(self.dest_dir, frozen_graph_name) - with tf.compat.v1.gfile.GFile(path, 'wb') as pb_file: - pb_file.write(graph_def.SerializeToString()) - -def main(_): - tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) - - with ClassifierExporter(FLAGS.output_dir, - FLAGS.task_name, - FLAGS.bert_config_file, - FLAGS.max_seq_length) as exporter: - exporter.export(FLAGS.saved_model) - -if __name__ == "__main__": - tf.compat.v1.flags.mark_flag_as_required("task_name") - tf.compat.v1.flags.mark_flag_as_required("bert_config_file") - tf.compat.v1.flags.mark_flag_as_required("output_dir") - tf.compat.v1.app.run() diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/freeze_estimator_to_pb.py b/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/freeze_estimator_to_pb.py deleted file mode 100644 index f43def945e0..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/freeze_estimator_to_pb.py +++ /dev/null @@ -1,340 +0,0 @@ -#!/usr/bin/env python - -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Freeze estimator to frozen pb for bert full pipline tuning.""" - -import os -import modeling -import tensorflow as tf -import numpy as np -from absl import app -from absl import logging - -flags = tf.compat.v1.flags -FLAGS = flags.FLAGS - -## Required parameters -flags.DEFINE_string( - "input_model", None, "The input checkpoint path of model.") - -flags.DEFINE_string( - "output_model", None, "The output path frozen pb will be written.") - -def write_graph(out_graph_def, out_graph_file): - from tensorflow.python.platform import gfile - if not isinstance(out_graph_def, tf.compat.v1.GraphDef): - raise ValueError( - 'out_graph_def is not instance of TensorFlow GraphDef.') - if out_graph_file and not os.path.exists(os.path.dirname(out_graph_file)): - raise ValueError('"output_graph" directory does not exists.') - f = gfile.GFile(out_graph_file, 'wb') - f.write(out_graph_def.SerializeToString()) - -def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer, - orig_answer_text): - """Returns tokenized answer spans that better match the annotated answer.""" - - # The SQuAD annotations are character based. We first project them to - # whitespace-tokenized words. But then after WordPiece tokenization, we can - # often find a "better match". For example: - # - # Question: What year was John Smith born? - # Context: The leader was John Smith (1895-1943). - # Answer: 1895 - # - # The original whitespace-tokenized answer will be "(1895-1943).". However - # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match - # the exact answer, 1895. - # - # However, this is not always possible. Consider the following: - # - # Question: What country is the top exporter of electornics? - # Context: The Japanese electronics industry is the lagest in the world. - # Answer: Japan - # - # In this case, the annotator chose "Japan" as a character sub-span of - # the word "Japanese". Since our WordPiece tokenizer does not split - # "Japanese", we just use "Japanese" as the annotation. This is fairly rare - # in SQuAD, but does happen. - tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text)) - - for new_start in range(input_start, input_end + 1): - for new_end in range(input_end, new_start - 1, -1): - text_span = " ".join(doc_tokens[new_start:(new_end + 1)]) - if text_span == tok_answer_text: - return (new_start, new_end) - - return (input_start, input_end) - -def _check_is_max_context(doc_spans, cur_span_index, position): - """Check if this is the 'max context' doc span for the token.""" - - # Because of the sliding window approach taken to scoring documents, a single - # token can appear in multiple documents. E.g. - # Doc: the man went to the store and bought a gallon of milk - # Span A: the man went to the - # Span B: to the store and bought - # Span C: and bought a gallon of - # ... - # - # Now the word 'bought' will have two scores from spans B and C. We only - # want to consider the score with "maximum context", which we define as - # the *minimum* of its left and right context (the *sum* of left and - # right context will always be the same, of course). - # - # In the example the maximum context for 'bought' would be span C since - # it has 1 left context and 3 right context, while span B has 4 left context - # and 0 right context. 
- best_score = None - best_span_index = None - for (span_index, doc_span) in enumerate(doc_spans): - end = doc_span.start + doc_span.length - 1 - if position < doc_span.start: - continue - if position > end: - continue - num_left_context = position - doc_span.start - num_right_context = end - position - score = min(num_left_context, num_right_context) + 0.01 * doc_span.length - if best_score is None or score > best_score: - best_score = score - best_span_index = span_index - - return cur_span_index == best_span_index - - -def create_model(bert_config, is_training, input_ids, input_mask, segment_ids, - use_one_hot_embeddings): - """Creates a classification model.""" - model = modeling.BertModel( - config=bert_config, - is_training=is_training, - input_ids=input_ids, - input_mask=input_mask, - token_type_ids=segment_ids, - use_one_hot_embeddings=use_one_hot_embeddings) - - final_hidden = model.get_sequence_output() - - final_hidden_shape = modeling.get_shape_list(final_hidden, expected_rank=3) - batch_size = final_hidden_shape[0] - seq_length = final_hidden_shape[1] - hidden_size = final_hidden_shape[2] - - output_weights = tf.compat.v1.get_variable( - "cls/squad/output_weights", [2, hidden_size], - initializer=tf.compat.v1.truncated_normal_initializer(stddev=0.02)) - - output_bias = tf.compat.v1.get_variable( - "cls/squad/output_bias", [2], initializer=tf.compat.v1.zeros_initializer()) - - final_hidden_matrix = tf.reshape(final_hidden, - [batch_size * seq_length, hidden_size]) - logits = tf.matmul(final_hidden_matrix, output_weights, transpose_b=True) - logits = tf.nn.bias_add(logits, output_bias) - - logits = tf.reshape(logits, [batch_size, seq_length, 2]) - logits = tf.transpose(a=logits, perm=[2, 0, 1]) - - unstacked_logits = tf.unstack(logits, axis=0) - - (start_logits, end_logits) = (unstacked_logits[0], unstacked_logits[1]) - - return (start_logits, end_logits) - - -def model_fn_builder(bert_config, init_checkpoint, learning_rate, - num_train_steps, num_warmup_steps, use_tpu, - use_one_hot_embeddings): - """Returns `model_fn` closure for TPUEstimator.""" - def model_fn(features, labels, mode, params): # pylint: disable=unused-argument - """The `model_fn` for TPUEstimator.""" - - tf.compat.v1.logging.info("*** Features ***") - # for name in sorted(features.keys()): - # tf.compat.v1.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) - - unique_ids = features["unique_ids"] - input_ids = features["input_ids"] - input_mask = features["input_mask"] - segment_ids = features["segment_ids"] - - is_training = (mode == tf.estimator.ModeKeys.TRAIN) - - (start_logits, end_logits) = create_model( - bert_config=bert_config, - is_training=is_training, - input_ids=input_ids, - input_mask=input_mask, - segment_ids=segment_ids, - use_one_hot_embeddings=use_one_hot_embeddings) - - tvars = tf.compat.v1.trainable_variables() - - initialized_variable_names = {} - scaffold_fn = None - if init_checkpoint: - (assignment_map, initialized_variable_names - ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint, "SQuAD") - if use_tpu: - - def tpu_scaffold(): - tf.compat.v1.train.init_from_checkpoint(init_checkpoint, assignment_map) - return tf.compat.v1.train.Scaffold() - - scaffold_fn = tpu_scaffold - else: - tf.compat.v1.train.init_from_checkpoint(init_checkpoint, assignment_map) - - tf.compat.v1.logging.info("**** Trainable Variables ****") - for var in tvars: - init_string = "" - if var.name in initialized_variable_names: - init_string = ", *INIT_FROM_CKPT*" - - output_spec = 
None - - predictions = { - "unique_ids": unique_ids, - "start_logits": start_logits, - "end_logits": end_logits, - } - output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec( - mode=mode, predictions=predictions, scaffold_fn=scaffold_fn) - - return output_spec - - return model_fn - - -def input_fn_builder(input_file, seq_length, is_training, drop_remainder): - """Creates an `input_fn` closure to be passed to TPUEstimator.""" - - name_to_features = { - "unique_ids": tf.io.FixedLenFeature([], tf.int64), - "input_ids": tf.io.FixedLenFeature([seq_length], tf.int64), - "input_mask": tf.io.FixedLenFeature([seq_length], tf.int64), - "segment_ids": tf.io.FixedLenFeature([seq_length], tf.int64), - } - - if is_training: - name_to_features["start_positions"] = tf.io.FixedLenFeature([], tf.int64) - name_to_features["end_positions"] = tf.io.FixedLenFeature([], tf.int64) - - def _decode_record(record, name_to_features): - """Decodes a record to a TensorFlow example.""" - example = tf.io.parse_single_example(serialized=record, features=name_to_features) - - # tf.Example only supports tf.int64, but the TPU only supports tf.int32. - # So cast all int64 to int32. - for name in list(example.keys()): - t = example[name] - if t.dtype == tf.int64: - t = tf.cast(t, dtype=tf.int32) - example[name] = t - - return example - - def input_fn(params): - """The actual input function.""" - input_file_placeholder = tf.compat.v1.placeholder(shape=[], - name="input_file", dtype=tf.string) - batch_size_placeholder = tf.compat.v1.placeholder(shape=[], - name="batch_size", dtype=tf.int64) - #batch_size = params["batch_size"] - - # For training, we want a lot of parallel reading and shuffling. - # For eval, we want no shuffling and parallel reading doesn't matter. - # d = tf.data.TFRecordDataset(input_file) - d = tf.data.TFRecordDataset(input_file_placeholder) - if is_training: - d = d.repeat() - d = d.shuffle(buffer_size=100) - - d = d.apply( - tf.data.experimental.map_and_batch( - lambda record: _decode_record(record, name_to_features), - batch_size=batch_size_placeholder, - drop_remainder=drop_remainder)) - - return d - - return input_fn - -bert_config_dict = {'vocab_size': 30522, 'hidden_size': 1024, 'num_hidden_layers': 24, \ - 'num_attention_heads': 16, 'hidden_act': 'gelu', 'intermediate_size': 4096,\ - 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, \ - 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, \ - 'precision': 'fp32', 'new_bf16_scope': True, 'experimental_gelu': False, \ - 'optimized_softmax': False} - -def main(_): - tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) - - bert_config = modeling.BertConfig.from_dict(bert_config_dict) - - session_config = tf.compat.v1.ConfigProto( - inter_op_parallelism_threads=2, - intra_op_parallelism_threads=27, - allow_soft_placement=True) - - is_per_host = tf.compat.v1.estimator.tpu.InputPipelineConfig.PER_HOST_V2 - - run_config = tf.compat.v1.estimator.tpu.RunConfig( - cluster=None, - master=None, - model_dir='./', - save_checkpoints_steps=1000, - session_config=session_config, - tpu_config=tf.compat.v1.estimator.tpu.TPUConfig( - iterations_per_loop=1000, - num_shards=8, - per_host_input_for_training=is_per_host)) - - predict_input_fn = input_fn_builder( - input_file='', - seq_length=384, - is_training=False, - drop_remainder=False) - - from neural_compressor.adaptor.tf_utils.util import is_ckpt_format - assert is_ckpt_format(FLAGS.input_model), 'invalid chekpoint path....' 
- ckpt_model = [os.path.splitext(i)[0] for i in os.listdir(FLAGS.input_model) \ - if i.endswith('.meta')][0] - model_fn = model_fn_builder( - bert_config=bert_config, - init_checkpoint=os.path.join(FLAGS.input_model, ckpt_model), - learning_rate=5e-5, - num_train_steps=None, - num_warmup_steps=None, - use_tpu=False, - use_one_hot_embeddings=False) - - # If TPU is not available, this will fall back to normal Estimator on CPU - # or GPU. - estimator = tf.compat.v1.estimator.tpu.TPUEstimator( - use_tpu=False, - model_fn=model_fn, - config=run_config, - train_batch_size=32, - predict_batch_size=8) - - from neural_compressor.adaptor.tf_utils.util import get_estimator_graph - graph = get_estimator_graph(estimator, predict_input_fn) - write_graph(graph.as_graph_def(), FLAGS.output_model) - -if __name__ == "__main__": - tf.compat.v1.app.run() diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/generic_ops.py b/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/generic_ops.py deleted file mode 100644 index 4be6996eb29..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/generic_ops.py +++ /dev/null @@ -1,119 +0,0 @@ -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""The main BERT model and related functions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf -from pkg_resources import parse_version - -_inprecision = tf.float32 -_rprecision = tf.float32 -if parse_version(tf.version.VERSION) < parse_version('2.9.0'): - _keras_policy = tf.keras.mixed_precision.experimental.Policy("float32") -else: - _keras_policy = tf.keras.mixed_precision.Policy("float32") - -_use_optimized_softmax = True -_use_experimental_gelu = True - -def set_global_precision(dt): - # Set Keras API precision - global _keras_policy - if dt == tf.bfloat16: - if parse_version(tf.version.VERSION) < parse_version('2.9.0'): - _keras_policy=tf.keras.mixed_precision.experimental.Policy("mixed_bfloat16") - else: - _keras_policy = tf.keras.mixed_precision.Policy("mixed_bfloat16") - - # Set basic API precision - set_rprecision(dt) - -def set_rprecision(dt): - global _rprecision - _rprecision=dt - -def get_keras_policy(): - return _keras_policy - -def set_global_flags(optimized_softmax, experimental_gelu): - global _use_optimized_softmax - global _use_experimental_gelu - _use_optimized_softmax = optimized_softmax - _use_experimental_gelu = experimental_gelu - -def i_cast(x) : - return tf.cast(x, _inprecision) - -def r_cast(x) : - return tf.cast(x, _rprecision) - -def multiply(x,y): - x = r_cast(x) - y = r_cast(y) - return tf.multiply(x,y) - -def mzip(x,y): - if x.dtype== tf.bfloat16: - x = r_cast(x) - y = r_cast(y) - return zip(x,y) - -def tanh(x): - x = i_cast(x) - rval = tf.tanh(x) - return r_cast(rval) - -def softmax(scores, axis=None): - if _use_optimized_softmax: - return tf.nn.softmax(scores, axis) - else: - scores = i_cast(scores) - rval = tf.nn.softmax(scores, axis) - return r_cast(rval) - -def layer_norm(inputs, begin_norm_axis, begin_params_axis, scope): - lnorm = tf.keras.layers.LayerNormalization(dtype=get_keras_policy()) - return lnorm(inputs) - -"Moved from modeling.py" -def gelu(x): - """Gaussian Error Linear Unit. - - This is a smoother version of the RELU. - Original paper: https://arxiv.org/abs/1606.08415 - Args: - x: float Tensor to perform activation. - - Returns: - `x` with the GELU activation applied. - """ - #if _use_experial_gelu: - if True: - print("using experimental gelu") - return tf.nn.gelu(x) - else: - x = i_cast(x) - cdf = 0.5 * (1.0 + tf.tanh( - (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))) - rval = x * cdf - return r_cast(rval) - -def logTheLossHook(total_loss, n): - return tf.compat.v1.train.LoggingTensorHook({"\t Loss " : total_loss}, every_n_iter=n) - diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/modeling.py b/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/modeling.py deleted file mode 100644 index 7e8c02632f8..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/modeling.py +++ /dev/null @@ -1,1052 +0,0 @@ -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -"""The main BERT model and related functions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import copy -import json -import math -import re -import numpy as np -import six -import tensorflow as tf -import generic_ops as bf - - -class BertConfig(object): - """Configuration for `BertModel`.""" - - def __init__(self, - vocab_size, - hidden_size=768, - num_hidden_layers=12, - num_attention_heads=12, - intermediate_size=3072, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - initializer_range=0.02, - precision="fp32", - new_bf16_scope=True): - """Constructs BertConfig. - - Args: - vocab_size: Vocabulary size of `inputs_ids` in `BertModel`. - hidden_size: Size of the encoder layers and the pooler layer. - num_hidden_layers: Number of hidden layers in the Transformer encoder. - num_attention_heads: Number of attention heads for each attention layer in - the Transformer encoder. - intermediate_size: The size of the "intermediate" (i.e., feed-forward) - layer in the Transformer encoder. - hidden_act: The non-linear activation function (function or string) in the - encoder and pooler. - hidden_dropout_prob: The dropout probability for all fully connected - layers in the embeddings, encoder, and pooler. - attention_probs_dropout_prob: The dropout ratio for the attention - probabilities. - max_position_embeddings: The maximum sequence length that this model might - ever be used with. Typically set this to something large just in case - (e.g., 512 or 1024 or 2048). - type_vocab_size: The vocabulary size of the `token_type_ids` passed into - `BertModel`. - initializer_range: The stdev of the truncated_normal_initializer for - initializing all weight matrices. - precision: To enable fp32 or bfloat16 based training - """ - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.hidden_act = hidden_act - self.intermediate_size = intermediate_size - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.initializer_range = initializer_range - self.precision = precision - self.new_bf16_scope = new_bf16_scope - self.experimental_gelu = False - self.optimized_softmax = False - - @classmethod - def from_dict(cls, json_object): - """Constructs a `BertConfig` from a Python dictionary of parameters.""" - config = BertConfig(vocab_size=None) - for (key, value) in six.iteritems(json_object): - config.__dict__[key] = value - return config - - @classmethod - def from_json_file(cls, json_file): - """Constructs a `BertConfig` from a json file of parameters.""" - with tf.io.gfile.GFile(json_file, "r") as reader: - text = reader.read() - return cls.from_dict(json.loads(text)) - - def to_dict(self): - """Serializes this instance to a Python dictionary.""" - output = copy.deepcopy(self.__dict__) - return output - - def to_json_string(self): - """Serializes this instance to a JSON string.""" - return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n" - - -class BertModel(object): - """BERT model ("Bidirectional Encoder Representations from Transformers"). 
- - Example usage: - - ```python - # Already been converted into WordPiece token ids - input_ids = tf.constant([[31, 51, 99], [15, 5, 0]]) - input_mask = tf.constant([[1, 1, 1], [1, 1, 0]]) - token_type_ids = tf.constant([[0, 0, 1], [0, 2, 0]]) - - config = modeling.BertConfig(vocab_size=32000, hidden_size=512, - num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024) - - model = modeling.BertModel(config=config, is_training=True, - input_ids=input_ids, input_mask=input_mask, token_type_ids=token_type_ids) - - label_embeddings = tf.get_variable(...) - pooled_output = model.get_pooled_output() - logits = tf.matmul(pooled_output, label_embeddings) - ... - ``` - """ - - def __init__(self, - config, - is_training, - input_ids, - input_mask=None, - token_type_ids=None, - use_one_hot_embeddings=False, - scope=None): - """Constructor for BertModel. - - Args: - config: `BertConfig` instance. - is_training: bool. true for training model, false for eval model. Controls - whether dropout will be applied. - input_ids: int32 Tensor of shape [batch_size, seq_length]. - input_mask: (optional) int32 Tensor of shape [batch_size, seq_length]. - token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length]. - use_one_hot_embeddings: (optional) bool. Whether to use one-hot word - embeddings or tf.embedding_lookup() for the word embeddings. - scope: (optional) variable scope. Defaults to "bert". - - Raises: - ValueError: The config is invalid or one of the input tensor shapes - is invalid. - """ - # Flags for BF16 CPU - self.bf16_scope = False - if config.precision == "bfloat16" : - bf.set_global_precision(tf.bfloat16) - if config.new_bf16_scope : - self.bf16_scope = True - - bf.set_global_flags(optimized_softmax=config.optimized_softmax, - experimental_gelu=config.experimental_gelu) - - config = copy.deepcopy(config) - if not is_training: - config.hidden_dropout_prob = 0.0 - config.attention_probs_dropout_prob = 0.0 - - input_shape = get_shape_list(input_ids, expected_rank=2) - batch_size = input_shape[0] - seq_length = input_shape[1] - - if input_mask is None: - input_mask = tf.ones(shape=[batch_size, seq_length], dtype=tf.int32) - - if token_type_ids is None: - token_type_ids = tf.zeros(shape=[batch_size, seq_length], dtype=tf.int32) - - with tf.compat.v1.variable_scope(scope, default_name="bert"): - with tf.compat.v1.variable_scope("embeddings"): - # Perform embedding lookup on the word ids. - (self.embedding_output, self.embedding_table) = embedding_lookup( - input_ids=input_ids, - vocab_size=config.vocab_size, - embedding_size=config.hidden_size, - initializer_range=config.initializer_range, - word_embedding_name="word_embeddings", - use_one_hot_embeddings=use_one_hot_embeddings) - - # Add positional embeddings and token type embeddings, then layer - # normalize and perform dropout. - self.embedding_output = embedding_postprocessor( - input_tensor=self.embedding_output, - use_token_type=True, - token_type_ids=token_type_ids, - token_type_vocab_size=config.type_vocab_size, - token_type_embedding_name="token_type_embeddings", - use_position_embeddings=True, - position_embedding_name="position_embeddings", - initializer_range=config.initializer_range, - max_position_embeddings=config.max_position_embeddings, - dropout_prob=config.hidden_dropout_prob) - - with tf.compat.v1.variable_scope("encoder"): - # This converts a 2D mask of shape [batch_size, seq_length] to a 3D - # mask of shape [batch_size, seq_length, seq_length] which is used - # for the attention scores. 
- attention_mask = create_attention_mask_from_input_mask( - input_ids, input_mask) - - # Run the stacked transformer. - # `sequence_output` shape = [batch_size, seq_length, hidden_size]. - # Cast is used to cover bfloat16 - input_tensor=bf.r_cast(self.embedding_output) - self.all_encoder_layers = transformer_model( - input_tensor=input_tensor, - attention_mask=attention_mask, - hidden_size=config.hidden_size, - num_hidden_layers=config.num_hidden_layers, - num_attention_heads=config.num_attention_heads, - intermediate_size=config.intermediate_size, - intermediate_act_fn=get_activation(config.hidden_act), - hidden_dropout_prob=config.hidden_dropout_prob, - attention_probs_dropout_prob=config.attention_probs_dropout_prob, - initializer_range=config.initializer_range, - do_return_all_layers=True) - - self.sequence_output = self.all_encoder_layers[-1] - # The "pooler" converts the encoded sequence tensor of shape - # [batch_size, seq_length, hidden_size] to a tensor of shape - # [batch_size, hidden_size]. This is necessary for segment-level - # (or segment-pair-level) classification tasks where we need a fixed - # dimensional representation of the segment. - with tf.compat.v1.variable_scope("pooler"): - # We "pool" the model by simply taking the hidden state corresponding - # to the first token. We assume that this has been pre-trained - first_token_tensor = tf.squeeze(self.sequence_output[:, 0:1, :], axis=1) - self.pooled_output = tf.compat.v1.layers.dense( - first_token_tensor, - config.hidden_size, - activation=bf.tanh, - kernel_initializer=create_initializer(config.initializer_range)) - - def get_pooled_output(self): - """ - In bfloat16 enabled execution, with only model covered in bfloat16 scope, - return the output in float32. Other cases return as is - """ - if self.bf16_scope == True: - return tf.cast(self.pooled_output, tf.float32) - else : - return self.pooled_output - - def get_sequence_output(self): - """Gets final hidden layer of encoder. - - Returns: - float Tensor of shape [batch_size, seq_length, hidden_size] corresponding - to the final hidden of the transformer encoder. - - In bfloat16 enabled execution, with only model covered in bfloat16 scope, - return the output in float32. Other cases return as is - """ - if self.bf16_scope == True: - return tf.cast(self.sequence_output, tf.float32) - else : - return self.sequence_output - - def get_all_encoder_layers(self): - return self.all_encoder_layers - - def get_embedding_output(self): - """Gets output of the embedding lookup (i.e., input to the transformer). - - Returns: - float Tensor of shape [batch_size, seq_length, hidden_size] corresponding - to the output of the embedding layer, after summing the word - embeddings with the positional embeddings and the token type embeddings, - then performing layer normalization. This is the input to the transformer. - """ - if self.bf16_scope == True : - return (self.embedding_output) - else : - return bf.r_cast(self.embedding_output) - - def get_embedding_table(self): - if self.bf16_scope == True : - return (self.embedding_table) - else : - return bf.r_cast(self.embedding_table) - - -def get_activation(activation_string): - """Maps a string to a Python function, e.g., "relu" => `tf.nn.relu`. - - Args: - activation_string: String name of the activation function. - - Returns: - A Python function corresponding to the activation function. If - `activation_string` is None, empty, or "linear", this will return None. - If `activation_string` is not a string, it will return `activation_string`. 
- - Raises: - ValueError: The `activation_string` does not correspond to a known - activation. - """ - - # We assume that anything that"s not a string is already an activation - # function, so we just return it. - if not isinstance(activation_string, six.string_types): - return activation_string - - if not activation_string: - return None - - act = activation_string.lower() - if act == "linear": - return None - elif act == "relu": - return tf.nn.relu - elif act == "gelu": - return bf.gelu - elif act == "tanh": - return bf.tanh - else: - raise ValueError("Unsupported activation: %s" % act) - - -def get_remaps(task): - regex1 = re.compile(r"layer_normalization[_0-9]*") - regex2 = None - if task=="SQuAD" : - regex2 = re.compile(r"squad") - elif task=="Classifier": - regex2 = re.compile(r"classifier") - else : - regex2 = None - - return regex1, regex2 - -def apply_remaps(name, map1, map2): - if map1!=None: - name = map1.sub("LayerNorm", name) - if map2!=None : - name = map2.sub("seq_relationship", name) - return name - -def check_model_validity(tvars, assignment_map) : - # Check if all model vars have a mapping in checkpoint - missing_var=False - missed_vars=[] - for var in tvars: - if var not in assignment_map.values(): - missed_vars.append(var) - missing_var=True - if missing_var : - for var in missed_vars: - tf.compat.v1.logging.info("Model Variable not in checkpoint", var) - raise ValueError("Error: Missing model variables in checkpoint!!") - -def get_assignment_map_from_checkpoint(tvars, init_checkpoint, task="Pretraining"): - """Compute the union of the current variables and checkpoint variables.""" - assignment_map = {} - initialized_variable_names = {} - - map1, map2 = get_remaps(task) - - name_to_variable = collections.OrderedDict() - for var in tvars: - name = var.name - m = re.match("^(.*):\\d+$", name) - if m is not None: - name = m.group(1) - name_to_variable[name] = var - name = apply_remaps(name, map1, map2) - name_to_variable[name] = var - - init_vars = tf.train.list_variables(init_checkpoint) - - assignment_map = collections.OrderedDict() - for x in init_vars: - (name, var) = (x[0], x[1]) - if name not in name_to_variable: - continue - assignment_map[name] = name_to_variable[name] - ivar = name_to_variable[name] - initialized_variable_names[ivar.name] = 1 - initialized_variable_names[ivar.name + ":0"] = 1 - - # Check if all model vars are loaded from Checkpoint - check_model_validity(tvars, assignment_map) - #for name, var in assignment_map.items(): - # print(name, "--->", var) - - return (assignment_map, initialized_variable_names) - -def dropout(input_tensor, dropout_prob): - """Perform dropout. - - Args: - input_tensor: float Tensor. - dropout_prob: Python float. The probability of dropping out a value (NOT of - *keeping* a dimension as in `tf.nn.dropout`). - - Returns: - A version of `input_tensor` with dropout applied. 
- """ - if dropout_prob is None or dropout_prob == 0.0: - return input_tensor - - output = tf.nn.dropout(input_tensor, 1 - (1.0 - dropout_prob)) - return output - - -def layer_norm(input_tensor, name=None): - """Run layer normalization on the last dimension of the tensor.""" - return bf.layer_norm( - inputs=input_tensor, begin_norm_axis=-1, begin_params_axis=-1, scope=name) - - -def layer_norm_and_dropout(input_tensor, dropout_prob, name=None): - """Runs layer normalization followed by dropout.""" - output_tensor = layer_norm(input_tensor, name) - output_tensor = dropout(output_tensor, dropout_prob) - return output_tensor - - -def create_initializer(initializer_range=0.02): - """Creates a `truncated_normal_initializer` with the given range.""" - return tf.compat.v1.truncated_normal_initializer(stddev=initializer_range) - - -def embedding_lookup(input_ids, - vocab_size, - embedding_size=128, - initializer_range=0.02, - word_embedding_name="word_embeddings", - use_one_hot_embeddings=False): - """Looks up words embeddings for id tensor. - - Args: - input_ids: int32 Tensor of shape [batch_size, seq_length] containing word - ids. - vocab_size: int. Size of the embedding vocabulary. - embedding_size: int. Width of the word embeddings. - initializer_range: float. Embedding initialization range. - word_embedding_name: string. Name of the embedding table. - use_one_hot_embeddings: bool. If True, use one-hot method for word - embeddings. If False, use `tf.gather()`. - - Returns: - float Tensor of shape [batch_size, seq_length, embedding_size]. - """ - # This function assumes that the input is of shape [batch_size, seq_length, - # num_inputs]. - # - # If the input is a 2D tensor of shape [batch_size, seq_length], we - # reshape to [batch_size, seq_length, 1]. - if input_ids.shape.ndims == 2: - input_ids = tf.expand_dims(input_ids, axis=[-1]) - - embedding_table = tf.compat.v1.get_variable( - name=word_embedding_name, - shape=[vocab_size, embedding_size], - initializer=create_initializer(initializer_range)) - - flat_input_ids = tf.reshape(input_ids, [-1]) - if use_one_hot_embeddings: - one_hot_input_ids = tf.one_hot(flat_input_ids, depth=vocab_size) - output = tf.matmul(one_hot_input_ids, embedding_table) - else: - output = tf.gather(embedding_table, flat_input_ids) - - input_shape = get_shape_list(input_ids) - - output = tf.reshape(output, - input_shape[0:-1] + [input_shape[-1] * embedding_size]) - return (output, embedding_table) - - -def embedding_postprocessor(input_tensor, - use_token_type=False, - token_type_ids=None, - token_type_vocab_size=16, - token_type_embedding_name="token_type_embeddings", - use_position_embeddings=True, - position_embedding_name="position_embeddings", - initializer_range=0.02, - max_position_embeddings=512, - dropout_prob=0.1): - """Performs various post-processing on a word embedding tensor. - - Args: - input_tensor: float Tensor of shape [batch_size, seq_length, - embedding_size]. - use_token_type: bool. Whether to add embeddings for `token_type_ids`. - token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length]. - Must be specified if `use_token_type` is True. - token_type_vocab_size: int. The vocabulary size of `token_type_ids`. - token_type_embedding_name: string. The name of the embedding table variable - for token type ids. - use_position_embeddings: bool. Whether to add position embeddings for the - position of each token in the sequence. - position_embedding_name: string. The name of the embedding table variable - for positional embeddings. 
- initializer_range: float. Range of the weight initialization. - max_position_embeddings: int. Maximum sequence length that might ever be - used with this model. This can be longer than the sequence length of - input_tensor, but cannot be shorter. - dropout_prob: float. Dropout probability applied to the final output tensor. - - Returns: - float tensor with same shape as `input_tensor`. - - Raises: - ValueError: One of the tensor shapes or input values is invalid. - """ - input_shape = get_shape_list(input_tensor, expected_rank=3) - batch_size = input_shape[0] - seq_length = input_shape[1] - width = input_shape[2] - - output = input_tensor - - if use_token_type: - if token_type_ids is None: - raise ValueError("`token_type_ids` must be specified if" - "`use_token_type` is True.") - token_type_table = tf.compat.v1.get_variable( - name=token_type_embedding_name, - shape=[token_type_vocab_size, width], - initializer=create_initializer(initializer_range)) - # This vocab will be small so we always do one-hot here, since it is always - # faster for a small vocabulary. - flat_token_type_ids = tf.reshape(token_type_ids, [-1]) - one_hot_ids = tf.one_hot(flat_token_type_ids, depth=token_type_vocab_size) - token_type_embeddings = tf.matmul(one_hot_ids, token_type_table) - token_type_embeddings = tf.reshape(token_type_embeddings, - [batch_size, seq_length, width]) - output += token_type_embeddings - - if use_position_embeddings: - assert_op = tf.compat.v1.assert_less_equal(seq_length, max_position_embeddings) - with tf.control_dependencies([assert_op]): - full_position_embeddings = tf.compat.v1.get_variable( - name=position_embedding_name, - shape=[max_position_embeddings, width], - initializer=create_initializer(initializer_range)) - # Since the position embedding table is a learned variable, we create it - # using a (long) sequence length `max_position_embeddings`. The actual - # sequence length might be shorter than this, for faster training of - # tasks that do not have long sequences. - # - # So `full_position_embeddings` is effectively an embedding table - # for position [0, 1, 2, ..., max_position_embeddings-1], and the current - # sequence has positions [0, 1, 2, ... seq_length-1], so we can just - # perform a slice. - position_embeddings = tf.slice(full_position_embeddings, [0, 0], - [seq_length, -1]) - num_dims = len(output.shape.as_list()) - - # Only the last two dimensions are relevant (`seq_length` and `width`), so - # we broadcast among the first dimensions, which is typically just - # the batch size. - position_broadcast_shape = [] - for _ in range(num_dims - 2): - position_broadcast_shape.append(1) - position_broadcast_shape.extend([seq_length, width]) - position_embeddings = tf.reshape(position_embeddings, - position_broadcast_shape) - output += position_embeddings - - output = layer_norm_and_dropout(output, dropout_prob) - return output - - -def create_attention_mask_from_input_mask(from_tensor, to_mask): - """Create 3D attention mask from a 2D tensor mask. - - Args: - from_tensor: 2D or 3D Tensor of shape [batch_size, from_seq_length, ...]. - to_mask: int32 Tensor of shape [batch_size, to_seq_length]. - - Returns: - float Tensor of shape [batch_size, from_seq_length, to_seq_length]. 
- """ - from_shape = get_shape_list(from_tensor, expected_rank=[2, 3]) - batch_size = from_shape[0] - from_seq_length = from_shape[1] - - to_shape = get_shape_list(to_mask, expected_rank=2) - to_seq_length = to_shape[1] - - to_mask = tf.cast( - tf.reshape(to_mask, [batch_size, 1, to_seq_length]), tf.float32) - - # We don't assume that `from_tensor` is a mask (although it could be). We - # don't actually care if we attend *from* padding tokens (only *to* padding) - # tokens so we create a tensor of all ones. - try: - # `broadcast_ones` = [batch_size, from_seq_length, 1] - broadcast_ones = tf.ones( - shape=[batch_size, from_seq_length, 1], dtype=tf.float32) - - # Here we broadcast along two dimensions to create the mask. - mask = broadcast_ones * to_mask - except (NotImplementedError): - # Kernel bug, happens when the version of python is 3.7 and the version of numpy is >= 1.20.0 - mask = to_mask - return mask - - -def attention_layer(from_tensor, - to_tensor, - attention_mask=None, - num_attention_heads=1, - size_per_head=512, - query_act=None, - key_act=None, - value_act=None, - attention_probs_dropout_prob=0.0, - initializer_range=0.02, - do_return_2d_tensor=False, - batch_size=None, - from_seq_length=None, - to_seq_length=None): - """Performs multi-headed attention from `from_tensor` to `to_tensor`. - - This is an implementation of multi-headed attention based on "Attention - is all you Need". If `from_tensor` and `to_tensor` are the same, then - this is self-attention. Each timestep in `from_tensor` attends to the - corresponding sequence in `to_tensor`, and returns a fixed-with vector. - - This function first projects `from_tensor` into a "query" tensor and - `to_tensor` into "key" and "value" tensors. These are (effectively) a list - of tensors of length `num_attention_heads`, where each tensor is of shape - [batch_size, seq_length, size_per_head]. - - Then, the query and key tensors are dot-producted and scaled. These are - softmaxed to obtain attention probabilities. The value tensors are then - interpolated by these probabilities, then concatenated back to a single - tensor and returned. - - In practice, the multi-headed attention are done with transposes and - reshapes rather than actual separate tensors. - - Args: - from_tensor: float Tensor of shape [batch_size, from_seq_length, - from_width]. - to_tensor: float Tensor of shape [batch_size, to_seq_length, to_width]. - attention_mask: (optional) int32 Tensor of shape [batch_size, - from_seq_length, to_seq_length]. The values should be 1 or 0. The - attention scores will effectively be set to -infinity for any positions in - the mask that are 0, and will be unchanged for positions that are 1. - num_attention_heads: int. Number of attention heads. - size_per_head: int. Size of each attention head. - query_act: (optional) Activation function for the query transform. - key_act: (optional) Activation function for the key transform. - value_act: (optional) Activation function for the value transform. - attention_probs_dropout_prob: (optional) float. Dropout probability of the - attention probabilities. - initializer_range: float. Range of the weight initializer. - do_return_2d_tensor: bool. If True, the output will be of shape [batch_size - * from_seq_length, num_attention_heads * size_per_head]. If False, the - output will be of shape [batch_size, from_seq_length, num_attention_heads - * size_per_head]. - batch_size: (Optional) int. 
If the input is 2D, this might be the batch size - of the 3D version of the `from_tensor` and `to_tensor`. - from_seq_length: (Optional) If the input is 2D, this might be the seq length - of the 3D version of the `from_tensor`. - to_seq_length: (Optional) If the input is 2D, this might be the seq length - of the 3D version of the `to_tensor`. - - Returns: - float Tensor of shape [batch_size, from_seq_length, - num_attention_heads * size_per_head]. (If `do_return_2d_tensor` is - true, this will be of shape [batch_size * from_seq_length, - num_attention_heads * size_per_head]). - - Raises: - ValueError: Any of the arguments or tensor shapes are invalid. - """ - - def transpose_for_scores(input_tensor, batch_size, num_attention_heads, - seq_length, width): - output_tensor = tf.reshape( - input_tensor, [batch_size, seq_length, num_attention_heads, width]) - - output_tensor = tf.transpose(a=output_tensor, perm=[0, 2, 1, 3]) - return output_tensor - - from_shape = get_shape_list(from_tensor, expected_rank=[2, 3]) - to_shape = get_shape_list(to_tensor, expected_rank=[2, 3]) - - if len(from_shape) != len(to_shape): - raise ValueError( - "The rank of `from_tensor` must match the rank of `to_tensor`.") - - if len(from_shape) == 3: - batch_size = from_shape[0] - from_seq_length = from_shape[1] - to_seq_length = to_shape[1] - elif len(from_shape) == 2: - if (batch_size is None or from_seq_length is None or to_seq_length is None): - raise ValueError( - "When passing in rank 2 tensors to attention_layer, the values " - "for `batch_size`, `from_seq_length`, and `to_seq_length` " - "must all be specified.") - - # Scalar dimensions referenced here: - # B = batch size (number of sequences) - # F = `from_tensor` sequence length - # T = `to_tensor` sequence length - # N = `num_attention_heads` - # H = `size_per_head` - - from_tensor_2d = reshape_to_matrix(from_tensor) - to_tensor_2d = reshape_to_matrix(to_tensor) - - # `query_layer` = [B*F, N*H] - query_layer = tf.compat.v1.layers.dense( - from_tensor_2d, - num_attention_heads * size_per_head, - activation=query_act, - name="query", - kernel_initializer=create_initializer(initializer_range)) - - # `key_layer` = [B*T, N*H] - key_layer = tf.compat.v1.layers.dense( - to_tensor_2d, - num_attention_heads * size_per_head, - activation=key_act, - name="key", - kernel_initializer=create_initializer(initializer_range)) - - # `value_layer` = [B*T, N*H] - value_layer = tf.compat.v1.layers.dense( - to_tensor_2d, - num_attention_heads * size_per_head, - activation=value_act, - name="value", - kernel_initializer=create_initializer(initializer_range)) - - # `query_layer` = [B, N, F, H] - query_layer = transpose_for_scores(query_layer, batch_size, - num_attention_heads, from_seq_length, - size_per_head) - - # `key_layer` = [B, N, T, H] - key_layer = transpose_for_scores(key_layer, batch_size, num_attention_heads, - to_seq_length, size_per_head) - - # Take the dot product between "query" and "key" to get the raw - # attention scores. 
- # `attention_scores` = [B, N, F, T] - attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True) - attention_scores = tf.multiply(attention_scores, bf.r_cast( - 1.0 / math.sqrt(float(size_per_head)))) - - if attention_mask is not None: - # `attention_mask` = [B, 1, F, T] - attention_mask = tf.expand_dims(attention_mask, axis=[1]) - - # Since attention_mask is 1.0 for positions we want to attend and 0.0 for - # masked positions, this operation will create a tensor which is 0.0 for - # positions we want to attend and -10000.0 for masked positions. - adder = (1.0 - tf.cast(attention_mask, from_tensor.dtype)) * -10000.0 - - # Since we are adding it to the raw scores before the softmax, this is - # effectively the same as removing these entirely. - attention_scores += adder - - # Normalize the attention scores to probabilities. - # `attention_probs` = [B, N, F, T] - attention_probs = bf.softmax(attention_scores) - - # This is actually dropping out entire tokens to attend to, which might - # seem a bit unusual, but is taken from the original Transformer paper. - attention_probs = dropout(attention_probs, attention_probs_dropout_prob) - - # `value_layer` = [B, T, N, H] - value_layer = tf.reshape( - value_layer, - [batch_size, to_seq_length, num_attention_heads, size_per_head]) - - # `value_layer` = [B, N, T, H] - value_layer = tf.transpose(a=value_layer, perm=[0, 2, 1, 3]) - - # `context_layer` = [B, N, F, H] - context_layer = tf.matmul(attention_probs, value_layer) - - # `context_layer` = [B, F, N, H] - context_layer = tf.transpose(a=context_layer, perm=[0, 2, 1, 3]) - - if do_return_2d_tensor: - # `context_layer` = [B*F, N*H] - context_layer = tf.reshape( - context_layer, - [batch_size * from_seq_length, num_attention_heads * size_per_head]) - else: - # `context_layer` = [B, F, N*H] - context_layer = tf.reshape( - context_layer, - [batch_size, from_seq_length, num_attention_heads * size_per_head]) - - return context_layer - - -def transformer_model(input_tensor, - attention_mask=None, - hidden_size=768, - num_hidden_layers=12, - num_attention_heads=12, - intermediate_size=3072, - intermediate_act_fn=bf.gelu, - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - initializer_range=0.02, - do_return_all_layers=False): - """Multi-headed, multi-layer Transformer from "Attention is All You Need". - - This is almost an exact implementation of the original Transformer encoder. - - See the original paper: - https://arxiv.org/abs/1706.03762 - - Also see: - https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/models/transformer.py - - Args: - input_tensor: float Tensor of shape [batch_size, seq_length, hidden_size]. - attention_mask: (optional) int32 Tensor of shape [batch_size, seq_length, - seq_length], with 1 for positions that can be attended to and 0 in - positions that should not be. - hidden_size: int. Hidden size of the Transformer. - num_hidden_layers: int. Number of layers (blocks) in the Transformer. - num_attention_heads: int. Number of attention heads in the Transformer. - intermediate_size: int. The size of the "intermediate" (a.k.a., feed - forward) layer. - intermediate_act_fn: function. The non-linear activation function to apply - to the output of the intermediate/feed-forward layer. - hidden_dropout_prob: float. Dropout probability for the hidden layers. - attention_probs_dropout_prob: float. Dropout probability of the attention - probabilities. - initializer_range: float. Range of the initializer (stddev of truncated - normal). 
- do_return_all_layers: Whether to also return all layers or just the final - layer. - - Returns: - float Tensor of shape [batch_size, seq_length, hidden_size], the final - hidden layer of the Transformer. - - Raises: - ValueError: A Tensor shape or parameter is invalid. - """ - if hidden_size % num_attention_heads != 0: - raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (hidden_size, num_attention_heads)) - - attention_head_size = int(hidden_size / num_attention_heads) - input_shape = get_shape_list(input_tensor, expected_rank=3) - batch_size = input_shape[0] - seq_length = input_shape[1] - input_width = input_shape[2] - - # The Transformer performs sum residuals on all layers so the input needs - # to be the same as the hidden size. - if input_width != hidden_size: - raise ValueError("The width of the input tensor (%d) != hidden size (%d)" % - (input_width, hidden_size)) - - # We keep the representation as a 2D tensor to avoid re-shaping it back and - # forth from a 3D tensor to a 2D tensor. Re-shapes are normally free on - # the GPU/CPU but may not be free on the TPU, so we want to minimize them to - # help the optimizer. - prev_output = reshape_to_matrix(input_tensor) - - all_layer_outputs = [] - for layer_idx in range(num_hidden_layers): - with tf.compat.v1.variable_scope("layer_%d" % layer_idx): - layer_input = prev_output - - with tf.compat.v1.variable_scope("attention"): - attention_heads = [] - with tf.compat.v1.variable_scope("self"): - attention_head = attention_layer( - from_tensor=layer_input, - to_tensor=layer_input, - attention_mask=attention_mask, - num_attention_heads=num_attention_heads, - size_per_head=attention_head_size, - attention_probs_dropout_prob=attention_probs_dropout_prob, - initializer_range=initializer_range, - do_return_2d_tensor=True, - batch_size=batch_size, - from_seq_length=seq_length, - to_seq_length=seq_length) - attention_heads.append(attention_head) - - attention_output = None - if len(attention_heads) == 1: - attention_output = attention_heads[0] - else: - # In the case where we have other sequences, we just concatenate - # them to the self-attention head before the projection. - attention_output = tf.concat(attention_heads, axis=-1) - - # Run a linear projection of `hidden_size` then add a residual - # with `layer_input`. - with tf.compat.v1.variable_scope("output"): - attention_output = tf.compat.v1.layers.dense( - attention_output, - hidden_size, - kernel_initializer=create_initializer(initializer_range)) - attention_output = dropout(attention_output, hidden_dropout_prob) - attention_output = layer_norm(attention_output + layer_input) - - # The activation is only applied to the "intermediate" hidden layer. - with tf.compat.v1.variable_scope("intermediate"): - intermediate_output = tf.compat.v1.layers.dense( - attention_output, - intermediate_size, - activation=intermediate_act_fn, - kernel_initializer=create_initializer(initializer_range)) - - # Down-project back to `hidden_size` then add the residual. 
- with tf.compat.v1.variable_scope("output"): - layer_output = tf.compat.v1.layers.dense( - intermediate_output, - hidden_size, - kernel_initializer=create_initializer(initializer_range)) - layer_output = dropout(layer_output, hidden_dropout_prob) - layer_output = layer_norm(layer_output + attention_output) - prev_output = layer_output - all_layer_outputs.append(layer_output) - - if do_return_all_layers: - final_outputs = [] - for layer_output in all_layer_outputs: - final_output = reshape_from_matrix(layer_output, input_shape) - final_outputs.append(final_output) - return final_outputs - else: - final_output = reshape_from_matrix(prev_output, input_shape) - return final_output - - -def get_shape_list(tensor, expected_rank=None, name=None): - """Returns a list of the shape of tensor, preferring static dimensions. - - Args: - tensor: A tf.Tensor object to find the shape of. - expected_rank: (optional) int. The expected rank of `tensor`. If this is - specified and the `tensor` has a different rank, and exception will be - thrown. - name: Optional name of the tensor for the error message. - - Returns: - A list of dimensions of the shape of tensor. All static dimensions will - be returned as python integers, and dynamic dimensions will be returned - as tf.Tensor scalars. - """ - if name is None: - name = tensor.name - - if expected_rank is not None: - assert_rank(tensor, expected_rank, name) - - shape = tensor.shape.as_list() - - non_static_indexes = [] - for (index, dim) in enumerate(shape): - if dim is None: - non_static_indexes.append(index) - - if not non_static_indexes: - return shape - - dyn_shape = tf.shape(input=tensor) - for index in non_static_indexes: - shape[index] = dyn_shape[index] - return shape - - -def reshape_to_matrix(input_tensor): - """Reshapes a >= rank 2 tensor to a rank 2 tensor (i.e., a matrix).""" - ndims = input_tensor.shape.ndims - if ndims < 2: - raise ValueError("Input tensor must have at least rank 2. Shape = %s" % - (input_tensor.shape)) - if ndims == 2: - return input_tensor - - width = input_tensor.shape[-1] - output_tensor = tf.reshape(input_tensor, [-1, width]) - return output_tensor - - -def reshape_from_matrix(output_tensor, orig_shape_list): - """Reshapes a rank 2 tensor back to its original rank >= 2 tensor.""" - if len(orig_shape_list) == 2: - return output_tensor - - output_shape = get_shape_list(output_tensor) - - orig_dims = orig_shape_list[0:-1] - width = output_shape[-1] - - return tf.reshape(output_tensor, orig_dims + [width]) - - -def assert_rank(tensor, expected_rank, name=None): - """Raises an exception if the tensor rank is not of the expected rank. - - Args: - tensor: A tf.Tensor to check the rank of. - expected_rank: Python integer or list of integers, expected rank. - name: Optional name of the tensor for the error message. - - Raises: - ValueError: If the expected shape doesn't match the actual shape. 
- """ - if name is None: - name = tensor.name - - expected_rank_dict = {} - if isinstance(expected_rank, six.integer_types): - expected_rank_dict[expected_rank] = True - else: - for x in expected_rank: - expected_rank_dict[x] = True - - actual_rank = tensor.shape.ndims - if actual_rank not in expected_rank_dict: - scope_name = tf.compat.v1.get_variable_scope().name - raise ValueError( - "For the tensor `%s` in scope `%s`, the actual rank " - "`%d` (shape = %s) is not equal to the expected rank `%s`" % - (name, scope_name, actual_rank, str(tensor.shape), str(expected_rank))) diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/optimization.py b/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/optimization.py deleted file mode 100644 index 597ef36dc11..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/optimization.py +++ /dev/null @@ -1,247 +0,0 @@ -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Functions and classes related to optimization (weight updates).""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import re -import tensorflow as tf -import generic_ops as bf - - -def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, accum_steps=1, use_tpu=False, fine_tuning=True): - """Creates an optimizer training op.""" - global_step = tf.compat.v1.train.get_or_create_global_step() - - learning_rate = tf.constant(value=init_lr, shape=[], dtype=tf.float32) - - # Implements linear decay of the learning rate. - learning_rate = tf.compat.v1.train.polynomial_decay( - learning_rate, - global_step, - num_train_steps, - end_learning_rate=0.0, - power=1.0, - cycle=False) - - # Implements linear warmup. I.e., if global_step < num_warmup_steps, the - # learning rate will be `global_step/num_warmup_steps * init_lr`. - if num_warmup_steps: - global_steps_int = tf.cast(global_step, tf.int32) - warmup_steps_int = tf.constant(num_warmup_steps, dtype=tf.int32) - - global_steps_float = tf.cast(global_steps_int, tf.float32) - warmup_steps_float = tf.cast(warmup_steps_int, tf.float32) - - warmup_percent_done = global_steps_float / warmup_steps_float - warmup_learning_rate = init_lr * warmup_percent_done - - is_warmup = tf.cast(global_steps_int < warmup_steps_int, tf.float32) - learning_rate = ( - (1.0 - is_warmup) * learning_rate + is_warmup * warmup_learning_rate) - - # It is recommended that you use this optimizer for fine tuning, since this - # is how the model was trained (note that the Adam m/v variables are NOT - # loaded from init_checkpoint.) 
- optimizer = AdamWeightDecayOptimizer( - learning_rate=learning_rate, - weight_decay_rate=0.01, - beta_1=0.9, - beta_2=0.999, - epsilon=1e-6, - exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"]) - - if use_tpu: - optimizer = tf.compat.v1.tpu.CrossShardOptimizer(optimizer) - - tvars = tf.compat.v1.trainable_variables() - - - if accum_steps > 1 : - #tf.compat.v1.logging.info("Accumulation Steps....") - grads_and_vars = optimizer.compute_gradients(loss * 1.0 / accum_steps, tvars) - - current_step = tf.compat.v1.get_variable(name="current_step", shape=[], dtype=tf.int32, - trainable=False, - initializer=tf.zeros_initializer) - accum_vars = [tf.compat.v1.get_variable( - name=tvar.name.split(":")[0] + "/agrads", - shape=tvar.shape.as_list(), - dtype=tf.float32, - trainable=False, - initializer=tf.zeros_initializer()) for tvar in tvars] - - apply_grads = tf.cast(tf.math.equal(current_step % accum_steps, 0), dtype=tf.bool) - current_step = tf.cond(apply_grads, - lambda:current_step.assign(tf.ones_like(current_step)), - lambda:current_step.assign_add(1)) - #lambda:inc_current_step(current_step, "Apply Grads:"), - #lambda:inc_current_step(current_step, "Step:")) - - grads_and_vars_and_accums = [(gv[0],gv[1],accum_vars[i]) for i, gv in enumerate(grads_and_vars) if gv[0] is not None] - grads, tvars, accum_vars = list(zip(*grads_and_vars_and_accums)) - - (cgrads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0) - - #accum_vars = update_accum_vars(accum_vars, apply_grads, cgrads, current_step) - accum_vars = tf.cond(apply_grads, - lambda: [accum_vars[i].assign(grad) for i, grad in enumerate(cgrads)], - lambda: [accum_vars[i].assign_add(grad) for i, grad in enumerate(cgrads)]) - - def applyGrads(accum_vars, current_step): - #tf.compat.v1.logging.info("\t\t APPLYING GRADIENTS....:", global_step) - return optimizer.apply_gradients(list(zip(accum_vars, tvars)), global_step=global_step) - - apply_step = tf.identity(tf.cast(tf.math.equal(current_step % accum_steps, 0), dtype=tf.bool), name="apply_step") - update_op = tf.cond(apply_step, lambda: applyGrads(accum_vars, current_step), lambda: tf.no_op()) - - new_global_step = tf.cond(apply_step, lambda: global_step+1, lambda: global_step) - new_global_step = tf.identity(new_global_step, name='global_step_update') - train_op = tf.group(update_op, [global_step.assign(new_global_step)]) - else : - grads = tf.gradients(ys=loss, xs=tvars) - - # This is how the model was pre-trained. - if fine_tuning : - (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0) - else : - gdtypes = [grad.dtype for grad in grads]; - mgrads = [tf.cast(grad, tf.float32) for grad in grads] - (grads, _) = tf.clip_by_global_norm(mgrads, clip_norm=1.0) - grads = [tf.cast(grad, ddtype) for grad, ddtype in zip(grads, gdtypes)] - - train_op = optimizer.apply_gradients( - zip(grads, tvars), global_step=global_step, fine_tuning=fine_tuning) - - # Normally the global step update is done inside of `apply_gradients`. - # However, `AdamWeightDecayOptimizer` doesn't do this. But if you use - # a different optimizer, you should probably take this line out. 
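
*Editor's note:* the accumulation branch above is written with TF1 conditionals and assign ops. Stripped of the graph plumbing, the idea is simply to sum micro-batch gradients and apply the optimizer once every `accum_steps` steps; the sketch below is my own toy version with made-up data, not the deleted code.

```python
import numpy as np

# Toy gradient accumulation: sum gradients over accum_steps micro-batches,
# then perform a single update on the summed gradients.
def accumulate_and_apply(micro_batch_grads, accum_steps, apply_fn):
    accum = None
    for step, grads in enumerate(micro_batch_grads, start=1):
        accum = grads if accum is None else [a + g for a, g in zip(accum, grads)]
        if step % accum_steps == 0:
            apply_fn(accum)    # e.g. an optimizer update on the summed grads
            accum = None

# Eight micro-batches of a single 1-element gradient -> two updates of value 4.0
accumulate_and_apply(([np.array([1.0])] for _ in range(8)),
                     accum_steps=4,
                     apply_fn=lambda grads: print("apply", grads[0]))
```
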
- new_global_step = global_step + 1 - new_global_step = tf.identity(new_global_step, name='global_step_update') - train_op = tf.group(train_op, [global_step.assign(new_global_step)]) - - return train_op - - -class AdamWeightDecayOptimizer(tf.compat.v1.train.Optimizer): - """A basic Adam optimizer that includes "correct" L2 weight decay.""" - - def __init__(self, - learning_rate, - weight_decay_rate=0.0, - beta_1=0.9, - beta_2=0.999, - epsilon=1e-6, - exclude_from_weight_decay=None, - name="AdamWeightDecayOptimizer"): - """Constructs a AdamWeightDecayOptimizer.""" - super(AdamWeightDecayOptimizer, self).__init__(False, name) - - self.learning_rate = learning_rate - self.weight_decay_rate = weight_decay_rate - self.beta_1 = beta_1 - self.beta_2 = beta_2 - self.epsilon = epsilon - self.exclude_from_weight_decay = exclude_from_weight_decay - - def apply_gradients(self, grads_and_vars, fine_tuning=True, global_step=None, name=None): - """See base class.""" - assignments = [] - for (grad, param) in grads_and_vars: - if grad is None or param is None: - continue - - param_name = self._get_variable_name(param.name) - - m = tf.compat.v1.get_variable( - name=param_name + "/adam_m", - shape=param.shape.as_list(), - dtype=tf.float32, - trainable=False, - initializer=tf.compat.v1.zeros_initializer()) - v = tf.compat.v1.get_variable( - name=param_name + "/adam_v", - shape=param.shape.as_list(), - dtype=tf.float32, - trainable=False, - initializer=tf.compat.v1.zeros_initializer()) - - # Standard Adam update. - if fine_tuning : - next_m = ( - tf.multiply(self.beta_1, m) + tf.multiply(1.0 - self.beta_1, grad)) - next_v = ( - tf.multiply(self.beta_2, v) + tf.multiply(1.0 - self.beta_2, - tf.square(grad))) - else : - next_m = ( - bf.multiply(self.beta_1, m) + bf.multiply(1.0 - self.beta_1, grad)) - next_v = ( - bf.multiply(self.beta_2, v) + bf.multiply(1.0 - self.beta_2, - tf.square(grad))) - - update = next_m / (tf.sqrt(next_v) + self.epsilon) - - # Just adding the square of the weights to the loss function is *not* - # the correct way of using L2 regularization/weight decay with Adam, - # since that will interact with the m and v parameters in strange ways. - # - # Instead we want ot decay the weights in a manner that doesn't interact - # with the m/v parameters. This is equivalent to adding the square - # of the weights to the loss with plain (non-momentum) SGD. 
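
*Editor's note:* the comment above is the heart of this optimizer: weight decay is added to the Adam update itself rather than to the loss. A NumPy sketch of a single step (my own illustration, reusing the moment/decay hyperparameters from the instantiation above with a made-up learning rate) makes the decoupling explicit.

```python
import numpy as np

# One AdamW-style step: the decay term enters the update after the m/v moments,
# so it does not interact with the adaptive second-moment scaling.
def adam_weight_decay_step(param, grad, m, v, lr=1e-3, beta1=0.9, beta2=0.999,
                           eps=1e-6, weight_decay=0.01):
    m = beta1 * m + (1.0 - beta1) * grad
    v = beta2 * v + (1.0 - beta2) * grad ** 2
    update = m / (np.sqrt(v) + eps) + weight_decay * param   # decoupled decay
    return param - lr * update, m, v

p, m, v = np.ones(3), np.zeros(3), np.zeros(3)
p, m, v = adam_weight_decay_step(p, np.array([0.1, -0.2, 0.3]), m, v)
print(p)
```
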
- if self._do_use_weight_decay(param_name): - if fine_tuning : - update += self.weight_decay_rate * param - else : - update += tf.cast(self.weight_decay_rate * param, update.dtype) - - if fine_tuning : - update_with_lr = self.learning_rate * update - - next_param = param - update_with_lr - - assignments.extend( - [param.assign(next_param), - m.assign(next_m), - v.assign(next_v)]) - else : - update_with_lr = tf.cast(self.learning_rate, update.dtype) * update - - next_param = tf.cast(param, update_with_lr.dtype) - update_with_lr - - - param, m, v = tf.cast(next_param, param.dtype), tf.cast(m, m.dtype), tf.cast(v, v.dtype) - - assignments.extend([param, m, v]) - return tf.group(*assignments, name=name) - - def _do_use_weight_decay(self, param_name): - """Whether to use L2 weight decay for `param_name`.""" - if not self.weight_decay_rate: - return False - if self.exclude_from_weight_decay: - for r in self.exclude_from_weight_decay: - if re.search(r, param_name) is not None: - return False - return True - - def _get_variable_name(self, param_name): - """Get the variable name from the tensor name.""" - m = re.match("^(.*):\\d+$", param_name) - if m is not None: - param_name = m.group(1) - return param_name diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/prepare_dataset.sh b/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/prepare_dataset.sh deleted file mode 100644 index acae8ce944d..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/prepare_dataset.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash -# set -x - -OUTPUT_DIR="./data" - -help() -{ - cat <<- EOF - Desc: Prepare bert dataset - -h --help help info - --output_dir Output data directory - default: './data' -EOF - exit 0 -} - -function main { - init_params "$@" - convert_dataset -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --output_dir=*) - OUTPUT_DIR=$(echo $var |cut -f2 -d=) - ;; - -h|--help) help - ;; - *) - echo "Error: No such parameter: ${var}" - exit 1 - ;; - esac - done -} - -# convert dataset -function convert_dataset { - if [ ! -d ${OUTPUT_DIR} ]; then - echo '${OUTPUT_DIR} already exists, please check...' - fi - wget https://storage.googleapis.com/bert_models/2019_05_30/wwm_uncased_L-24_H-1024_A-16.zip - unzip wwm_uncased_L-24_H-1024_A-16.zip - wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json -P wwm_uncased_L-24_H-1024_A-16 - mv wwm_uncased_L-24_H-1024_A-16 ${OUTPUT_DIR} - -} - -main "$@" - diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/prepare_model.sh b/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/prepare_model.sh deleted file mode 100644 index c1d9a4ed702..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/prepare_model.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/bash -# set -x - -OUTPUT_DIR="./model" - -help() -{ - cat <<- EOF - Desc: Prepare bert model - -h --help help info - --output_dir Output model directory - default: './model' -EOF - exit 0 -} - -function main { - init_params "$@" - convert_model -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --output_dir=*) - OUTPUT_DIR=$(echo $var |cut -f2 -d=) - ;; - -h|--help) help - ;; - *) - echo "Error: No such parameter: ${var}" - exit 1 - ;; - esac - done -} - -# convert model -function convert_model { - if [ ! -d ${OUTPUT_DIR} ]; then - echo '${OUTPUT_DIR} already exists, please check...' 
- fi - wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/bert_large_checkpoints.zip - unzip bert_large_checkpoints.zip - mv bert_large_checkpoints ${OUTPUT_DIR} - -} - -main "$@" - diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/run_benchmark.sh b/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/run_benchmark.sh deleted file mode 100644 index 38b0dc3807c..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/run_benchmark.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - iters=100 - for var in "$@" - do - case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - *) - echo "Error: No such parameter: ${var}" - exit 1 - ;; - esac - done - -} - - -# run_tuning -function run_benchmark { - - python tune_squad.py \ - --config=${config} \ - --input_model=${input_model} \ - --mode=${mode} \ - --benchmark \ - -} - -main "$@" diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/run_tuning.sh b/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/run_tuning.sh deleted file mode 100644 index be090267e8a..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/run_tuning.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -# set -x - -function main { - - init_params "$@" - - run_tuning - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; - --input_model=*) - input_model=$(echo "$var" |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo "$var" |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - python tune_squad.py \ - --input_model=${input_model} \ - --output_model=${output_model} \ - --config=${config} \ - --tune \ - -} - -main "$@" diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/tokenization.py b/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/tokenization.py deleted file mode 100644 index 52c92adb81f..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/tokenization.py +++ /dev/null @@ -1,399 +0,0 @@ -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Tokenization classes.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import re -import unicodedata -import six -import tensorflow as tf - - -def validate_case_matches_checkpoint(do_lower_case, init_checkpoint): - """Checks whether the casing config is consistent with the checkpoint name.""" - - # The casing has to be passed in by the user and there is no explicit check - # as to whether it matches the checkpoint. 
The casing information probably - # should have been stored in the bert_config.json file, but it's not, so - # we have to heuristically detect it to validate. - - if not init_checkpoint: - return - - m = re.match("^.*?([A-Za-z0-9_-]+)/bert_model.ckpt", init_checkpoint) - if m is None: - return - - model_name = m.group(1) - - lower_models = [ - "uncased_L-24_H-1024_A-16", "uncased_L-12_H-768_A-12", - "multilingual_L-12_H-768_A-12", "chinese_L-12_H-768_A-12" - ] - - cased_models = [ - "cased_L-12_H-768_A-12", "cased_L-24_H-1024_A-16", - "multi_cased_L-12_H-768_A-12" - ] - - is_bad_config = False - if model_name in lower_models and not do_lower_case: - is_bad_config = True - actual_flag = "False" - case_name = "lowercased" - opposite_flag = "True" - - if model_name in cased_models and do_lower_case: - is_bad_config = True - actual_flag = "True" - case_name = "cased" - opposite_flag = "False" - - if is_bad_config: - raise ValueError( - "You passed in `--do_lower_case=%s` with `--init_checkpoint=%s`. " - "However, `%s` seems to be a %s model, so you " - "should pass in `--do_lower_case=%s` so that the fine-tuning matches " - "how the model was pre-training. If this error is wrong, please " - "just comment out this check." % (actual_flag, init_checkpoint, - model_name, case_name, opposite_flag)) - - -def convert_to_unicode(text): - """Converts `text` to Unicode (if it's not already), assuming utf-8 input.""" - if six.PY3: - if isinstance(text, str): - return text - elif isinstance(text, bytes): - return text.decode("utf-8", "ignore") - else: - raise ValueError("Unsupported string type: %s" % (type(text))) - elif six.PY2: - if isinstance(text, str): - return text.decode("utf-8", "ignore") - elif isinstance(text, unicode): - return text - else: - raise ValueError("Unsupported string type: %s" % (type(text))) - else: - raise ValueError("Not running on Python2 or Python 3?") - - -def printable_text(text): - """Returns text encoded in a way suitable for print or `tf.logging`.""" - - # These functions want `str` for both Python2 and Python3, but in one case - # it's a Unicode string and in the other it's a byte string. 
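
*Editor's note:* the casing check above works by pulling the model directory name out of the checkpoint path and comparing it against the known cased/uncased releases. A small stand-alone demonstration (the path below is made up):

```python
import re

# Extract the model name from a (made-up) checkpoint path, as the check above does.
init_checkpoint = "/tmp/uncased_L-24_H-1024_A-16/bert_model.ckpt"
m = re.match(r"^.*?([A-Za-z0-9_-]+)/bert_model.ckpt", init_checkpoint)
print(m.group(1))   # 'uncased_L-24_H-1024_A-16' -> expects --do_lower_case=True
```
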
- if six.PY3: - if isinstance(text, str): - return text - elif isinstance(text, bytes): - return text.decode("utf-8", "ignore") - else: - raise ValueError("Unsupported string type: %s" % (type(text))) - elif six.PY2: - if isinstance(text, str): - return text - elif isinstance(text, unicode): - return text.encode("utf-8") - else: - raise ValueError("Unsupported string type: %s" % (type(text))) - else: - raise ValueError("Not running on Python2 or Python 3?") - - -def load_vocab(vocab_file): - """Loads a vocabulary file into a dictionary.""" - vocab = collections.OrderedDict() - index = 0 - with tf.io.gfile.GFile(vocab_file, "r") as reader: - while True: - token = convert_to_unicode(reader.readline()) - if not token: - break - token = token.strip() - vocab[token] = index - index += 1 - return vocab - - -def convert_by_vocab(vocab, items): - """Converts a sequence of [tokens|ids] using the vocab.""" - output = [] - for item in items: - output.append(vocab[item]) - return output - - -def convert_tokens_to_ids(vocab, tokens): - return convert_by_vocab(vocab, tokens) - - -def convert_ids_to_tokens(inv_vocab, ids): - return convert_by_vocab(inv_vocab, ids) - - -def whitespace_tokenize(text): - """Runs basic whitespace cleaning and splitting on a piece of text.""" - text = text.strip() - if not text: - return [] - tokens = text.split() - return tokens - - -class FullTokenizer(object): - """Runs end-to-end tokenziation.""" - - def __init__(self, vocab_file, do_lower_case=True): - self.vocab = load_vocab(vocab_file) - self.inv_vocab = {v: k for k, v in self.vocab.items()} - self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case) - self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) - - def tokenize(self, text): - split_tokens = [] - for token in self.basic_tokenizer.tokenize(text): - for sub_token in self.wordpiece_tokenizer.tokenize(token): - split_tokens.append(sub_token) - - return split_tokens - - def convert_tokens_to_ids(self, tokens): - return convert_by_vocab(self.vocab, tokens) - - def convert_ids_to_tokens(self, ids): - return convert_by_vocab(self.inv_vocab, ids) - - -class BasicTokenizer(object): - """Runs basic tokenization (punctuation splitting, lower casing, etc.).""" - - def __init__(self, do_lower_case=True): - """Constructs a BasicTokenizer. - - Args: - do_lower_case: Whether to lower case the input. - """ - self.do_lower_case = do_lower_case - - def tokenize(self, text): - """Tokenizes a piece of text.""" - text = convert_to_unicode(text) - text = self._clean_text(text) - - # This was added on November 1st, 2018 for the multilingual and Chinese - # models. This is also applied to the English models now, but it doesn't - # matter since the English models were not trained on any Chinese data - # and generally don't have any Chinese data in them (there are Chinese - # characters in the vocabulary because Wikipedia does have some Chinese - # words in the English Wikipedia.). 
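
*Editor's note:* `FullTokenizer` above chains `BasicTokenizer` (whitespace/punctuation splitting and lower-casing) with `WordpieceTokenizer` (greedy longest-match against the vocab, defined further down). The self-contained toy below, with a made-up vocabulary, shows how the second stage produces the `##` pieces; it is an illustration, not the real class.

```python
# Toy greedy longest-match wordpiece split (made-up vocabulary).
vocab = {"un", "##aff", "##able", "the"}

def toy_wordpiece(token, unk="[UNK]"):
    pieces, start = [], 0
    while start < len(token):
        end = len(token)
        while start < end:
            sub = ("##" if start > 0 else "") + token[start:end]
            if sub in vocab:
                pieces.append(sub)
                break
            end -= 1
        else:               # no vocab entry matched any prefix of the remainder
            return [unk]
        start = end
    return pieces

print(toy_wordpiece("unaffable"))   # ['un', '##aff', '##able']
```
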
- text = self._tokenize_chinese_chars(text) - - orig_tokens = whitespace_tokenize(text) - split_tokens = [] - for token in orig_tokens: - if self.do_lower_case: - token = token.lower() - token = self._run_strip_accents(token) - split_tokens.extend(self._run_split_on_punc(token)) - - output_tokens = whitespace_tokenize(" ".join(split_tokens)) - return output_tokens - - def _run_strip_accents(self, text): - """Strips accents from a piece of text.""" - text = unicodedata.normalize("NFD", text) - output = [] - for char in text: - cat = unicodedata.category(char) - if cat == "Mn": - continue - output.append(char) - return "".join(output) - - def _run_split_on_punc(self, text): - """Splits punctuation on a piece of text.""" - chars = list(text) - i = 0 - start_new_word = True - output = [] - while i < len(chars): - char = chars[i] - if _is_punctuation(char): - output.append([char]) - start_new_word = True - else: - if start_new_word: - output.append([]) - start_new_word = False - output[-1].append(char) - i += 1 - - return ["".join(x) for x in output] - - def _tokenize_chinese_chars(self, text): - """Adds whitespace around any CJK character.""" - output = [] - for char in text: - cp = ord(char) - if self._is_chinese_char(cp): - output.append(" ") - output.append(char) - output.append(" ") - else: - output.append(char) - return "".join(output) - - def _is_chinese_char(self, cp): - """Checks whether CP is the codepoint of a CJK character.""" - # This defines a "chinese character" as anything in the CJK Unicode block: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - # - # Note that the CJK Unicode block is NOT all Japanese and Korean characters, - # despite its name. The modern Korean Hangul alphabet is a different block, - # as is Japanese Hiragana and Katakana. Those alphabets are used to write - # space-separated words, so they are not treated specially and handled - # like the all of the other languages. - if ((cp >= 0x4E00 and cp <= 0x9FFF) or # - (cp >= 0x3400 and cp <= 0x4DBF) or # - (cp >= 0x20000 and cp <= 0x2A6DF) or # - (cp >= 0x2A700 and cp <= 0x2B73F) or # - (cp >= 0x2B740 and cp <= 0x2B81F) or # - (cp >= 0x2B820 and cp <= 0x2CEAF) or - (cp >= 0xF900 and cp <= 0xFAFF) or # - (cp >= 0x2F800 and cp <= 0x2FA1F)): # - return True - - return False - - def _clean_text(self, text): - """Performs invalid character removal and whitespace cleanup on text.""" - output = [] - for char in text: - cp = ord(char) - if cp == 0 or cp == 0xfffd or _is_control(char): - continue - if _is_whitespace(char): - output.append(" ") - else: - output.append(char) - return "".join(output) - - -class WordpieceTokenizer(object): - """Runs WordPiece tokenziation.""" - - def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=200): - self.vocab = vocab - self.unk_token = unk_token - self.max_input_chars_per_word = max_input_chars_per_word - - def tokenize(self, text): - """Tokenizes a piece of text into its word pieces. - - This uses a greedy longest-match-first algorithm to perform tokenization - using the given vocabulary. - - For example: - input = "unaffable" - output = ["un", "##aff", "##able"] - - Args: - text: A single token or whitespace separated tokens. This should have - already been passed through `BasicTokenizer. - - Returns: - A list of wordpiece tokens. 
- """ - - text = convert_to_unicode(text) - - output_tokens = [] - for token in whitespace_tokenize(text): - chars = list(token) - if len(chars) > self.max_input_chars_per_word: - output_tokens.append(self.unk_token) - continue - - is_bad = False - start = 0 - sub_tokens = [] - while start < len(chars): - end = len(chars) - cur_substr = None - while start < end: - substr = "".join(chars[start:end]) - if start > 0: - substr = "##" + substr - if substr in self.vocab: - cur_substr = substr - break - end -= 1 - if cur_substr is None: - is_bad = True - break - sub_tokens.append(cur_substr) - start = end - - if is_bad: - output_tokens.append(self.unk_token) - else: - output_tokens.extend(sub_tokens) - return output_tokens - - -def _is_whitespace(char): - """Checks whether `chars` is a whitespace character.""" - # \t, \n, and \r are technically contorl characters but we treat them - # as whitespace since they are generally considered as such. - if char == " " or char == "\t" or char == "\n" or char == "\r": - return True - cat = unicodedata.category(char) - if cat == "Zs": - return True - return False - - -def _is_control(char): - """Checks whether `chars` is a control character.""" - # These are technically control characters but we count them as whitespace - # characters. - if char == "\t" or char == "\n" or char == "\r": - return False - cat = unicodedata.category(char) - if cat in ("Cc", "Cf"): - return True - return False - - -def _is_punctuation(char): - """Checks whether `chars` is a punctuation character.""" - cp = ord(char) - # We treat all non-letter/number ASCII as punctuation. - # Characters such as "^", "$", and "`" are not in the Unicode - # Punctuation class but we treat them as punctuation anyways, for - # consistency. - if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or - (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): - return True - cat = unicodedata.category(char) - if cat.startswith("P"): - return True - return False diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/tune_squad.py b/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/tune_squad.py deleted file mode 100644 index 30ab3f5fa99..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/tune_squad.py +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env python - -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Run BERT on SQuAD 1.1 and SQuAD 2.0.""" - -import tensorflow as tf -import numpy as np - -flags = tf.compat.v1.flags -FLAGS = flags.FLAGS - -## Required parameters -flags.DEFINE_string( - 'input_model', None, 'Run inference with specified pb graph.') - -flags.DEFINE_string( - 'output_model', None, 'The output model of the quantized model.') - -flags.DEFINE_string( - 'mode', 'performance', 'define benchmark mode for accuracy or performance') - -flags.DEFINE_bool( - 'tune', False, 'whether to tune the model') - -flags.DEFINE_bool( - 'benchmark', False, 'whether to benchmark the model') - -flags.DEFINE_string( - 'config', 'bert.yaml', 'yaml configuration of the model') - -flags.DEFINE_bool( - 'strip_iterator', False, 'whether to strip the iterator of the model') - -def strip_iterator(graph_def): - from neural_compressor.adaptor.tf_utils.util import strip_unused_nodes - input_node_names = ['input_ids', 'input_mask', 'segment_ids'] - output_node_names = ['unstack'] - # create the placeholder and merge with the graph - with tf.compat.v1.Graph().as_default() as g: - input_ids = tf.compat.v1.placeholder(tf.int32, shape=(None,384), name="input_ids") - input_mask = tf.compat.v1.placeholder(tf.int32, shape=(None,384), name="input_mask") - segment_ids = tf.compat.v1.placeholder(tf.int32, shape=(None,384), name="segment_ids") - tf.import_graph_def(graph_def, name='') - - graph_def = g.as_graph_def() - # change the input from iterator to placeholder - for node in graph_def.node: - for idx, in_tensor in enumerate(node.input): - if 'IteratorGetNext:0' == in_tensor or 'IteratorGetNext' == in_tensor: - node.input[idx] = 'input_ids' - if 'IteratorGetNext:1' in in_tensor: - node.input[idx] = 'input_mask' - if 'IteratorGetNext:2' in in_tensor: - node.input[idx] = 'segment_ids' - - graph_def = strip_unused_nodes(graph_def, input_node_names, output_node_names) - return graph_def - -def main(_): - tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) - if FLAGS.benchmark: - from neural_compressor.experimental import Benchmark - evaluator = Benchmark(FLAGS.config) - evaluator.model = FLAGS.input_model - evaluator(FLAGS.mode) - - elif FLAGS.tune: - from neural_compressor.experimental import Quantization - quantizer = Quantization(FLAGS.config) - quantizer.model = FLAGS.input_model - q_model = quantizer.fit() - if FLAGS.strip_iterator: - q_model.graph_def = strip_iterator(q_model.graph_def) - q_model.save(FLAGS.output_model) - -if __name__ == "__main__": - tf.compat.v1.app.run() diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md deleted file mode 100644 index ff954a36864..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md +++ /dev/null @@ -1,84 +0,0 @@ -Step-by-Step -============ - -This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor tuning result of Intel® Model Zoo bert large model on squad v1.1 task. -This example can run on Intel CPUs and GPUs. - - -## Prerequisite - -### 1. Installation -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` -### 2. Install Intel Tensorflow -```python -pip install intel-tensorflow -``` - -### 3. Install Intel Extension for Tensorflow - -#### Quantizing the model on Intel GPU -Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. 
- -```shell -pip install --upgrade intel-extension-for-tensorflow[gpu] -``` -For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers) - -#### Quantizing the model on Intel CPU(Experimental) -Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. - -```shell -pip install --upgrade intel-extension-for-tensorflow[cpu] -``` - -### 4. Prepare Dataset -```shell -wget https://storage.googleapis.com/bert_models/2019_05_30/wwm_uncased_L-24_H-1024_A-16.zip -``` - -```shell -unzip wwm_uncased_L-24_H-1024_A-16.zip -``` - -```shell -wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json -P wwm_uncased_L-24_H-1024_A-16 -``` -wwm_uncased_L-24_H-1024_A-16 folder will be located on your data path. - -#### Automatic dataset download -Run the `prepare_dataset.sh` script located in `examples/tensorflow/nlp/bert_large_squad/quantization/ptq`. - -Usage: -```shell -cd examples/tensorflow/nlp/bert_large_squad/quantization/ptq -bash prepare_dataset.sh --output_dir=./data -``` - -Then create the tf_record file and you need to config the tf_record path in yaml file. -```shell -python create_tf_record.py --vocab_file=data/vocab.txt --predict_file=data/dev-v1.1.json --output_file=./eval.tf_record -``` - -### 5. Prepare Pretrained model -```shell -wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v2_7_0/fp32_bert_squad.pb -``` - -## Write Yaml config file -In examples directory, there is a bert.yaml for tuning the model on Intel CPUs. The 'framework' in the yaml is set to 'tensorflow'. If running this example on Intel GPUs, the 'framework' should be set to 'tensorflow_itex' and the device in yaml file should be set to 'gpu'. The bert_itex.yaml is prepared for the GPU case. We could remove most of items and only keep mandatory item for tuning. We also implement a calibration dataloader and have evaluation field for creation of evaluation function at internal neural_compressor. - -## Run Command - Please make sure below command should be executed with the same Tensorflow runtime version as above step. - -### Run Tuning - ```shell - python tune_squad.py --config=./bert.yaml --input_model=./bert_fp32.pb --output_model=./int8.pb --tune - ``` - -### Run Benchmark - ```shell - python tune_squad.py --config=./bert.yaml --input_model=./int8.pb --benchmark - ``` \ No newline at end of file diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/bert.yaml b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/bert.yaml deleted file mode 100644 index 5a9080d6f15..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/bert.yaml +++ /dev/null @@ -1,71 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
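
*Editor's note:* the run commands above drive everything through a YAML file and the `neural_compressor.experimental` entry points, which is the flow being removed in this PR. For orientation only, the tuning settings in the `bert.yaml` that follows map onto config objects in the newer API roughly as sketched below; the class and argument names are quoted from the 2.x `neural_compressor.config` module as I recall them, so verify against the installed release before relying on them.

```python
# Hedged sketch: config-object equivalent of the YAML 'quantization'/'tuning'
# sections below (relative accuracy loss 0.01, timeout 0, max_trials 100,
# calibration sampling_size 500). Names assumed from the 2.x API.
from neural_compressor.config import (AccuracyCriterion, PostTrainingQuantConfig,
                                      TuningCriterion)

conf = PostTrainingQuantConfig(
    calibration_sampling_size=[500],
    accuracy_criterion=AccuracyCriterion(criterion="relative", tolerable_loss=0.01),
    tuning_criterion=TuningCriterion(timeout=0, max_trials=100),
)

# The quantization itself would then be driven by something like:
#   from neural_compressor import quantization
#   q_model = quantization.fit("./bert_fp32.pb", conf,
#                              calib_dataloader=..., eval_func=...)
#   q_model.save("./int8.pb")
```
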
- -version: 1.0 - -model: - name: bert - framework: tensorflow - inputs: input_ids, input_mask, segment_ids - outputs: start_logits, end_logits - -device: cpu # optional. default value is cpu, other value is gpu. - -evaluation: - accuracy: - metric: - SquadF1: - dataloader: - dataset: - mzbert: - root: /path/to/eval.tf_record - label_file: /path/to/dev-v1.1.json - batch_size: 64 - postprocess: - transform: - SquadV1ModelZoo: - label_file: /path/to/dev-v1.1.json - vocab_file: /path/to/vocab.txt - performance: - iteration: 10 - configs: - num_of_instance: 4 - cores_per_instance: 7 - dataloader: - dataset: - mzbert: - root: /path/to/eval.tf_record - label_file: /path/to/dev-v1.1.json - batch_size: 64 - -quantization: - calibration: - sampling_size: 500 - dataloader: - dataset: - mzbert: - root: /path/to/eval.tf_record - label_file: /path/to/dev-v1.1.json - batch_size: 64 - model_wise: - weight: - granularity: per_channel -tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - max_trials: 100 - random_seed: 9527 diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/bert_itex.yaml b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/bert_itex.yaml deleted file mode 100644 index 36730dbfa1a..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/bert_itex.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: - name: bert - framework: tensorflow_itex - inputs: input_ids, input_mask, segment_ids - outputs: start_logits, end_logits - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. 
- -evaluation: - accuracy: - metric: - SquadF1: - dataloader: - dataset: - mzbert: - root: /path/to/eval.tf_record - label_file: /path/to/dev-v1.1.json - batch_size: 64 - postprocess: - transform: - SquadV1ModelZoo: - label_file: /path/to/dev-v1.1.json - vocab_file: /path/to/vocab.txt - performance: - iteration: 10 - configs: - num_of_instance: 4 - cores_per_instance: 7 - dataloader: - dataset: - mzbert: - root: /path/to/eval.tf_record - label_file: /path/to/dev-v1.1.json - batch_size: 64 - -quantization: - calibration: - sampling_size: 500 - dataloader: - dataset: - mzbert: - root: /path/to/eval.tf_record - label_file: /path/to/dev-v1.1.json - batch_size: 64 - model_wise: - weight: - granularity: per_channel -tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - max_trials: 100 - random_seed: 9527 \ No newline at end of file diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_pretraining_data.py b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_pretraining_data.py deleted file mode 100644 index 3b3dbd4bc59..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_pretraining_data.py +++ /dev/null @@ -1,472 +0,0 @@ -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Create masked LM/next sentence masked_lm TF examples for BERT.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import random -import tokenization -import tensorflow as tf - -from absl import app -#from absl import flags -from absl import logging -flags = tf.compat.v1.flags - -FLAGS = flags.FLAGS - -flags.DEFINE_string("input_file", None, - "Input raw text file (or comma-separated list of files).") - -flags.DEFINE_string( - "output_file", None, - "Output TF example file (or comma-separated list of files).") - -flags.DEFINE_string("vocab_file", None, - "The vocabulary file that the BERT model was trained on.") - -flags.DEFINE_bool( - "do_lower_case", True, - "Whether to lower case the input text. 
Should be True for uncased " - "models and False for cased models.") - -flags.DEFINE_bool( - "do_whole_word_mask", False, - "Whether to use whole word masking rather than per-WordPiece masking.") - -flags.DEFINE_integer("max_seq_length", 128, "Maximum sequence length.") - -flags.DEFINE_integer("max_predictions_per_seq", 20, - "Maximum number of masked LM predictions per sequence.") - -flags.DEFINE_integer("random_seed", 12345, "Random seed for data generation.") - -flags.DEFINE_integer( - "dupe_factor", 10, - "Number of times to duplicate the input data (with different masks).") - -flags.DEFINE_float("masked_lm_prob", 0.15, "Masked LM probability.") - -flags.DEFINE_float( - "short_seq_prob", 0.1, - "Probability of creating sequences which are shorter than the " - "maximum length.") - - -class TrainingInstance(object): - """A single training instance (sentence pair).""" - - def __init__(self, tokens, segment_ids, masked_lm_positions, masked_lm_labels, - is_random_next): - self.tokens = tokens - self.segment_ids = segment_ids - self.is_random_next = is_random_next - self.masked_lm_positions = masked_lm_positions - self.masked_lm_labels = masked_lm_labels - - def __str__(self): - s = "" - s += "tokens: %s\n" % (" ".join( - [tokenization.printable_text(x) for x in self.tokens])) - s += "segment_ids: %s\n" % (" ".join([str(x) for x in self.segment_ids])) - s += "is_random_next: %s\n" % self.is_random_next - s += "masked_lm_positions: %s\n" % (" ".join( - [str(x) for x in self.masked_lm_positions])) - s += "masked_lm_labels: %s\n" % (" ".join( - [tokenization.printable_text(x) for x in self.masked_lm_labels])) - s += "\n" - return s - - def __repr__(self): - return self.__str__() - - -def write_instance_to_example_files(instances, tokenizer, max_seq_length, - max_predictions_per_seq, output_files): - """Create TF example files from `TrainingInstance`s.""" - writers = [] - for output_file in output_files: - writers.append(tf.io.TFRecordWriter(output_file)) - - writer_index = 0 - - total_written = 0 - for (inst_index, instance) in enumerate(instances): - input_ids = tokenizer.convert_tokens_to_ids(instance.tokens) - input_mask = [1] * len(input_ids) - segment_ids = list(instance.segment_ids) - assert len(input_ids) <= max_seq_length - - while len(input_ids) < max_seq_length: - input_ids.append(0) - input_mask.append(0) - segment_ids.append(0) - - assert len(input_ids) == max_seq_length - assert len(input_mask) == max_seq_length - assert len(segment_ids) == max_seq_length - - masked_lm_positions = list(instance.masked_lm_positions) - masked_lm_ids = tokenizer.convert_tokens_to_ids(instance.masked_lm_labels) - masked_lm_weights = [1.0] * len(masked_lm_ids) - - while len(masked_lm_positions) < max_predictions_per_seq: - masked_lm_positions.append(0) - masked_lm_ids.append(0) - masked_lm_weights.append(0.0) - - next_sentence_label = 1 if instance.is_random_next else 0 - - features = collections.OrderedDict() - features["input_ids"] = create_int_feature(input_ids) - features["input_mask"] = create_int_feature(input_mask) - features["segment_ids"] = create_int_feature(segment_ids) - features["masked_lm_positions"] = create_int_feature(masked_lm_positions) - features["masked_lm_ids"] = create_int_feature(masked_lm_ids) - features["masked_lm_weights"] = create_float_feature(masked_lm_weights) - features["next_sentence_labels"] = create_int_feature([next_sentence_label]) - - tf_example = tf.train.Example(features=tf.train.Features(feature=features)) - - 
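
*Editor's note:* each padded instance above ends up as a `tf.train.Example` whose fields are fixed-length integer lists plus float masked-LM weights. The tiny self-contained version below (made-up ids, sequence length 4) is an illustration of that packing, not the original helpers.

```python
import tensorflow as tf

# Toy feature packing: padded ids/mask plus float masked-LM weights, serialized
# the same way the writer loop does with the real instances.
def int_feature(values):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))

def float_feature(values):
    return tf.train.Feature(float_list=tf.train.FloatList(value=list(values)))

feature = {
    "input_ids": int_feature([101, 2054, 102, 0]),    # padded to max_seq_length = 4
    "input_mask": int_feature([1, 1, 1, 0]),
    "segment_ids": int_feature([0, 0, 0, 0]),
    "masked_lm_weights": float_feature([1.0, 0.0]),
}
example = tf.train.Example(features=tf.train.Features(feature=feature))
print(len(example.SerializeToString()), "serialized bytes")
```
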
writers[writer_index].write(tf_example.SerializeToString()) - writer_index = (writer_index + 1) % len(writers) - - total_written += 1 - - if inst_index < 20: - tf.compat.v1.logging.info("*** Example ***") - tf.compat.v1.logging.info("tokens: %s" % " ".join( - [tokenization.printable_text(x) for x in instance.tokens])) - - for feature_name in features.keys(): - feature = features[feature_name] - values = [] - if feature.int64_list.value: - values = feature.int64_list.value - elif feature.float_list.value: - values = feature.float_list.value - tf.compat.v1.logging.info( - "%s: %s" % (feature_name, " ".join([str(x) for x in values]))) - - for writer in writers: - writer.close() - - tf.compat.v1.logging.info("Wrote %d total instances", total_written) - - -def create_int_feature(values): - feature = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values))) - return feature - - -def create_float_feature(values): - feature = tf.train.Feature(float_list=tf.train.FloatList(value=list(values))) - return feature - - -def create_training_instances(input_files, tokenizer, max_seq_length, - dupe_factor, short_seq_prob, masked_lm_prob, - max_predictions_per_seq, rng): - """Create `TrainingInstance`s from raw text.""" - all_documents = [[]] - - # Input file format: - # (1) One sentence per line. These should ideally be actual sentences, not - # entire paragraphs or arbitrary spans of text. (Because we use the - # sentence boundaries for the "next sentence prediction" task). - # (2) Blank lines between documents. Document boundaries are needed so - # that the "next sentence prediction" task doesn't span between documents. - for input_file in input_files: - with tf.io.gfile.GFile(input_file, "r") as reader: - while True: - line = tokenization.convert_to_unicode(reader.readline()) - if not line: - break - line = line.strip() - - # Empty lines are used as document delimiters - if not line: - all_documents.append([]) - tokens = tokenizer.tokenize(line) - if tokens: - all_documents[-1].append(tokens) - - # Remove empty documents - all_documents = [x for x in all_documents if x] - rng.shuffle(all_documents) - - vocab_words = list(tokenizer.vocab.keys()) - instances = [] - for _ in range(dupe_factor): - for document_index in range(len(all_documents)): - instances.extend( - create_instances_from_document( - all_documents, document_index, max_seq_length, short_seq_prob, - masked_lm_prob, max_predictions_per_seq, vocab_words, rng)) - - rng.shuffle(instances) - return instances - - -def create_instances_from_document( - all_documents, document_index, max_seq_length, short_seq_prob, - masked_lm_prob, max_predictions_per_seq, vocab_words, rng): - """Creates `TrainingInstance`s for a single document.""" - document = all_documents[document_index] - - # Account for [CLS], [SEP], [SEP] - max_num_tokens = max_seq_length - 3 - - # We *usually* want to fill up the entire sequence since we are padding - # to `max_seq_length` anyways, so short sequences are generally wasted - # computation. However, we *sometimes* - # (i.e., short_seq_prob == 0.1 == 10% of the time) want to use shorter - # sequences to minimize the mismatch between pre-training and fine-tuning. - # The `target_seq_length` is just a rough target however, whereas - # `max_seq_length` is a hard limit. 
- target_seq_length = max_num_tokens - if rng.random() < short_seq_prob: - target_seq_length = rng.randint(2, max_num_tokens) - - # We DON'T just concatenate all of the tokens from a document into a long - # sequence and choose an arbitrary split point because this would make the - # next sentence prediction task too easy. Instead, we split the input into - # segments "A" and "B" based on the actual "sentences" provided by the user - # input. - instances = [] - current_chunk = [] - current_length = 0 - i = 0 - while i < len(document): - segment = document[i] - current_chunk.append(segment) - current_length += len(segment) - if i == len(document) - 1 or current_length >= target_seq_length: - if current_chunk: - # `a_end` is how many segments from `current_chunk` go into the `A` - # (first) sentence. - a_end = 1 - if len(current_chunk) >= 2: - a_end = rng.randint(1, len(current_chunk) - 1) - - tokens_a = [] - for j in range(a_end): - tokens_a.extend(current_chunk[j]) - - tokens_b = [] - # Random next - is_random_next = False - if len(current_chunk) == 1 or rng.random() < 0.5: - is_random_next = True - target_b_length = target_seq_length - len(tokens_a) - - # This should rarely go for more than one iteration for large - # corpora. However, just to be careful, we try to make sure that - # the random document is not the same as the document - # we're processing. - for _ in range(10): - random_document_index = rng.randint(0, len(all_documents) - 1) - if random_document_index != document_index: - break - - random_document = all_documents[random_document_index] - random_start = rng.randint(0, len(random_document) - 1) - for j in range(random_start, len(random_document)): - tokens_b.extend(random_document[j]) - if len(tokens_b) >= target_b_length: - break - # We didn't actually use these segments so we "put them back" so - # they don't go to waste. - num_unused_segments = len(current_chunk) - a_end - i -= num_unused_segments - # Actual next - else: - is_random_next = False - for j in range(a_end, len(current_chunk)): - tokens_b.extend(current_chunk[j]) - truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng) - - assert len(tokens_a) >= 1 - assert len(tokens_b) >= 1 - - tokens = [] - segment_ids = [] - tokens.append("[CLS]") - segment_ids.append(0) - for token in tokens_a: - tokens.append(token) - segment_ids.append(0) - - tokens.append("[SEP]") - segment_ids.append(0) - - for token in tokens_b: - tokens.append(token) - segment_ids.append(1) - tokens.append("[SEP]") - segment_ids.append(1) - - (tokens, masked_lm_positions, - masked_lm_labels) = create_masked_lm_predictions( - tokens, masked_lm_prob, max_predictions_per_seq, vocab_words, rng) - instance = TrainingInstance( - tokens=tokens, - segment_ids=segment_ids, - is_random_next=is_random_next, - masked_lm_positions=masked_lm_positions, - masked_lm_labels=masked_lm_labels) - instances.append(instance) - current_chunk = [] - current_length = 0 - i += 1 - - return instances - - -MaskedLmInstance = collections.namedtuple("MaskedLmInstance", - ["index", "label"]) - - -def create_masked_lm_predictions(tokens, masked_lm_prob, - max_predictions_per_seq, vocab_words, rng): - """Creates the predictions for the masked LM objective.""" - - cand_indexes = [] - for (i, token) in enumerate(tokens): - if token == "[CLS]" or token == "[SEP]": - continue - # Whole Word Masking means that if we mask all of the wordpieces - # corresponding to an original word. 
When a word has been split into - # WordPieces, the first token does not have any marker and any subsequence - # tokens are prefixed with ##. So whenever we see the ## token, we - # append it to the previous set of word indexes. - # - # Note that Whole Word Masking does *not* change the training code - # at all -- we still predict each WordPiece independently, softmaxed - # over the entire vocabulary. - if (FLAGS.do_whole_word_mask and len(cand_indexes) >= 1 and - token.startswith("##")): - cand_indexes[-1].append(i) - else: - cand_indexes.append([i]) - - rng.shuffle(cand_indexes) - - output_tokens = list(tokens) - - num_to_predict = min(max_predictions_per_seq, - max(1, int(round(len(tokens) * masked_lm_prob)))) - - masked_lms = [] - covered_indexes = set() - for index_set in cand_indexes: - if len(masked_lms) >= num_to_predict: - break - # If adding a whole-word mask would exceed the maximum number of - # predictions, then just skip this candidate. - if len(masked_lms) + len(index_set) > num_to_predict: - continue - is_any_index_covered = False - for index in index_set: - if index in covered_indexes: - is_any_index_covered = True - break - if is_any_index_covered: - continue - for index in index_set: - covered_indexes.add(index) - - masked_token = None - # 80% of the time, replace with [MASK] - if rng.random() < 0.8: - masked_token = "[MASK]" - else: - # 10% of the time, keep original - if rng.random() < 0.5: - masked_token = tokens[index] - # 10% of the time, replace with random word - else: - masked_token = vocab_words[rng.randint(0, len(vocab_words) - 1)] - - output_tokens[index] = masked_token - - masked_lms.append(MaskedLmInstance(index=index, label=tokens[index])) - assert len(masked_lms) <= num_to_predict - masked_lms = sorted(masked_lms, key=lambda x: x.index) - - masked_lm_positions = [] - masked_lm_labels = [] - for p in masked_lms: - masked_lm_positions.append(p.index) - masked_lm_labels.append(p.label) - - return (output_tokens, masked_lm_positions, masked_lm_labels) - - -def truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng): - """Truncates a pair of sequences to a maximum sequence length.""" - while True: - total_length = len(tokens_a) + len(tokens_b) - if total_length <= max_num_tokens: - break - - trunc_tokens = tokens_a if len(tokens_a) > len(tokens_b) else tokens_b - assert len(trunc_tokens) >= 1 - - # We want to sometimes truncate from the front and sometimes from the - # back to add more randomness and avoid biases. 
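
*Editor's note:* returning to the masking choice implemented a few lines up: each selected position is replaced with `[MASK]` 80% of the time, kept unchanged 10% of the time, and swapped for a random vocabulary word the remaining 10%. A stand-alone toy version (made-up vocabulary), for illustration only:

```python
import random

# 80/10/10 rule for a single selected position, mirroring the logic above.
def choose_masked_token(original, vocab_words, rng):
    if rng.random() < 0.8:
        return "[MASK]"
    return original if rng.random() < 0.5 else rng.choice(vocab_words)

rng = random.Random(12345)
print([choose_masked_token("cat", ["dog", "tree", "car"], rng) for _ in range(8)])
```
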
- if rng.random() < 0.5: - del trunc_tokens[0] - else: - trunc_tokens.pop() - - -def main(_): - tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) - - tokenizer = tokenization.FullTokenizer( - vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) - - input_files = [] - for input_pattern in FLAGS.input_file.split(","): - input_files.extend(tf.io.gfile.glob(input_pattern)) - - tf.compat.v1.logging.info("*** Reading from input files ***") - for input_file in input_files: - tf.compat.v1.logging.info(" %s", input_file) - - rng = random.Random(FLAGS.random_seed) - instances = create_training_instances( - input_files, tokenizer, FLAGS.max_seq_length, FLAGS.dupe_factor, - FLAGS.short_seq_prob, FLAGS.masked_lm_prob, FLAGS.max_predictions_per_seq, - rng) - - output_files = FLAGS.output_file.split(",") - tf.compat.v1.logging.info("*** Writing to output files ***") - for output_file in output_files: - tf.compat.v1.logging.info(" %s", output_file) - - write_instance_to_example_files(instances, tokenizer, FLAGS.max_seq_length, - FLAGS.max_predictions_per_seq, output_files) - - -if __name__ == "__main__": - flags.mark_flag_as_required("input_file") - flags.mark_flag_as_required("output_file") - flags.mark_flag_as_required("vocab_file") - tf.compat.v1.app.run() diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_tf_record.py b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_tf_record.py deleted file mode 100644 index 9dcc7a48994..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_tf_record.py +++ /dev/null @@ -1,508 +0,0 @@ -#!/usr/bin/env python - -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""generate bert dataset""" - -import collections -import json -import os -import tokenization -import six -import tensorflow as tf - -from absl import app -#from absl import flags -from absl import logging - -flags = tf.compat.v1.flags -FLAGS = flags.FLAGS - -## Required parameters -flags.DEFINE_string("vocab_file", None, - "The vocabulary file that the BERT model was trained on.") - -flags.DEFINE_string( - "predict_file", None, - "SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json") - -flags.DEFINE_string( - "output_file", None, "The output tf_record for usage.") - -class SquadExample(object): - """A single training/test example for simple sequence classification. - - For examples without an answer, the start and end position are -1. 
- """ - - def __init__(self, - qas_id, - question_text, - doc_tokens, - orig_answer_text=None, - start_position=None, - end_position=None, - is_impossible=False): - self.qas_id = qas_id - self.question_text = question_text - self.doc_tokens = doc_tokens - self.orig_answer_text = orig_answer_text - self.start_position = start_position - self.end_position = end_position - self.is_impossible = is_impossible - #self.startpb = 0 - - def __str__(self): - return self.__repr__() - - def __repr__(self): - s = "" - s += "qas_id: %s" % (tokenization.printable_text(self.qas_id)) - s += ", question_text: %s" % ( - tokenization.printable_text(self.question_text)) - s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens)) - if self.start_position: - s += ", start_position: %d" % (self.start_position) - if self.start_position: - s += ", end_position: %d" % (self.end_position) - if self.start_position: - s += ", is_impossible: %r" % (self.is_impossible) - return s - - -class InputFeatures(object): - """A single set of features of data.""" - - def __init__(self, - unique_id, - example_index, - doc_span_index, - tokens, - token_to_orig_map, - token_is_max_context, - input_ids, - input_mask, - segment_ids, - start_position=None, - end_position=None, - is_impossible=None): - self.unique_id = unique_id - self.example_index = example_index - self.doc_span_index = doc_span_index - self.tokens = tokens - self.token_to_orig_map = token_to_orig_map - self.token_is_max_context = token_is_max_context - self.input_ids = input_ids - self.input_mask = input_mask - self.segment_ids = segment_ids - self.start_position = start_position - self.end_position = end_position - self.is_impossible = is_impossible - - -def read_squad_examples(input_file, is_training): - """Read a SQuAD json file into a list of SquadExample.""" - with tf.io.gfile.GFile(input_file, "r") as reader: - input_data = json.load(reader)["data"] - - def is_whitespace(c): - if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F: - return True - return False - - examples = [] - for entry in input_data: - for paragraph in entry["paragraphs"]: - paragraph_text = paragraph["context"] - doc_tokens = [] - char_to_word_offset = [] - prev_is_whitespace = True - for c in paragraph_text: - if is_whitespace(c): - prev_is_whitespace = True - else: - if prev_is_whitespace: - doc_tokens.append(c) - else: - doc_tokens[-1] += c - prev_is_whitespace = False - char_to_word_offset.append(len(doc_tokens) - 1) - - for qa in paragraph["qas"]: - qas_id = qa["id"] - question_text = qa["question"] - start_position = None - end_position = None - orig_answer_text = None - is_impossible = False - if is_training: - - if FLAGS.version_2_with_negative: - is_impossible = qa["is_impossible"] - if (len(qa["answers"]) != 1) and (not is_impossible): - raise ValueError( - "For training, each question should have exactly 1 answer.") - if not is_impossible: - answer = qa["answers"][0] - orig_answer_text = answer["text"] - answer_offset = answer["answer_start"] - answer_length = len(orig_answer_text) - start_position = char_to_word_offset[answer_offset] - end_position = char_to_word_offset[answer_offset + answer_length - - 1] - # Only add answers where the text can be exactly recovered from the - # document. If this CAN'T happen it's likely due to weird Unicode - # stuff so we will just skip the example. - # - # Note that this means for training mode, every example is NOT - # guaranteed to be preserved. 
- actual_text = " ".join( - doc_tokens[start_position:(end_position + 1)]) - cleaned_answer_text = " ".join( - tokenization.whitespace_tokenize(orig_answer_text)) - if actual_text.find(cleaned_answer_text) == -1: - tf.compat.v1.logging.warning("Could not find answer: '%s' vs. '%s'", - actual_text, cleaned_answer_text) - continue - else: - start_position = -1 - end_position = -1 - orig_answer_text = "" - - example = SquadExample( - qas_id=qas_id, - question_text=question_text, - doc_tokens=doc_tokens, - orig_answer_text=orig_answer_text, - start_position=start_position, - end_position=end_position, - is_impossible=is_impossible) - examples.append(example) - - return examples - - -def convert_examples_to_features(examples, tokenizer, max_seq_length, - doc_stride, max_query_length, is_training, - output_fn): - """Loads a data file into a list of `InputBatch`s.""" - - unique_id = 1000000000 - - for (example_index, example) in enumerate(examples): - query_tokens = tokenizer.tokenize(example.question_text) - - if len(query_tokens) > max_query_length: - query_tokens = query_tokens[0:max_query_length] - - tok_to_orig_index = [] - orig_to_tok_index = [] - all_doc_tokens = [] - for (i, token) in enumerate(example.doc_tokens): - orig_to_tok_index.append(len(all_doc_tokens)) - sub_tokens = tokenizer.tokenize(token) - for sub_token in sub_tokens: - tok_to_orig_index.append(i) - all_doc_tokens.append(sub_token) - - tok_start_position = None - tok_end_position = None - if is_training and example.is_impossible: - tok_start_position = -1 - tok_end_position = -1 - if is_training and not example.is_impossible: - tok_start_position = orig_to_tok_index[example.start_position] - if example.end_position < len(example.doc_tokens) - 1: - tok_end_position = orig_to_tok_index[example.end_position + 1] - 1 - else: - tok_end_position = len(all_doc_tokens) - 1 - (tok_start_position, tok_end_position) = _improve_answer_span( - all_doc_tokens, tok_start_position, tok_end_position, tokenizer, - example.orig_answer_text) - - # The -3 accounts for [CLS], [SEP] and [SEP] - max_tokens_for_doc = max_seq_length - len(query_tokens) - 3 - - # We can have documents that are longer than the maximum sequence length. - # To deal with this we do a sliding window approach, where we take chunks - # of the up to our max length with a stride of `doc_stride`. 
- _DocSpan = collections.namedtuple( # pylint: disable=invalid-name - "DocSpan", ["start", "length"]) - doc_spans = [] - start_offset = 0 - while start_offset < len(all_doc_tokens): - length = len(all_doc_tokens) - start_offset - if length > max_tokens_for_doc: - length = max_tokens_for_doc - doc_spans.append(_DocSpan(start=start_offset, length=length)) - if start_offset + length == len(all_doc_tokens): - break - start_offset += min(length, doc_stride) - - for (doc_span_index, doc_span) in enumerate(doc_spans): - tokens = [] - token_to_orig_map = {} - token_is_max_context = {} - segment_ids = [] - tokens.append("[CLS]") - segment_ids.append(0) - for token in query_tokens: - tokens.append(token) - segment_ids.append(0) - tokens.append("[SEP]") - segment_ids.append(0) - - for i in range(doc_span.length): - split_token_index = doc_span.start + i - token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index] - - is_max_context = _check_is_max_context(doc_spans, doc_span_index, - split_token_index) - token_is_max_context[len(tokens)] = is_max_context - tokens.append(all_doc_tokens[split_token_index]) - segment_ids.append(1) - tokens.append("[SEP]") - segment_ids.append(1) - - input_ids = tokenizer.convert_tokens_to_ids(tokens) - - # The mask has 1 for real tokens and 0 for padding tokens. Only real - # tokens are attended to. - input_mask = [1] * len(input_ids) - - # Zero-pad up to the sequence length. - while len(input_ids) < max_seq_length: - input_ids.append(0) - input_mask.append(0) - segment_ids.append(0) - - assert len(input_ids) == max_seq_length - assert len(input_mask) == max_seq_length - assert len(segment_ids) == max_seq_length - - start_position = None - end_position = None - if is_training and not example.is_impossible: - # For training, if our document chunk does not contain an annotation - # we throw it out, since there is nothing to predict. 
- doc_start = doc_span.start - doc_end = doc_span.start + doc_span.length - 1 - out_of_span = False - if not (tok_start_position >= doc_start and - tok_end_position <= doc_end): - out_of_span = True - if out_of_span: - start_position = 0 - end_position = 0 - else: - doc_offset = len(query_tokens) + 2 - start_position = tok_start_position - doc_start + doc_offset - end_position = tok_end_position - doc_start + doc_offset - - if is_training and example.is_impossible: - start_position = 0 - end_position = 0 - - if example_index < 1: - tf.compat.v1.logging.info("*** Example ***") - tf.compat.v1.logging.info("unique_id: %s" % (unique_id)) - tf.compat.v1.logging.info("example_index: %s" % (example_index)) - tf.compat.v1.logging.info("doc_span_index: %s" % (doc_span_index)) - tf.compat.v1.logging.info("tokens: %s" % " ".join( - [tokenization.printable_text(x) for x in tokens])) - tf.compat.v1.logging.info("token_to_orig_map: %s" % " ".join( - ["%d:%d" % (x, y) for (x, y) in six.iteritems(token_to_orig_map)])) - tf.compat.v1.logging.info("token_is_max_context: %s" % " ".join([ - "%d:%s" % (x, y) for (x, y) in six.iteritems(token_is_max_context) - ])) - tf.compat.v1.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids])) - tf.compat.v1.logging.info( - "input_mask: %s" % " ".join([str(x) for x in input_mask])) - tf.compat.v1.logging.info( - "segment_ids: %s" % " ".join([str(x) for x in segment_ids])) - if is_training and example.is_impossible: - tf.compat.v1.logging.info("impossible example") - if is_training and not example.is_impossible: - answer_text = " ".join(tokens[start_position:(end_position + 1)]) - tf.compat.v1.logging.info("start_position: %d" % (start_position)) - tf.compat.v1.logging.info("end_position: %d" % (end_position)) - tf.compat.v1.logging.info( - "answer: %s" % (tokenization.printable_text(answer_text))) - - feature = InputFeatures( - unique_id=unique_id, - example_index=example_index, - doc_span_index=doc_span_index, - tokens=tokens, - token_to_orig_map=token_to_orig_map, - token_is_max_context=token_is_max_context, - input_ids=input_ids, - input_mask=input_mask, - segment_ids=segment_ids, - start_position=start_position, - end_position=end_position, - is_impossible=example.is_impossible) - - # Run callback - output_fn(feature) - - unique_id += 1 - - -def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer, - orig_answer_text): - """Returns tokenized answer spans that better match the annotated answer.""" - - # The SQuAD annotations are character based. We first project them to - # whitespace-tokenized words. But then after WordPiece tokenization, we can - # often find a "better match". For example: - # - # Question: What year was John Smith born? - # Context: The leader was John Smith (1895-1943). - # Answer: 1895 - # - # The original whitespace-tokenized answer will be "(1895-1943).". However - # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match - # the exact answer, 1895. - # - # However, this is not always possible. Consider the following: - # - # Question: What country is the top exporter of electornics? - # Context: The Japanese electronics industry is the lagest in the world. - # Answer: Japan - # - # In this case, the annotator chose "Japan" as a character sub-span of - # the word "Japanese". Since our WordPiece tokenizer does not split - # "Japanese", we just use "Japanese" as the annotation. This is fairly rare - # in SQuAD, but does happen. 
- tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text)) - - for new_start in range(input_start, input_end + 1): - for new_end in range(input_end, new_start - 1, -1): - text_span = " ".join(doc_tokens[new_start:(new_end + 1)]) - if text_span == tok_answer_text: - return (new_start, new_end) - - return (input_start, input_end) - - -def _check_is_max_context(doc_spans, cur_span_index, position): - """Check if this is the 'max context' doc span for the token.""" - - # Because of the sliding window approach taken to scoring documents, a single - # token can appear in multiple documents. E.g. - # Doc: the man went to the store and bought a gallon of milk - # Span A: the man went to the - # Span B: to the store and bought - # Span C: and bought a gallon of - # ... - # - # Now the word 'bought' will have two scores from spans B and C. We only - # want to consider the score with "maximum context", which we define as - # the *minimum* of its left and right context (the *sum* of left and - # right context will always be the same, of course). - # - # In the example the maximum context for 'bought' would be span C since - # it has 1 left context and 3 right context, while span B has 4 left context - # and 0 right context. - best_score = None - best_span_index = None - for (span_index, doc_span) in enumerate(doc_spans): - end = doc_span.start + doc_span.length - 1 - if position < doc_span.start: - continue - if position > end: - continue - num_left_context = position - doc_span.start - num_right_context = end - position - score = min(num_left_context, num_right_context) + 0.01 * doc_span.length - if best_score is None or score > best_score: - best_score = score - best_span_index = span_index - - return cur_span_index == best_span_index - -class FeatureWriter(object): - """Writes InputFeature to TF example file.""" - - def __init__(self, filename, is_training): - self.is_training = is_training - self.num_features = 0 - self.filename = filename - self._writer = tf.io.TFRecordWriter(self.filename) - - def process_feature(self, feature): - """Write a InputFeature to the TFRecordWriter as a tf.train.Example.""" - self.num_features += 1 - - def create_int_feature(values): - feature = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(values))) - return feature - - features = collections.OrderedDict() - features["unique_ids"] = create_int_feature([feature.unique_id]) - features["input_ids"] = create_int_feature(feature.input_ids) - features["input_mask"] = create_int_feature(feature.input_mask) - features["segment_ids"] = create_int_feature(feature.segment_ids) - - if self.is_training: - features["start_positions"] = create_int_feature([feature.start_position]) - features["end_positions"] = create_int_feature([feature.end_position]) - impossible = 0 - if feature.is_impossible: - impossible = 1 - features["is_impossible"] = create_int_feature([impossible]) - - tf_example = tf.train.Example(features=tf.train.Features(feature=features)) - self._writer.write(tf_example.SerializeToString()) - - def close(self): - self._writer.close() - - def rm_tmp_file(self): - os.remove(self.filename) - -def main(_): - tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) - - tokenizer = tokenization.FullTokenizer( - vocab_file=FLAGS.vocab_file, do_lower_case=True) - - eval_examples = read_squad_examples( - input_file=FLAGS.predict_file, is_training=False) - - eval_writer = FeatureWriter( - filename=FLAGS.output_file, is_training=False) - - eval_features = [] - def append_feature(feature): - 
eval_features.append(feature) - eval_writer.process_feature(feature) - convert_examples_to_features( - examples=eval_examples, - tokenizer=tokenizer, - max_seq_length=384, - doc_stride=128, - max_query_length=64, - is_training=False, - output_fn=append_feature) - - -if __name__ == "__main__": - flags.mark_flag_as_required("vocab_file") - flags.mark_flag_as_required("predict_file") - flags.mark_flag_as_required("output_file") - tf.compat.v1.app.run() diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/evaluate_squad.py b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/evaluate_squad.py deleted file mode 100644 index cdb87d2b12d..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/evaluate_squad.py +++ /dev/null @@ -1,98 +0,0 @@ -""" Official evaluation script for v1.1 of the SQuAD dataset. -https://github.com/allenai/bi-att-flow/blob/master/squad/evaluate-v1.1.py """ -from __future__ import print_function -from collections import Counter -import string -import re -import argparse -import json -import sys - - -def normalize_answer(s): - """Lower text and remove punctuation, articles and extra whitespace.""" - def remove_articles(text): - return re.sub(r'\b(a|an|the)\b', ' ', text) - - def white_space_fix(text): - return ' '.join(text.split()) - - def remove_punc(text): - exclude = set(string.punctuation) - return ''.join(ch for ch in text if ch not in exclude) - - def lower(text): - return text.lower() - - return white_space_fix(remove_articles(remove_punc(lower(s)))) - - -def f1_score(prediction, ground_truth): - prediction_tokens = normalize_answer(prediction).split() - ground_truth_tokens = normalize_answer(ground_truth).split() - common = Counter(prediction_tokens) & Counter(ground_truth_tokens) - num_same = sum(common.values()) - if num_same == 0: - return 0 - precision = 1.0 * num_same / len(prediction_tokens) - recall = 1.0 * num_same / len(ground_truth_tokens) - f1 = (2 * precision * recall) / (precision + recall) - return f1 - - -def exact_match_score(prediction, ground_truth): - return (normalize_answer(prediction) == normalize_answer(ground_truth)) - - -def metric_max_over_ground_truths(metric_fn, prediction, ground_truths): - scores_for_ground_truths = [] - for ground_truth in ground_truths: - score = metric_fn(prediction, ground_truth) - scores_for_ground_truths.append(score) - return max(scores_for_ground_truths) - - -def evaluate(dataset, predictions): - f1 = exact_match = total = 0 - for article in dataset: - for paragraph in article['paragraphs']: - for qa in paragraph['qas']: - total += 1 - if qa['id'] not in predictions: - message = 'Unanswered question ' + qa['id'] + \ - ' will receive score 0.' 
-                    print(message, file=sys.stderr)
-                    continue
-                ground_truths = list(map(lambda x: x['text'], qa['answers']))
-                prediction = predictions[qa['id']]
-                exact_match += metric_max_over_ground_truths(
-                    exact_match_score, prediction, ground_truths)
-                f1 += metric_max_over_ground_truths(
-                    f1_score, prediction, ground_truths)
-
-    exact_match = 100.0 * exact_match / total
-    f1 = 100.0 * f1 / total
-
-    return {'exact_match': exact_match, 'f1': f1}
-
-
-if __name__ == '__main__':
-    expected_version = '1.1'
-    parser = argparse.ArgumentParser(
-        description='Evaluation for SQuAD ' + expected_version)
-    parser.add_argument('dataset_file', help='Dataset file')
-    parser.add_argument('prediction_file', help='Prediction File')
-    args = parser.parse_args()
-
-    with open(args.dataset_file) as dataset_file:
-        dataset_json = json.load(dataset_file)
-        if (dataset_json['version'] != expected_version):
-            print('Evaluation expects v-' + expected_version +
-                  ', but got dataset with v-' + dataset_json['version'],
-                  file=sys.stderr)
-        dataset = dataset_json['data']
-
-    with open(args.prediction_file) as prediction_file:
-        predictions = json.load(prediction_file)
-    print(json.dumps(evaluate(dataset, predictions)))
-
diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/export_classifier.py b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/export_classifier.py
deleted file mode 100644
index f85e3c7e805..00000000000
--- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/export_classifier.py
+++ /dev/null
@@ -1,163 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-import tensorflow as tf
-from absl import app
-from absl import logging
-tf.compat.v1.disable_v2_behavior()
-
-tf.compat.v1.flags.DEFINE_bool("saved_model",
-                               False,
-                               "whether to export the saved model or not")
-FLAGS = tf.compat.v1.flags.FLAGS
-
-# We just import the classifier here for `create_model` and some processors such as
-# MNLI or MRPC. Because the flags are already defined in `run_classifier.py`, we do
-# not need to define them again.
-from run_classifier import create_model_top -from run_classifier import ColaProcessor -from run_classifier import MnliProcessor -from run_classifier import MrpcProcessor -from run_classifier import XnliProcessor -from modeling import BertConfig - -class ClassifierExporter: - def __init__(self, - output_dir: str, - task_name: str, - bert_config: str, - max_seq_length: int): - - processors = { - "cola": ColaProcessor, - "mnli": MnliProcessor, - "mrpc": MrpcProcessor, - "xnli": XnliProcessor - } - - task_name = task_name.lower() - if task_name not in processors: - raise ValueError("Task not found: %s" % (task_name)) - - processor = processors[task_name]() - label_list = processor.get_labels() - num_labels = len(label_list) - - # create model for CPU/dGPU, not TPU - use_one_hot_embeddings = False - - bert_config = BertConfig.from_json_file(bert_config) - if FLAGS.precision: - bert_config.precision = FLAGS.precision - - self.session = tf.compat.v1.Session() - - placeholder = tf.compat.v1.placeholder - input_shape = [None, max_seq_length] - self.label_ids = placeholder(tf.int32, [None], name='label_ids') - self.input_ids = placeholder(tf.int32, input_shape, name='input_ids') - self.input_mask = placeholder(tf.int32, input_shape, name='input_mask') - self.segment_ids = placeholder(tf.int32, input_shape, name='segment_ids') - - self.loss, self.per_example_loss, self.logits, self.probabilities = \ - create_model_top(bert_config, False, # is training - self.input_ids, self.input_mask, self.segment_ids, - self.label_ids, num_labels, use_one_hot_embeddings, - None) # frozen graph path - - latest_model = tf.train.latest_checkpoint(FLAGS.output_dir) - saver = tf.compat.v1.train.Saver() - saver.restore(self.session, latest_model) - - self.output_dir = output_dir - self.dest_dir = os.path.join(self.output_dir, "frozen") - if not os.path.exists(self.dest_dir): - os.mkdir(self.dest_dir) - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, exc_tb): - self.session.close() - - def export(self, saved_model: bool): - if saved_model: - self.export_saved_model() - - self.export_frozen_graph() - - def export_saved_model(self, - signature_def_name="eval", - tag=tf.compat.v1.saved_model.tag_constants.SERVING): - build_tensor_info = tf.compat.v1.saved_model.build_tensor_info - signature_def_utils = tf.compat.v1.saved_model.signature_def_utils - inputs = { - 'label_ids': build_tensor_info(self.label_ids), - 'input_ids': build_tensor_info(self.input_ids), - 'input_mask': build_tensor_info(self.input_mask), - 'segment_ids': build_tensor_info(self.segment_ids) - } - - outputs = { - "loss": build_tensor_info(self.loss), - "per_example_loss": build_tensor_info(self.per_example_loss), - "logits": build_tensor_info(self.logits), - "probabilities": build_tensor_info(self.probabilities) - } - - signature = signature_def_utils.build_signature_def(inputs, outputs) - signature_def_map = {signature_def_name: signature} - - builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(self.dest_dir) - builder.add_meta_graph_and_variables(self.session, [tag], signature_def_map) - builder.save() - - def export_frozen_graph(self, frozen_graph_name="frozen_graph.pb"): - # we should disable v2 behavior, at the same time, the bn norm has some op name difference - # should be handled. Otherwise, it will throw exception when do import graph def. 
- # https://www.bountysource.com/issues/36614355-unable-to-import-frozen-graph-with-batchnorm - graph_def = self.session.graph.as_graph_def() - for node in graph_def.node: - if node.op == 'RefEnter': - node.op = 'Enter' - for index in range(len(node.input)): - if 'moving_' in node.input[index]: - node.input[index] = node.input[index] + '/read' - if node.op == 'RefSwitch': - node.op = 'Switch' - for index in range(len(node.input)): - if 'moving_' in node.input[index]: - node.input[index] = node.input[index] + '/read' - elif node.op == 'AssignSub': - node.op = 'Sub' - if 'use_locking' in node.attr: del node.attr['use_locking'] - elif node.op == 'AssignAdd': - node.op = 'Add' - if 'use_locking' in node.attr: del node.attr['use_locking'] - - outputs_name = ['loss/Mean', 'loss/Sum', 'loss/BiasAdd', 'loss/Softmax'] - graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(self.session, - graph_def, - outputs_name) - - path = os.path.join(self.dest_dir, frozen_graph_name) - with tf.compat.v1.gfile.GFile(path, 'wb') as pb_file: - pb_file.write(graph_def.SerializeToString()) - -def main(_): - tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) - - with ClassifierExporter(FLAGS.output_dir, - FLAGS.task_name, - FLAGS.bert_config_file, - FLAGS.max_seq_length) as exporter: - exporter.export(FLAGS.saved_model) - -if __name__ == "__main__": - tf.compat.v1.flags.mark_flag_as_required("task_name") - tf.compat.v1.flags.mark_flag_as_required("bert_config_file") - tf.compat.v1.flags.mark_flag_as_required("output_dir") - tf.compat.v1.app.run() diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/freeze_estimator_to_pb.py b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/freeze_estimator_to_pb.py deleted file mode 100644 index f43def945e0..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/freeze_estimator_to_pb.py +++ /dev/null @@ -1,340 +0,0 @@ -#!/usr/bin/env python - -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Freeze estimator to frozen pb for bert full pipline tuning.""" - -import os -import modeling -import tensorflow as tf -import numpy as np -from absl import app -from absl import logging - -flags = tf.compat.v1.flags -FLAGS = flags.FLAGS - -## Required parameters -flags.DEFINE_string( - "input_model", None, "The input checkpoint path of model.") - -flags.DEFINE_string( - "output_model", None, "The output path frozen pb will be written.") - -def write_graph(out_graph_def, out_graph_file): - from tensorflow.python.platform import gfile - if not isinstance(out_graph_def, tf.compat.v1.GraphDef): - raise ValueError( - 'out_graph_def is not instance of TensorFlow GraphDef.') - if out_graph_file and not os.path.exists(os.path.dirname(out_graph_file)): - raise ValueError('"output_graph" directory does not exists.') - f = gfile.GFile(out_graph_file, 'wb') - f.write(out_graph_def.SerializeToString()) - -def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer, - orig_answer_text): - """Returns tokenized answer spans that better match the annotated answer.""" - - # The SQuAD annotations are character based. We first project them to - # whitespace-tokenized words. But then after WordPiece tokenization, we can - # often find a "better match". For example: - # - # Question: What year was John Smith born? - # Context: The leader was John Smith (1895-1943). - # Answer: 1895 - # - # The original whitespace-tokenized answer will be "(1895-1943).". However - # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match - # the exact answer, 1895. - # - # However, this is not always possible. Consider the following: - # - # Question: What country is the top exporter of electornics? - # Context: The Japanese electronics industry is the lagest in the world. - # Answer: Japan - # - # In this case, the annotator chose "Japan" as a character sub-span of - # the word "Japanese". Since our WordPiece tokenizer does not split - # "Japanese", we just use "Japanese" as the annotation. This is fairly rare - # in SQuAD, but does happen. - tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text)) - - for new_start in range(input_start, input_end + 1): - for new_end in range(input_end, new_start - 1, -1): - text_span = " ".join(doc_tokens[new_start:(new_end + 1)]) - if text_span == tok_answer_text: - return (new_start, new_end) - - return (input_start, input_end) - -def _check_is_max_context(doc_spans, cur_span_index, position): - """Check if this is the 'max context' doc span for the token.""" - - # Because of the sliding window approach taken to scoring documents, a single - # token can appear in multiple documents. E.g. - # Doc: the man went to the store and bought a gallon of milk - # Span A: the man went to the - # Span B: to the store and bought - # Span C: and bought a gallon of - # ... - # - # Now the word 'bought' will have two scores from spans B and C. We only - # want to consider the score with "maximum context", which we define as - # the *minimum* of its left and right context (the *sum* of left and - # right context will always be the same, of course). - # - # In the example the maximum context for 'bought' would be span C since - # it has 1 left context and 3 right context, while span B has 4 left context - # and 0 right context. 
- best_score = None - best_span_index = None - for (span_index, doc_span) in enumerate(doc_spans): - end = doc_span.start + doc_span.length - 1 - if position < doc_span.start: - continue - if position > end: - continue - num_left_context = position - doc_span.start - num_right_context = end - position - score = min(num_left_context, num_right_context) + 0.01 * doc_span.length - if best_score is None or score > best_score: - best_score = score - best_span_index = span_index - - return cur_span_index == best_span_index - - -def create_model(bert_config, is_training, input_ids, input_mask, segment_ids, - use_one_hot_embeddings): - """Creates a classification model.""" - model = modeling.BertModel( - config=bert_config, - is_training=is_training, - input_ids=input_ids, - input_mask=input_mask, - token_type_ids=segment_ids, - use_one_hot_embeddings=use_one_hot_embeddings) - - final_hidden = model.get_sequence_output() - - final_hidden_shape = modeling.get_shape_list(final_hidden, expected_rank=3) - batch_size = final_hidden_shape[0] - seq_length = final_hidden_shape[1] - hidden_size = final_hidden_shape[2] - - output_weights = tf.compat.v1.get_variable( - "cls/squad/output_weights", [2, hidden_size], - initializer=tf.compat.v1.truncated_normal_initializer(stddev=0.02)) - - output_bias = tf.compat.v1.get_variable( - "cls/squad/output_bias", [2], initializer=tf.compat.v1.zeros_initializer()) - - final_hidden_matrix = tf.reshape(final_hidden, - [batch_size * seq_length, hidden_size]) - logits = tf.matmul(final_hidden_matrix, output_weights, transpose_b=True) - logits = tf.nn.bias_add(logits, output_bias) - - logits = tf.reshape(logits, [batch_size, seq_length, 2]) - logits = tf.transpose(a=logits, perm=[2, 0, 1]) - - unstacked_logits = tf.unstack(logits, axis=0) - - (start_logits, end_logits) = (unstacked_logits[0], unstacked_logits[1]) - - return (start_logits, end_logits) - - -def model_fn_builder(bert_config, init_checkpoint, learning_rate, - num_train_steps, num_warmup_steps, use_tpu, - use_one_hot_embeddings): - """Returns `model_fn` closure for TPUEstimator.""" - def model_fn(features, labels, mode, params): # pylint: disable=unused-argument - """The `model_fn` for TPUEstimator.""" - - tf.compat.v1.logging.info("*** Features ***") - # for name in sorted(features.keys()): - # tf.compat.v1.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) - - unique_ids = features["unique_ids"] - input_ids = features["input_ids"] - input_mask = features["input_mask"] - segment_ids = features["segment_ids"] - - is_training = (mode == tf.estimator.ModeKeys.TRAIN) - - (start_logits, end_logits) = create_model( - bert_config=bert_config, - is_training=is_training, - input_ids=input_ids, - input_mask=input_mask, - segment_ids=segment_ids, - use_one_hot_embeddings=use_one_hot_embeddings) - - tvars = tf.compat.v1.trainable_variables() - - initialized_variable_names = {} - scaffold_fn = None - if init_checkpoint: - (assignment_map, initialized_variable_names - ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint, "SQuAD") - if use_tpu: - - def tpu_scaffold(): - tf.compat.v1.train.init_from_checkpoint(init_checkpoint, assignment_map) - return tf.compat.v1.train.Scaffold() - - scaffold_fn = tpu_scaffold - else: - tf.compat.v1.train.init_from_checkpoint(init_checkpoint, assignment_map) - - tf.compat.v1.logging.info("**** Trainable Variables ****") - for var in tvars: - init_string = "" - if var.name in initialized_variable_names: - init_string = ", *INIT_FROM_CKPT*" - - output_spec = 
None - - predictions = { - "unique_ids": unique_ids, - "start_logits": start_logits, - "end_logits": end_logits, - } - output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec( - mode=mode, predictions=predictions, scaffold_fn=scaffold_fn) - - return output_spec - - return model_fn - - -def input_fn_builder(input_file, seq_length, is_training, drop_remainder): - """Creates an `input_fn` closure to be passed to TPUEstimator.""" - - name_to_features = { - "unique_ids": tf.io.FixedLenFeature([], tf.int64), - "input_ids": tf.io.FixedLenFeature([seq_length], tf.int64), - "input_mask": tf.io.FixedLenFeature([seq_length], tf.int64), - "segment_ids": tf.io.FixedLenFeature([seq_length], tf.int64), - } - - if is_training: - name_to_features["start_positions"] = tf.io.FixedLenFeature([], tf.int64) - name_to_features["end_positions"] = tf.io.FixedLenFeature([], tf.int64) - - def _decode_record(record, name_to_features): - """Decodes a record to a TensorFlow example.""" - example = tf.io.parse_single_example(serialized=record, features=name_to_features) - - # tf.Example only supports tf.int64, but the TPU only supports tf.int32. - # So cast all int64 to int32. - for name in list(example.keys()): - t = example[name] - if t.dtype == tf.int64: - t = tf.cast(t, dtype=tf.int32) - example[name] = t - - return example - - def input_fn(params): - """The actual input function.""" - input_file_placeholder = tf.compat.v1.placeholder(shape=[], - name="input_file", dtype=tf.string) - batch_size_placeholder = tf.compat.v1.placeholder(shape=[], - name="batch_size", dtype=tf.int64) - #batch_size = params["batch_size"] - - # For training, we want a lot of parallel reading and shuffling. - # For eval, we want no shuffling and parallel reading doesn't matter. - # d = tf.data.TFRecordDataset(input_file) - d = tf.data.TFRecordDataset(input_file_placeholder) - if is_training: - d = d.repeat() - d = d.shuffle(buffer_size=100) - - d = d.apply( - tf.data.experimental.map_and_batch( - lambda record: _decode_record(record, name_to_features), - batch_size=batch_size_placeholder, - drop_remainder=drop_remainder)) - - return d - - return input_fn - -bert_config_dict = {'vocab_size': 30522, 'hidden_size': 1024, 'num_hidden_layers': 24, \ - 'num_attention_heads': 16, 'hidden_act': 'gelu', 'intermediate_size': 4096,\ - 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, \ - 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, \ - 'precision': 'fp32', 'new_bf16_scope': True, 'experimental_gelu': False, \ - 'optimized_softmax': False} - -def main(_): - tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) - - bert_config = modeling.BertConfig.from_dict(bert_config_dict) - - session_config = tf.compat.v1.ConfigProto( - inter_op_parallelism_threads=2, - intra_op_parallelism_threads=27, - allow_soft_placement=True) - - is_per_host = tf.compat.v1.estimator.tpu.InputPipelineConfig.PER_HOST_V2 - - run_config = tf.compat.v1.estimator.tpu.RunConfig( - cluster=None, - master=None, - model_dir='./', - save_checkpoints_steps=1000, - session_config=session_config, - tpu_config=tf.compat.v1.estimator.tpu.TPUConfig( - iterations_per_loop=1000, - num_shards=8, - per_host_input_for_training=is_per_host)) - - predict_input_fn = input_fn_builder( - input_file='', - seq_length=384, - is_training=False, - drop_remainder=False) - - from neural_compressor.adaptor.tf_utils.util import is_ckpt_format - assert is_ckpt_format(FLAGS.input_model), 'invalid chekpoint path....' 
- ckpt_model = [os.path.splitext(i)[0] for i in os.listdir(FLAGS.input_model) \ - if i.endswith('.meta')][0] - model_fn = model_fn_builder( - bert_config=bert_config, - init_checkpoint=os.path.join(FLAGS.input_model, ckpt_model), - learning_rate=5e-5, - num_train_steps=None, - num_warmup_steps=None, - use_tpu=False, - use_one_hot_embeddings=False) - - # If TPU is not available, this will fall back to normal Estimator on CPU - # or GPU. - estimator = tf.compat.v1.estimator.tpu.TPUEstimator( - use_tpu=False, - model_fn=model_fn, - config=run_config, - train_batch_size=32, - predict_batch_size=8) - - from neural_compressor.adaptor.tf_utils.util import get_estimator_graph - graph = get_estimator_graph(estimator, predict_input_fn) - write_graph(graph.as_graph_def(), FLAGS.output_model) - -if __name__ == "__main__": - tf.compat.v1.app.run() diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/generic_ops.py b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/generic_ops.py deleted file mode 100644 index b0749752f88..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/generic_ops.py +++ /dev/null @@ -1,112 +0,0 @@ -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""The main BERT model and related functions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - - -_inprecision = tf.float32 -_rprecision = tf.float32 -_keras_policy = tf.keras.mixed_precision.experimental.Policy("float32") -_use_optimized_softmax = True -_use_experimental_gelu = True - -def set_global_precision(dt): - # Set Keras API precision - global _keras_policy - if dt == tf.bfloat16: - _keras_policy=tf.keras.mixed_precision.experimental.Policy("mixed_bfloat16") - - # Set basic API precision - set_rprecision(dt) - -def set_rprecision(dt): - global _rprecision - _rprecision=dt - -def get_keras_policy(): - return _keras_policy - -def set_global_flags(optimized_softmax, experimental_gelu): - global _use_optimized_softmax - global _use_experimental_gelu - _use_optimized_softmax = optimized_softmax - _use_experimental_gelu = experimental_gelu - -def i_cast(x) : - return tf.cast(x, _inprecision) - -def r_cast(x) : - return tf.cast(x, _rprecision) - -def multiply(x,y): - x = r_cast(x) - y = r_cast(y) - return tf.multiply(x,y) - -def mzip(x,y): - if x.dtype== tf.bfloat16: - x = r_cast(x) - y = r_cast(y) - return zip(x,y) - -def tanh(x): - x = i_cast(x) - rval = tf.tanh(x) - return r_cast(rval) - -def softmax(scores, axis=None): - if _use_optimized_softmax: - return tf.nn.softmax(scores, axis) - else: - scores = i_cast(scores) - rval = tf.nn.softmax(scores, axis) - return r_cast(rval) - -def layer_norm(inputs, begin_norm_axis, begin_params_axis, scope): - lnorm = tf.keras.layers.LayerNormalization(dtype=get_keras_policy()) - return lnorm(inputs) - -"Moved from modeling.py" -def gelu(x): - """Gaussian Error Linear Unit. - - This is a smoother version of the RELU. - Original paper: https://arxiv.org/abs/1606.08415 - Args: - x: float Tensor to perform activation. - - Returns: - `x` with the GELU activation applied. - """ - #if _use_experial_gelu: - if True: - print("using experimental gelu") - return tf.nn.gelu(x) - else: - x = i_cast(x) - cdf = 0.5 * (1.0 + tf.tanh( - (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))) - rval = x * cdf - return r_cast(rval) - -def logTheLossHook(total_loss, n): - return tf.compat.v1.train.LoggingTensorHook({"\t Loss " : total_loss}, every_n_iter=n) - diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/modeling.py b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/modeling.py deleted file mode 100644 index 7e8c02632f8..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/modeling.py +++ /dev/null @@ -1,1052 +0,0 @@ -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""The main BERT model and related functions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import copy -import json -import math -import re -import numpy as np -import six -import tensorflow as tf -import generic_ops as bf - - -class BertConfig(object): - """Configuration for `BertModel`.""" - - def __init__(self, - vocab_size, - hidden_size=768, - num_hidden_layers=12, - num_attention_heads=12, - intermediate_size=3072, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - initializer_range=0.02, - precision="fp32", - new_bf16_scope=True): - """Constructs BertConfig. - - Args: - vocab_size: Vocabulary size of `inputs_ids` in `BertModel`. - hidden_size: Size of the encoder layers and the pooler layer. - num_hidden_layers: Number of hidden layers in the Transformer encoder. - num_attention_heads: Number of attention heads for each attention layer in - the Transformer encoder. - intermediate_size: The size of the "intermediate" (i.e., feed-forward) - layer in the Transformer encoder. - hidden_act: The non-linear activation function (function or string) in the - encoder and pooler. - hidden_dropout_prob: The dropout probability for all fully connected - layers in the embeddings, encoder, and pooler. - attention_probs_dropout_prob: The dropout ratio for the attention - probabilities. - max_position_embeddings: The maximum sequence length that this model might - ever be used with. Typically set this to something large just in case - (e.g., 512 or 1024 or 2048). - type_vocab_size: The vocabulary size of the `token_type_ids` passed into - `BertModel`. - initializer_range: The stdev of the truncated_normal_initializer for - initializing all weight matrices. - precision: To enable fp32 or bfloat16 based training - """ - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.hidden_act = hidden_act - self.intermediate_size = intermediate_size - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.initializer_range = initializer_range - self.precision = precision - self.new_bf16_scope = new_bf16_scope - self.experimental_gelu = False - self.optimized_softmax = False - - @classmethod - def from_dict(cls, json_object): - """Constructs a `BertConfig` from a Python dictionary of parameters.""" - config = BertConfig(vocab_size=None) - for (key, value) in six.iteritems(json_object): - config.__dict__[key] = value - return config - - @classmethod - def from_json_file(cls, json_file): - """Constructs a `BertConfig` from a json file of parameters.""" - with tf.io.gfile.GFile(json_file, "r") as reader: - text = reader.read() - return cls.from_dict(json.loads(text)) - - def to_dict(self): - """Serializes this instance to a Python dictionary.""" - output = copy.deepcopy(self.__dict__) - return output - - def to_json_string(self): - """Serializes this instance to a JSON string.""" - return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n" - - -class BertModel(object): - """BERT model ("Bidirectional Encoder Representations from Transformers"). 
- - Example usage: - - ```python - # Already been converted into WordPiece token ids - input_ids = tf.constant([[31, 51, 99], [15, 5, 0]]) - input_mask = tf.constant([[1, 1, 1], [1, 1, 0]]) - token_type_ids = tf.constant([[0, 0, 1], [0, 2, 0]]) - - config = modeling.BertConfig(vocab_size=32000, hidden_size=512, - num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024) - - model = modeling.BertModel(config=config, is_training=True, - input_ids=input_ids, input_mask=input_mask, token_type_ids=token_type_ids) - - label_embeddings = tf.get_variable(...) - pooled_output = model.get_pooled_output() - logits = tf.matmul(pooled_output, label_embeddings) - ... - ``` - """ - - def __init__(self, - config, - is_training, - input_ids, - input_mask=None, - token_type_ids=None, - use_one_hot_embeddings=False, - scope=None): - """Constructor for BertModel. - - Args: - config: `BertConfig` instance. - is_training: bool. true for training model, false for eval model. Controls - whether dropout will be applied. - input_ids: int32 Tensor of shape [batch_size, seq_length]. - input_mask: (optional) int32 Tensor of shape [batch_size, seq_length]. - token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length]. - use_one_hot_embeddings: (optional) bool. Whether to use one-hot word - embeddings or tf.embedding_lookup() for the word embeddings. - scope: (optional) variable scope. Defaults to "bert". - - Raises: - ValueError: The config is invalid or one of the input tensor shapes - is invalid. - """ - # Flags for BF16 CPU - self.bf16_scope = False - if config.precision == "bfloat16" : - bf.set_global_precision(tf.bfloat16) - if config.new_bf16_scope : - self.bf16_scope = True - - bf.set_global_flags(optimized_softmax=config.optimized_softmax, - experimental_gelu=config.experimental_gelu) - - config = copy.deepcopy(config) - if not is_training: - config.hidden_dropout_prob = 0.0 - config.attention_probs_dropout_prob = 0.0 - - input_shape = get_shape_list(input_ids, expected_rank=2) - batch_size = input_shape[0] - seq_length = input_shape[1] - - if input_mask is None: - input_mask = tf.ones(shape=[batch_size, seq_length], dtype=tf.int32) - - if token_type_ids is None: - token_type_ids = tf.zeros(shape=[batch_size, seq_length], dtype=tf.int32) - - with tf.compat.v1.variable_scope(scope, default_name="bert"): - with tf.compat.v1.variable_scope("embeddings"): - # Perform embedding lookup on the word ids. - (self.embedding_output, self.embedding_table) = embedding_lookup( - input_ids=input_ids, - vocab_size=config.vocab_size, - embedding_size=config.hidden_size, - initializer_range=config.initializer_range, - word_embedding_name="word_embeddings", - use_one_hot_embeddings=use_one_hot_embeddings) - - # Add positional embeddings and token type embeddings, then layer - # normalize and perform dropout. - self.embedding_output = embedding_postprocessor( - input_tensor=self.embedding_output, - use_token_type=True, - token_type_ids=token_type_ids, - token_type_vocab_size=config.type_vocab_size, - token_type_embedding_name="token_type_embeddings", - use_position_embeddings=True, - position_embedding_name="position_embeddings", - initializer_range=config.initializer_range, - max_position_embeddings=config.max_position_embeddings, - dropout_prob=config.hidden_dropout_prob) - - with tf.compat.v1.variable_scope("encoder"): - # This converts a 2D mask of shape [batch_size, seq_length] to a 3D - # mask of shape [batch_size, seq_length, seq_length] which is used - # for the attention scores. 
- attention_mask = create_attention_mask_from_input_mask( - input_ids, input_mask) - - # Run the stacked transformer. - # `sequence_output` shape = [batch_size, seq_length, hidden_size]. - # Cast is used to cover bfloat16 - input_tensor=bf.r_cast(self.embedding_output) - self.all_encoder_layers = transformer_model( - input_tensor=input_tensor, - attention_mask=attention_mask, - hidden_size=config.hidden_size, - num_hidden_layers=config.num_hidden_layers, - num_attention_heads=config.num_attention_heads, - intermediate_size=config.intermediate_size, - intermediate_act_fn=get_activation(config.hidden_act), - hidden_dropout_prob=config.hidden_dropout_prob, - attention_probs_dropout_prob=config.attention_probs_dropout_prob, - initializer_range=config.initializer_range, - do_return_all_layers=True) - - self.sequence_output = self.all_encoder_layers[-1] - # The "pooler" converts the encoded sequence tensor of shape - # [batch_size, seq_length, hidden_size] to a tensor of shape - # [batch_size, hidden_size]. This is necessary for segment-level - # (or segment-pair-level) classification tasks where we need a fixed - # dimensional representation of the segment. - with tf.compat.v1.variable_scope("pooler"): - # We "pool" the model by simply taking the hidden state corresponding - # to the first token. We assume that this has been pre-trained - first_token_tensor = tf.squeeze(self.sequence_output[:, 0:1, :], axis=1) - self.pooled_output = tf.compat.v1.layers.dense( - first_token_tensor, - config.hidden_size, - activation=bf.tanh, - kernel_initializer=create_initializer(config.initializer_range)) - - def get_pooled_output(self): - """ - In bfloat16 enabled execution, with only model covered in bfloat16 scope, - return the output in float32. Other cases return as is - """ - if self.bf16_scope == True: - return tf.cast(self.pooled_output, tf.float32) - else : - return self.pooled_output - - def get_sequence_output(self): - """Gets final hidden layer of encoder. - - Returns: - float Tensor of shape [batch_size, seq_length, hidden_size] corresponding - to the final hidden of the transformer encoder. - - In bfloat16 enabled execution, with only model covered in bfloat16 scope, - return the output in float32. Other cases return as is - """ - if self.bf16_scope == True: - return tf.cast(self.sequence_output, tf.float32) - else : - return self.sequence_output - - def get_all_encoder_layers(self): - return self.all_encoder_layers - - def get_embedding_output(self): - """Gets output of the embedding lookup (i.e., input to the transformer). - - Returns: - float Tensor of shape [batch_size, seq_length, hidden_size] corresponding - to the output of the embedding layer, after summing the word - embeddings with the positional embeddings and the token type embeddings, - then performing layer normalization. This is the input to the transformer. - """ - if self.bf16_scope == True : - return (self.embedding_output) - else : - return bf.r_cast(self.embedding_output) - - def get_embedding_table(self): - if self.bf16_scope == True : - return (self.embedding_table) - else : - return bf.r_cast(self.embedding_table) - - -def get_activation(activation_string): - """Maps a string to a Python function, e.g., "relu" => `tf.nn.relu`. - - Args: - activation_string: String name of the activation function. - - Returns: - A Python function corresponding to the activation function. If - `activation_string` is None, empty, or "linear", this will return None. - If `activation_string` is not a string, it will return `activation_string`. 
- - Raises: - ValueError: The `activation_string` does not correspond to a known - activation. - """ - - # We assume that anything that"s not a string is already an activation - # function, so we just return it. - if not isinstance(activation_string, six.string_types): - return activation_string - - if not activation_string: - return None - - act = activation_string.lower() - if act == "linear": - return None - elif act == "relu": - return tf.nn.relu - elif act == "gelu": - return bf.gelu - elif act == "tanh": - return bf.tanh - else: - raise ValueError("Unsupported activation: %s" % act) - - -def get_remaps(task): - regex1 = re.compile(r"layer_normalization[_0-9]*") - regex2 = None - if task=="SQuAD" : - regex2 = re.compile(r"squad") - elif task=="Classifier": - regex2 = re.compile(r"classifier") - else : - regex2 = None - - return regex1, regex2 - -def apply_remaps(name, map1, map2): - if map1!=None: - name = map1.sub("LayerNorm", name) - if map2!=None : - name = map2.sub("seq_relationship", name) - return name - -def check_model_validity(tvars, assignment_map) : - # Check if all model vars have a mapping in checkpoint - missing_var=False - missed_vars=[] - for var in tvars: - if var not in assignment_map.values(): - missed_vars.append(var) - missing_var=True - if missing_var : - for var in missed_vars: - tf.compat.v1.logging.info("Model Variable not in checkpoint", var) - raise ValueError("Error: Missing model variables in checkpoint!!") - -def get_assignment_map_from_checkpoint(tvars, init_checkpoint, task="Pretraining"): - """Compute the union of the current variables and checkpoint variables.""" - assignment_map = {} - initialized_variable_names = {} - - map1, map2 = get_remaps(task) - - name_to_variable = collections.OrderedDict() - for var in tvars: - name = var.name - m = re.match("^(.*):\\d+$", name) - if m is not None: - name = m.group(1) - name_to_variable[name] = var - name = apply_remaps(name, map1, map2) - name_to_variable[name] = var - - init_vars = tf.train.list_variables(init_checkpoint) - - assignment_map = collections.OrderedDict() - for x in init_vars: - (name, var) = (x[0], x[1]) - if name not in name_to_variable: - continue - assignment_map[name] = name_to_variable[name] - ivar = name_to_variable[name] - initialized_variable_names[ivar.name] = 1 - initialized_variable_names[ivar.name + ":0"] = 1 - - # Check if all model vars are loaded from Checkpoint - check_model_validity(tvars, assignment_map) - #for name, var in assignment_map.items(): - # print(name, "--->", var) - - return (assignment_map, initialized_variable_names) - -def dropout(input_tensor, dropout_prob): - """Perform dropout. - - Args: - input_tensor: float Tensor. - dropout_prob: Python float. The probability of dropping out a value (NOT of - *keeping* a dimension as in `tf.nn.dropout`). - - Returns: - A version of `input_tensor` with dropout applied. 
- """ - if dropout_prob is None or dropout_prob == 0.0: - return input_tensor - - output = tf.nn.dropout(input_tensor, 1 - (1.0 - dropout_prob)) - return output - - -def layer_norm(input_tensor, name=None): - """Run layer normalization on the last dimension of the tensor.""" - return bf.layer_norm( - inputs=input_tensor, begin_norm_axis=-1, begin_params_axis=-1, scope=name) - - -def layer_norm_and_dropout(input_tensor, dropout_prob, name=None): - """Runs layer normalization followed by dropout.""" - output_tensor = layer_norm(input_tensor, name) - output_tensor = dropout(output_tensor, dropout_prob) - return output_tensor - - -def create_initializer(initializer_range=0.02): - """Creates a `truncated_normal_initializer` with the given range.""" - return tf.compat.v1.truncated_normal_initializer(stddev=initializer_range) - - -def embedding_lookup(input_ids, - vocab_size, - embedding_size=128, - initializer_range=0.02, - word_embedding_name="word_embeddings", - use_one_hot_embeddings=False): - """Looks up words embeddings for id tensor. - - Args: - input_ids: int32 Tensor of shape [batch_size, seq_length] containing word - ids. - vocab_size: int. Size of the embedding vocabulary. - embedding_size: int. Width of the word embeddings. - initializer_range: float. Embedding initialization range. - word_embedding_name: string. Name of the embedding table. - use_one_hot_embeddings: bool. If True, use one-hot method for word - embeddings. If False, use `tf.gather()`. - - Returns: - float Tensor of shape [batch_size, seq_length, embedding_size]. - """ - # This function assumes that the input is of shape [batch_size, seq_length, - # num_inputs]. - # - # If the input is a 2D tensor of shape [batch_size, seq_length], we - # reshape to [batch_size, seq_length, 1]. - if input_ids.shape.ndims == 2: - input_ids = tf.expand_dims(input_ids, axis=[-1]) - - embedding_table = tf.compat.v1.get_variable( - name=word_embedding_name, - shape=[vocab_size, embedding_size], - initializer=create_initializer(initializer_range)) - - flat_input_ids = tf.reshape(input_ids, [-1]) - if use_one_hot_embeddings: - one_hot_input_ids = tf.one_hot(flat_input_ids, depth=vocab_size) - output = tf.matmul(one_hot_input_ids, embedding_table) - else: - output = tf.gather(embedding_table, flat_input_ids) - - input_shape = get_shape_list(input_ids) - - output = tf.reshape(output, - input_shape[0:-1] + [input_shape[-1] * embedding_size]) - return (output, embedding_table) - - -def embedding_postprocessor(input_tensor, - use_token_type=False, - token_type_ids=None, - token_type_vocab_size=16, - token_type_embedding_name="token_type_embeddings", - use_position_embeddings=True, - position_embedding_name="position_embeddings", - initializer_range=0.02, - max_position_embeddings=512, - dropout_prob=0.1): - """Performs various post-processing on a word embedding tensor. - - Args: - input_tensor: float Tensor of shape [batch_size, seq_length, - embedding_size]. - use_token_type: bool. Whether to add embeddings for `token_type_ids`. - token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length]. - Must be specified if `use_token_type` is True. - token_type_vocab_size: int. The vocabulary size of `token_type_ids`. - token_type_embedding_name: string. The name of the embedding table variable - for token type ids. - use_position_embeddings: bool. Whether to add position embeddings for the - position of each token in the sequence. - position_embedding_name: string. The name of the embedding table variable - for positional embeddings. 
- initializer_range: float. Range of the weight initialization. - max_position_embeddings: int. Maximum sequence length that might ever be - used with this model. This can be longer than the sequence length of - input_tensor, but cannot be shorter. - dropout_prob: float. Dropout probability applied to the final output tensor. - - Returns: - float tensor with same shape as `input_tensor`. - - Raises: - ValueError: One of the tensor shapes or input values is invalid. - """ - input_shape = get_shape_list(input_tensor, expected_rank=3) - batch_size = input_shape[0] - seq_length = input_shape[1] - width = input_shape[2] - - output = input_tensor - - if use_token_type: - if token_type_ids is None: - raise ValueError("`token_type_ids` must be specified if" - "`use_token_type` is True.") - token_type_table = tf.compat.v1.get_variable( - name=token_type_embedding_name, - shape=[token_type_vocab_size, width], - initializer=create_initializer(initializer_range)) - # This vocab will be small so we always do one-hot here, since it is always - # faster for a small vocabulary. - flat_token_type_ids = tf.reshape(token_type_ids, [-1]) - one_hot_ids = tf.one_hot(flat_token_type_ids, depth=token_type_vocab_size) - token_type_embeddings = tf.matmul(one_hot_ids, token_type_table) - token_type_embeddings = tf.reshape(token_type_embeddings, - [batch_size, seq_length, width]) - output += token_type_embeddings - - if use_position_embeddings: - assert_op = tf.compat.v1.assert_less_equal(seq_length, max_position_embeddings) - with tf.control_dependencies([assert_op]): - full_position_embeddings = tf.compat.v1.get_variable( - name=position_embedding_name, - shape=[max_position_embeddings, width], - initializer=create_initializer(initializer_range)) - # Since the position embedding table is a learned variable, we create it - # using a (long) sequence length `max_position_embeddings`. The actual - # sequence length might be shorter than this, for faster training of - # tasks that do not have long sequences. - # - # So `full_position_embeddings` is effectively an embedding table - # for position [0, 1, 2, ..., max_position_embeddings-1], and the current - # sequence has positions [0, 1, 2, ... seq_length-1], so we can just - # perform a slice. - position_embeddings = tf.slice(full_position_embeddings, [0, 0], - [seq_length, -1]) - num_dims = len(output.shape.as_list()) - - # Only the last two dimensions are relevant (`seq_length` and `width`), so - # we broadcast among the first dimensions, which is typically just - # the batch size. - position_broadcast_shape = [] - for _ in range(num_dims - 2): - position_broadcast_shape.append(1) - position_broadcast_shape.extend([seq_length, width]) - position_embeddings = tf.reshape(position_embeddings, - position_broadcast_shape) - output += position_embeddings - - output = layer_norm_and_dropout(output, dropout_prob) - return output - - -def create_attention_mask_from_input_mask(from_tensor, to_mask): - """Create 3D attention mask from a 2D tensor mask. - - Args: - from_tensor: 2D or 3D Tensor of shape [batch_size, from_seq_length, ...]. - to_mask: int32 Tensor of shape [batch_size, to_seq_length]. - - Returns: - float Tensor of shape [batch_size, from_seq_length, to_seq_length]. 
- """ - from_shape = get_shape_list(from_tensor, expected_rank=[2, 3]) - batch_size = from_shape[0] - from_seq_length = from_shape[1] - - to_shape = get_shape_list(to_mask, expected_rank=2) - to_seq_length = to_shape[1] - - to_mask = tf.cast( - tf.reshape(to_mask, [batch_size, 1, to_seq_length]), tf.float32) - - # We don't assume that `from_tensor` is a mask (although it could be). We - # don't actually care if we attend *from* padding tokens (only *to* padding) - # tokens so we create a tensor of all ones. - try: - # `broadcast_ones` = [batch_size, from_seq_length, 1] - broadcast_ones = tf.ones( - shape=[batch_size, from_seq_length, 1], dtype=tf.float32) - - # Here we broadcast along two dimensions to create the mask. - mask = broadcast_ones * to_mask - except (NotImplementedError): - # Kernel bug, happens when the version of python is 3.7 and the version of numpy is >= 1.20.0 - mask = to_mask - return mask - - -def attention_layer(from_tensor, - to_tensor, - attention_mask=None, - num_attention_heads=1, - size_per_head=512, - query_act=None, - key_act=None, - value_act=None, - attention_probs_dropout_prob=0.0, - initializer_range=0.02, - do_return_2d_tensor=False, - batch_size=None, - from_seq_length=None, - to_seq_length=None): - """Performs multi-headed attention from `from_tensor` to `to_tensor`. - - This is an implementation of multi-headed attention based on "Attention - is all you Need". If `from_tensor` and `to_tensor` are the same, then - this is self-attention. Each timestep in `from_tensor` attends to the - corresponding sequence in `to_tensor`, and returns a fixed-with vector. - - This function first projects `from_tensor` into a "query" tensor and - `to_tensor` into "key" and "value" tensors. These are (effectively) a list - of tensors of length `num_attention_heads`, where each tensor is of shape - [batch_size, seq_length, size_per_head]. - - Then, the query and key tensors are dot-producted and scaled. These are - softmaxed to obtain attention probabilities. The value tensors are then - interpolated by these probabilities, then concatenated back to a single - tensor and returned. - - In practice, the multi-headed attention are done with transposes and - reshapes rather than actual separate tensors. - - Args: - from_tensor: float Tensor of shape [batch_size, from_seq_length, - from_width]. - to_tensor: float Tensor of shape [batch_size, to_seq_length, to_width]. - attention_mask: (optional) int32 Tensor of shape [batch_size, - from_seq_length, to_seq_length]. The values should be 1 or 0. The - attention scores will effectively be set to -infinity for any positions in - the mask that are 0, and will be unchanged for positions that are 1. - num_attention_heads: int. Number of attention heads. - size_per_head: int. Size of each attention head. - query_act: (optional) Activation function for the query transform. - key_act: (optional) Activation function for the key transform. - value_act: (optional) Activation function for the value transform. - attention_probs_dropout_prob: (optional) float. Dropout probability of the - attention probabilities. - initializer_range: float. Range of the weight initializer. - do_return_2d_tensor: bool. If True, the output will be of shape [batch_size - * from_seq_length, num_attention_heads * size_per_head]. If False, the - output will be of shape [batch_size, from_seq_length, num_attention_heads - * size_per_head]. - batch_size: (Optional) int. 
If the input is 2D, this might be the batch size - of the 3D version of the `from_tensor` and `to_tensor`. - from_seq_length: (Optional) If the input is 2D, this might be the seq length - of the 3D version of the `from_tensor`. - to_seq_length: (Optional) If the input is 2D, this might be the seq length - of the 3D version of the `to_tensor`. - - Returns: - float Tensor of shape [batch_size, from_seq_length, - num_attention_heads * size_per_head]. (If `do_return_2d_tensor` is - true, this will be of shape [batch_size * from_seq_length, - num_attention_heads * size_per_head]). - - Raises: - ValueError: Any of the arguments or tensor shapes are invalid. - """ - - def transpose_for_scores(input_tensor, batch_size, num_attention_heads, - seq_length, width): - output_tensor = tf.reshape( - input_tensor, [batch_size, seq_length, num_attention_heads, width]) - - output_tensor = tf.transpose(a=output_tensor, perm=[0, 2, 1, 3]) - return output_tensor - - from_shape = get_shape_list(from_tensor, expected_rank=[2, 3]) - to_shape = get_shape_list(to_tensor, expected_rank=[2, 3]) - - if len(from_shape) != len(to_shape): - raise ValueError( - "The rank of `from_tensor` must match the rank of `to_tensor`.") - - if len(from_shape) == 3: - batch_size = from_shape[0] - from_seq_length = from_shape[1] - to_seq_length = to_shape[1] - elif len(from_shape) == 2: - if (batch_size is None or from_seq_length is None or to_seq_length is None): - raise ValueError( - "When passing in rank 2 tensors to attention_layer, the values " - "for `batch_size`, `from_seq_length`, and `to_seq_length` " - "must all be specified.") - - # Scalar dimensions referenced here: - # B = batch size (number of sequences) - # F = `from_tensor` sequence length - # T = `to_tensor` sequence length - # N = `num_attention_heads` - # H = `size_per_head` - - from_tensor_2d = reshape_to_matrix(from_tensor) - to_tensor_2d = reshape_to_matrix(to_tensor) - - # `query_layer` = [B*F, N*H] - query_layer = tf.compat.v1.layers.dense( - from_tensor_2d, - num_attention_heads * size_per_head, - activation=query_act, - name="query", - kernel_initializer=create_initializer(initializer_range)) - - # `key_layer` = [B*T, N*H] - key_layer = tf.compat.v1.layers.dense( - to_tensor_2d, - num_attention_heads * size_per_head, - activation=key_act, - name="key", - kernel_initializer=create_initializer(initializer_range)) - - # `value_layer` = [B*T, N*H] - value_layer = tf.compat.v1.layers.dense( - to_tensor_2d, - num_attention_heads * size_per_head, - activation=value_act, - name="value", - kernel_initializer=create_initializer(initializer_range)) - - # `query_layer` = [B, N, F, H] - query_layer = transpose_for_scores(query_layer, batch_size, - num_attention_heads, from_seq_length, - size_per_head) - - # `key_layer` = [B, N, T, H] - key_layer = transpose_for_scores(key_layer, batch_size, num_attention_heads, - to_seq_length, size_per_head) - - # Take the dot product between "query" and "key" to get the raw - # attention scores. 
- # `attention_scores` = [B, N, F, T] - attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True) - attention_scores = tf.multiply(attention_scores, bf.r_cast( - 1.0 / math.sqrt(float(size_per_head)))) - - if attention_mask is not None: - # `attention_mask` = [B, 1, F, T] - attention_mask = tf.expand_dims(attention_mask, axis=[1]) - - # Since attention_mask is 1.0 for positions we want to attend and 0.0 for - # masked positions, this operation will create a tensor which is 0.0 for - # positions we want to attend and -10000.0 for masked positions. - adder = (1.0 - tf.cast(attention_mask, from_tensor.dtype)) * -10000.0 - - # Since we are adding it to the raw scores before the softmax, this is - # effectively the same as removing these entirely. - attention_scores += adder - - # Normalize the attention scores to probabilities. - # `attention_probs` = [B, N, F, T] - attention_probs = bf.softmax(attention_scores) - - # This is actually dropping out entire tokens to attend to, which might - # seem a bit unusual, but is taken from the original Transformer paper. - attention_probs = dropout(attention_probs, attention_probs_dropout_prob) - - # `value_layer` = [B, T, N, H] - value_layer = tf.reshape( - value_layer, - [batch_size, to_seq_length, num_attention_heads, size_per_head]) - - # `value_layer` = [B, N, T, H] - value_layer = tf.transpose(a=value_layer, perm=[0, 2, 1, 3]) - - # `context_layer` = [B, N, F, H] - context_layer = tf.matmul(attention_probs, value_layer) - - # `context_layer` = [B, F, N, H] - context_layer = tf.transpose(a=context_layer, perm=[0, 2, 1, 3]) - - if do_return_2d_tensor: - # `context_layer` = [B*F, N*H] - context_layer = tf.reshape( - context_layer, - [batch_size * from_seq_length, num_attention_heads * size_per_head]) - else: - # `context_layer` = [B, F, N*H] - context_layer = tf.reshape( - context_layer, - [batch_size, from_seq_length, num_attention_heads * size_per_head]) - - return context_layer - - -def transformer_model(input_tensor, - attention_mask=None, - hidden_size=768, - num_hidden_layers=12, - num_attention_heads=12, - intermediate_size=3072, - intermediate_act_fn=bf.gelu, - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - initializer_range=0.02, - do_return_all_layers=False): - """Multi-headed, multi-layer Transformer from "Attention is All You Need". - - This is almost an exact implementation of the original Transformer encoder. - - See the original paper: - https://arxiv.org/abs/1706.03762 - - Also see: - https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/models/transformer.py - - Args: - input_tensor: float Tensor of shape [batch_size, seq_length, hidden_size]. - attention_mask: (optional) int32 Tensor of shape [batch_size, seq_length, - seq_length], with 1 for positions that can be attended to and 0 in - positions that should not be. - hidden_size: int. Hidden size of the Transformer. - num_hidden_layers: int. Number of layers (blocks) in the Transformer. - num_attention_heads: int. Number of attention heads in the Transformer. - intermediate_size: int. The size of the "intermediate" (a.k.a., feed - forward) layer. - intermediate_act_fn: function. The non-linear activation function to apply - to the output of the intermediate/feed-forward layer. - hidden_dropout_prob: float. Dropout probability for the hidden layers. - attention_probs_dropout_prob: float. Dropout probability of the attention - probabilities. - initializer_range: float. Range of the initializer (stddev of truncated - normal). 
- do_return_all_layers: Whether to also return all layers or just the final - layer. - - Returns: - float Tensor of shape [batch_size, seq_length, hidden_size], the final - hidden layer of the Transformer. - - Raises: - ValueError: A Tensor shape or parameter is invalid. - """ - if hidden_size % num_attention_heads != 0: - raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (hidden_size, num_attention_heads)) - - attention_head_size = int(hidden_size / num_attention_heads) - input_shape = get_shape_list(input_tensor, expected_rank=3) - batch_size = input_shape[0] - seq_length = input_shape[1] - input_width = input_shape[2] - - # The Transformer performs sum residuals on all layers so the input needs - # to be the same as the hidden size. - if input_width != hidden_size: - raise ValueError("The width of the input tensor (%d) != hidden size (%d)" % - (input_width, hidden_size)) - - # We keep the representation as a 2D tensor to avoid re-shaping it back and - # forth from a 3D tensor to a 2D tensor. Re-shapes are normally free on - # the GPU/CPU but may not be free on the TPU, so we want to minimize them to - # help the optimizer. - prev_output = reshape_to_matrix(input_tensor) - - all_layer_outputs = [] - for layer_idx in range(num_hidden_layers): - with tf.compat.v1.variable_scope("layer_%d" % layer_idx): - layer_input = prev_output - - with tf.compat.v1.variable_scope("attention"): - attention_heads = [] - with tf.compat.v1.variable_scope("self"): - attention_head = attention_layer( - from_tensor=layer_input, - to_tensor=layer_input, - attention_mask=attention_mask, - num_attention_heads=num_attention_heads, - size_per_head=attention_head_size, - attention_probs_dropout_prob=attention_probs_dropout_prob, - initializer_range=initializer_range, - do_return_2d_tensor=True, - batch_size=batch_size, - from_seq_length=seq_length, - to_seq_length=seq_length) - attention_heads.append(attention_head) - - attention_output = None - if len(attention_heads) == 1: - attention_output = attention_heads[0] - else: - # In the case where we have other sequences, we just concatenate - # them to the self-attention head before the projection. - attention_output = tf.concat(attention_heads, axis=-1) - - # Run a linear projection of `hidden_size` then add a residual - # with `layer_input`. - with tf.compat.v1.variable_scope("output"): - attention_output = tf.compat.v1.layers.dense( - attention_output, - hidden_size, - kernel_initializer=create_initializer(initializer_range)) - attention_output = dropout(attention_output, hidden_dropout_prob) - attention_output = layer_norm(attention_output + layer_input) - - # The activation is only applied to the "intermediate" hidden layer. - with tf.compat.v1.variable_scope("intermediate"): - intermediate_output = tf.compat.v1.layers.dense( - attention_output, - intermediate_size, - activation=intermediate_act_fn, - kernel_initializer=create_initializer(initializer_range)) - - # Down-project back to `hidden_size` then add the residual. 
- with tf.compat.v1.variable_scope("output"): - layer_output = tf.compat.v1.layers.dense( - intermediate_output, - hidden_size, - kernel_initializer=create_initializer(initializer_range)) - layer_output = dropout(layer_output, hidden_dropout_prob) - layer_output = layer_norm(layer_output + attention_output) - prev_output = layer_output - all_layer_outputs.append(layer_output) - - if do_return_all_layers: - final_outputs = [] - for layer_output in all_layer_outputs: - final_output = reshape_from_matrix(layer_output, input_shape) - final_outputs.append(final_output) - return final_outputs - else: - final_output = reshape_from_matrix(prev_output, input_shape) - return final_output - - -def get_shape_list(tensor, expected_rank=None, name=None): - """Returns a list of the shape of tensor, preferring static dimensions. - - Args: - tensor: A tf.Tensor object to find the shape of. - expected_rank: (optional) int. The expected rank of `tensor`. If this is - specified and the `tensor` has a different rank, and exception will be - thrown. - name: Optional name of the tensor for the error message. - - Returns: - A list of dimensions of the shape of tensor. All static dimensions will - be returned as python integers, and dynamic dimensions will be returned - as tf.Tensor scalars. - """ - if name is None: - name = tensor.name - - if expected_rank is not None: - assert_rank(tensor, expected_rank, name) - - shape = tensor.shape.as_list() - - non_static_indexes = [] - for (index, dim) in enumerate(shape): - if dim is None: - non_static_indexes.append(index) - - if not non_static_indexes: - return shape - - dyn_shape = tf.shape(input=tensor) - for index in non_static_indexes: - shape[index] = dyn_shape[index] - return shape - - -def reshape_to_matrix(input_tensor): - """Reshapes a >= rank 2 tensor to a rank 2 tensor (i.e., a matrix).""" - ndims = input_tensor.shape.ndims - if ndims < 2: - raise ValueError("Input tensor must have at least rank 2. Shape = %s" % - (input_tensor.shape)) - if ndims == 2: - return input_tensor - - width = input_tensor.shape[-1] - output_tensor = tf.reshape(input_tensor, [-1, width]) - return output_tensor - - -def reshape_from_matrix(output_tensor, orig_shape_list): - """Reshapes a rank 2 tensor back to its original rank >= 2 tensor.""" - if len(orig_shape_list) == 2: - return output_tensor - - output_shape = get_shape_list(output_tensor) - - orig_dims = orig_shape_list[0:-1] - width = output_shape[-1] - - return tf.reshape(output_tensor, orig_dims + [width]) - - -def assert_rank(tensor, expected_rank, name=None): - """Raises an exception if the tensor rank is not of the expected rank. - - Args: - tensor: A tf.Tensor to check the rank of. - expected_rank: Python integer or list of integers, expected rank. - name: Optional name of the tensor for the error message. - - Raises: - ValueError: If the expected shape doesn't match the actual shape. 
- """ - if name is None: - name = tensor.name - - expected_rank_dict = {} - if isinstance(expected_rank, six.integer_types): - expected_rank_dict[expected_rank] = True - else: - for x in expected_rank: - expected_rank_dict[x] = True - - actual_rank = tensor.shape.ndims - if actual_rank not in expected_rank_dict: - scope_name = tf.compat.v1.get_variable_scope().name - raise ValueError( - "For the tensor `%s` in scope `%s`, the actual rank " - "`%d` (shape = %s) is not equal to the expected rank `%s`" % - (name, scope_name, actual_rank, str(tensor.shape), str(expected_rank))) diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/optimization.py b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/optimization.py deleted file mode 100644 index 597ef36dc11..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/optimization.py +++ /dev/null @@ -1,247 +0,0 @@ -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Functions and classes related to optimization (weight updates).""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import re -import tensorflow as tf -import generic_ops as bf - - -def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, accum_steps=1, use_tpu=False, fine_tuning=True): - """Creates an optimizer training op.""" - global_step = tf.compat.v1.train.get_or_create_global_step() - - learning_rate = tf.constant(value=init_lr, shape=[], dtype=tf.float32) - - # Implements linear decay of the learning rate. - learning_rate = tf.compat.v1.train.polynomial_decay( - learning_rate, - global_step, - num_train_steps, - end_learning_rate=0.0, - power=1.0, - cycle=False) - - # Implements linear warmup. I.e., if global_step < num_warmup_steps, the - # learning rate will be `global_step/num_warmup_steps * init_lr`. - if num_warmup_steps: - global_steps_int = tf.cast(global_step, tf.int32) - warmup_steps_int = tf.constant(num_warmup_steps, dtype=tf.int32) - - global_steps_float = tf.cast(global_steps_int, tf.float32) - warmup_steps_float = tf.cast(warmup_steps_int, tf.float32) - - warmup_percent_done = global_steps_float / warmup_steps_float - warmup_learning_rate = init_lr * warmup_percent_done - - is_warmup = tf.cast(global_steps_int < warmup_steps_int, tf.float32) - learning_rate = ( - (1.0 - is_warmup) * learning_rate + is_warmup * warmup_learning_rate) - - # It is recommended that you use this optimizer for fine tuning, since this - # is how the model was trained (note that the Adam m/v variables are NOT - # loaded from init_checkpoint.) 
- optimizer = AdamWeightDecayOptimizer( - learning_rate=learning_rate, - weight_decay_rate=0.01, - beta_1=0.9, - beta_2=0.999, - epsilon=1e-6, - exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"]) - - if use_tpu: - optimizer = tf.compat.v1.tpu.CrossShardOptimizer(optimizer) - - tvars = tf.compat.v1.trainable_variables() - - - if accum_steps > 1 : - #tf.compat.v1.logging.info("Accumulation Steps....") - grads_and_vars = optimizer.compute_gradients(loss * 1.0 / accum_steps, tvars) - - current_step = tf.compat.v1.get_variable(name="current_step", shape=[], dtype=tf.int32, - trainable=False, - initializer=tf.zeros_initializer) - accum_vars = [tf.compat.v1.get_variable( - name=tvar.name.split(":")[0] + "/agrads", - shape=tvar.shape.as_list(), - dtype=tf.float32, - trainable=False, - initializer=tf.zeros_initializer()) for tvar in tvars] - - apply_grads = tf.cast(tf.math.equal(current_step % accum_steps, 0), dtype=tf.bool) - current_step = tf.cond(apply_grads, - lambda:current_step.assign(tf.ones_like(current_step)), - lambda:current_step.assign_add(1)) - #lambda:inc_current_step(current_step, "Apply Grads:"), - #lambda:inc_current_step(current_step, "Step:")) - - grads_and_vars_and_accums = [(gv[0],gv[1],accum_vars[i]) for i, gv in enumerate(grads_and_vars) if gv[0] is not None] - grads, tvars, accum_vars = list(zip(*grads_and_vars_and_accums)) - - (cgrads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0) - - #accum_vars = update_accum_vars(accum_vars, apply_grads, cgrads, current_step) - accum_vars = tf.cond(apply_grads, - lambda: [accum_vars[i].assign(grad) for i, grad in enumerate(cgrads)], - lambda: [accum_vars[i].assign_add(grad) for i, grad in enumerate(cgrads)]) - - def applyGrads(accum_vars, current_step): - #tf.compat.v1.logging.info("\t\t APPLYING GRADIENTS....:", global_step) - return optimizer.apply_gradients(list(zip(accum_vars, tvars)), global_step=global_step) - - apply_step = tf.identity(tf.cast(tf.math.equal(current_step % accum_steps, 0), dtype=tf.bool), name="apply_step") - update_op = tf.cond(apply_step, lambda: applyGrads(accum_vars, current_step), lambda: tf.no_op()) - - new_global_step = tf.cond(apply_step, lambda: global_step+1, lambda: global_step) - new_global_step = tf.identity(new_global_step, name='global_step_update') - train_op = tf.group(update_op, [global_step.assign(new_global_step)]) - else : - grads = tf.gradients(ys=loss, xs=tvars) - - # This is how the model was pre-trained. - if fine_tuning : - (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0) - else : - gdtypes = [grad.dtype for grad in grads]; - mgrads = [tf.cast(grad, tf.float32) for grad in grads] - (grads, _) = tf.clip_by_global_norm(mgrads, clip_norm=1.0) - grads = [tf.cast(grad, ddtype) for grad, ddtype in zip(grads, gdtypes)] - - train_op = optimizer.apply_gradients( - zip(grads, tvars), global_step=global_step, fine_tuning=fine_tuning) - - # Normally the global step update is done inside of `apply_gradients`. - # However, `AdamWeightDecayOptimizer` doesn't do this. But if you use - # a different optimizer, you should probably take this line out. 
- new_global_step = global_step + 1 - new_global_step = tf.identity(new_global_step, name='global_step_update') - train_op = tf.group(train_op, [global_step.assign(new_global_step)]) - - return train_op - - -class AdamWeightDecayOptimizer(tf.compat.v1.train.Optimizer): - """A basic Adam optimizer that includes "correct" L2 weight decay.""" - - def __init__(self, - learning_rate, - weight_decay_rate=0.0, - beta_1=0.9, - beta_2=0.999, - epsilon=1e-6, - exclude_from_weight_decay=None, - name="AdamWeightDecayOptimizer"): - """Constructs a AdamWeightDecayOptimizer.""" - super(AdamWeightDecayOptimizer, self).__init__(False, name) - - self.learning_rate = learning_rate - self.weight_decay_rate = weight_decay_rate - self.beta_1 = beta_1 - self.beta_2 = beta_2 - self.epsilon = epsilon - self.exclude_from_weight_decay = exclude_from_weight_decay - - def apply_gradients(self, grads_and_vars, fine_tuning=True, global_step=None, name=None): - """See base class.""" - assignments = [] - for (grad, param) in grads_and_vars: - if grad is None or param is None: - continue - - param_name = self._get_variable_name(param.name) - - m = tf.compat.v1.get_variable( - name=param_name + "/adam_m", - shape=param.shape.as_list(), - dtype=tf.float32, - trainable=False, - initializer=tf.compat.v1.zeros_initializer()) - v = tf.compat.v1.get_variable( - name=param_name + "/adam_v", - shape=param.shape.as_list(), - dtype=tf.float32, - trainable=False, - initializer=tf.compat.v1.zeros_initializer()) - - # Standard Adam update. - if fine_tuning : - next_m = ( - tf.multiply(self.beta_1, m) + tf.multiply(1.0 - self.beta_1, grad)) - next_v = ( - tf.multiply(self.beta_2, v) + tf.multiply(1.0 - self.beta_2, - tf.square(grad))) - else : - next_m = ( - bf.multiply(self.beta_1, m) + bf.multiply(1.0 - self.beta_1, grad)) - next_v = ( - bf.multiply(self.beta_2, v) + bf.multiply(1.0 - self.beta_2, - tf.square(grad))) - - update = next_m / (tf.sqrt(next_v) + self.epsilon) - - # Just adding the square of the weights to the loss function is *not* - # the correct way of using L2 regularization/weight decay with Adam, - # since that will interact with the m and v parameters in strange ways. - # - # Instead we want ot decay the weights in a manner that doesn't interact - # with the m/v parameters. This is equivalent to adding the square - # of the weights to the loss with plain (non-momentum) SGD. 
- if self._do_use_weight_decay(param_name): - if fine_tuning : - update += self.weight_decay_rate * param - else : - update += tf.cast(self.weight_decay_rate * param, update.dtype) - - if fine_tuning : - update_with_lr = self.learning_rate * update - - next_param = param - update_with_lr - - assignments.extend( - [param.assign(next_param), - m.assign(next_m), - v.assign(next_v)]) - else : - update_with_lr = tf.cast(self.learning_rate, update.dtype) * update - - next_param = tf.cast(param, update_with_lr.dtype) - update_with_lr - - - param, m, v = tf.cast(next_param, param.dtype), tf.cast(m, m.dtype), tf.cast(v, v.dtype) - - assignments.extend([param, m, v]) - return tf.group(*assignments, name=name) - - def _do_use_weight_decay(self, param_name): - """Whether to use L2 weight decay for `param_name`.""" - if not self.weight_decay_rate: - return False - if self.exclude_from_weight_decay: - for r in self.exclude_from_weight_decay: - if re.search(r, param_name) is not None: - return False - return True - - def _get_variable_name(self, param_name): - """Get the variable name from the tensor name.""" - m = re.match("^(.*):\\d+$", param_name) - if m is not None: - param_name = m.group(1) - return param_name diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/prepare_dataset.sh b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/prepare_dataset.sh deleted file mode 100644 index acae8ce944d..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/prepare_dataset.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash -# set -x - -OUTPUT_DIR="./data" - -help() -{ - cat <<- EOF - Desc: Prepare bert dataset - -h --help help info - --output_dir Output data directory - default: './data' -EOF - exit 0 -} - -function main { - init_params "$@" - convert_dataset -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --output_dir=*) - OUTPUT_DIR=$(echo $var |cut -f2 -d=) - ;; - -h|--help) help - ;; - *) - echo "Error: No such parameter: ${var}" - exit 1 - ;; - esac - done -} - -# convert dataset -function convert_dataset { - if [ ! -d ${OUTPUT_DIR} ]; then - echo '${OUTPUT_DIR} already exists, please check...' - fi - wget https://storage.googleapis.com/bert_models/2019_05_30/wwm_uncased_L-24_H-1024_A-16.zip - unzip wwm_uncased_L-24_H-1024_A-16.zip - wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json -P wwm_uncased_L-24_H-1024_A-16 - mv wwm_uncased_L-24_H-1024_A-16 ${OUTPUT_DIR} - -} - -main "$@" - diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/prepare_model.sh b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/prepare_model.sh deleted file mode 100644 index c1d9a4ed702..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/prepare_model.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/bash -# set -x - -OUTPUT_DIR="./model" - -help() -{ - cat <<- EOF - Desc: Prepare bert model - -h --help help info - --output_dir Output model directory - default: './model' -EOF - exit 0 -} - -function main { - init_params "$@" - convert_model -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --output_dir=*) - OUTPUT_DIR=$(echo $var |cut -f2 -d=) - ;; - -h|--help) help - ;; - *) - echo "Error: No such parameter: ${var}" - exit 1 - ;; - esac - done -} - -# convert model -function convert_model { - if [ ! -d ${OUTPUT_DIR} ]; then - echo '${OUTPUT_DIR} already exists, please check...' 
- fi - wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/bert_large_checkpoints.zip - unzip bert_large_checkpoints.zip - mv bert_large_checkpoints ${OUTPUT_DIR} - -} - -main "$@" - diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_benchmark.sh b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_benchmark.sh deleted file mode 100644 index 38b0dc3807c..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_benchmark.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - iters=100 - for var in "$@" - do - case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - *) - echo "Error: No such parameter: ${var}" - exit 1 - ;; - esac - done - -} - - -# run_tuning -function run_benchmark { - - python tune_squad.py \ - --config=${config} \ - --input_model=${input_model} \ - --mode=${mode} \ - --benchmark \ - -} - -main "$@" diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_tuning.sh b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_tuning.sh deleted file mode 100644 index be090267e8a..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_tuning.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -# set -x - -function main { - - init_params "$@" - - run_tuning - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --config=*) - config=$(echo $var |cut -f2 -d=) - ;; - --input_model=*) - input_model=$(echo "$var" |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo "$var" |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - python tune_squad.py \ - --input_model=${input_model} \ - --output_model=${output_model} \ - --config=${config} \ - --tune \ - -} - -main "$@" diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tokenization.py b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tokenization.py deleted file mode 100644 index 52c92adb81f..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tokenization.py +++ /dev/null @@ -1,399 +0,0 @@ -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Tokenization classes.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import re -import unicodedata -import six -import tensorflow as tf - - -def validate_case_matches_checkpoint(do_lower_case, init_checkpoint): - """Checks whether the casing config is consistent with the checkpoint name.""" - - # The casing has to be passed in by the user and there is no explicit check - # as to whether it matches the checkpoint. The casing information probably - # should have been stored in the bert_config.json file, but it's not, so - # we have to heuristically detect it to validate. - - if not init_checkpoint: - return - - m = re.match("^.*?([A-Za-z0-9_-]+)/bert_model.ckpt", init_checkpoint) - if m is None: - return - - model_name = m.group(1) - - lower_models = [ - "uncased_L-24_H-1024_A-16", "uncased_L-12_H-768_A-12", - "multilingual_L-12_H-768_A-12", "chinese_L-12_H-768_A-12" - ] - - cased_models = [ - "cased_L-12_H-768_A-12", "cased_L-24_H-1024_A-16", - "multi_cased_L-12_H-768_A-12" - ] - - is_bad_config = False - if model_name in lower_models and not do_lower_case: - is_bad_config = True - actual_flag = "False" - case_name = "lowercased" - opposite_flag = "True" - - if model_name in cased_models and do_lower_case: - is_bad_config = True - actual_flag = "True" - case_name = "cased" - opposite_flag = "False" - - if is_bad_config: - raise ValueError( - "You passed in `--do_lower_case=%s` with `--init_checkpoint=%s`. " - "However, `%s` seems to be a %s model, so you " - "should pass in `--do_lower_case=%s` so that the fine-tuning matches " - "how the model was pre-training. If this error is wrong, please " - "just comment out this check." % (actual_flag, init_checkpoint, - model_name, case_name, opposite_flag)) - - -def convert_to_unicode(text): - """Converts `text` to Unicode (if it's not already), assuming utf-8 input.""" - if six.PY3: - if isinstance(text, str): - return text - elif isinstance(text, bytes): - return text.decode("utf-8", "ignore") - else: - raise ValueError("Unsupported string type: %s" % (type(text))) - elif six.PY2: - if isinstance(text, str): - return text.decode("utf-8", "ignore") - elif isinstance(text, unicode): - return text - else: - raise ValueError("Unsupported string type: %s" % (type(text))) - else: - raise ValueError("Not running on Python2 or Python 3?") - - -def printable_text(text): - """Returns text encoded in a way suitable for print or `tf.logging`.""" - - # These functions want `str` for both Python2 and Python3, but in one case - # it's a Unicode string and in the other it's a byte string. 
- if six.PY3: - if isinstance(text, str): - return text - elif isinstance(text, bytes): - return text.decode("utf-8", "ignore") - else: - raise ValueError("Unsupported string type: %s" % (type(text))) - elif six.PY2: - if isinstance(text, str): - return text - elif isinstance(text, unicode): - return text.encode("utf-8") - else: - raise ValueError("Unsupported string type: %s" % (type(text))) - else: - raise ValueError("Not running on Python2 or Python 3?") - - -def load_vocab(vocab_file): - """Loads a vocabulary file into a dictionary.""" - vocab = collections.OrderedDict() - index = 0 - with tf.io.gfile.GFile(vocab_file, "r") as reader: - while True: - token = convert_to_unicode(reader.readline()) - if not token: - break - token = token.strip() - vocab[token] = index - index += 1 - return vocab - - -def convert_by_vocab(vocab, items): - """Converts a sequence of [tokens|ids] using the vocab.""" - output = [] - for item in items: - output.append(vocab[item]) - return output - - -def convert_tokens_to_ids(vocab, tokens): - return convert_by_vocab(vocab, tokens) - - -def convert_ids_to_tokens(inv_vocab, ids): - return convert_by_vocab(inv_vocab, ids) - - -def whitespace_tokenize(text): - """Runs basic whitespace cleaning and splitting on a piece of text.""" - text = text.strip() - if not text: - return [] - tokens = text.split() - return tokens - - -class FullTokenizer(object): - """Runs end-to-end tokenziation.""" - - def __init__(self, vocab_file, do_lower_case=True): - self.vocab = load_vocab(vocab_file) - self.inv_vocab = {v: k for k, v in self.vocab.items()} - self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case) - self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) - - def tokenize(self, text): - split_tokens = [] - for token in self.basic_tokenizer.tokenize(text): - for sub_token in self.wordpiece_tokenizer.tokenize(token): - split_tokens.append(sub_token) - - return split_tokens - - def convert_tokens_to_ids(self, tokens): - return convert_by_vocab(self.vocab, tokens) - - def convert_ids_to_tokens(self, ids): - return convert_by_vocab(self.inv_vocab, ids) - - -class BasicTokenizer(object): - """Runs basic tokenization (punctuation splitting, lower casing, etc.).""" - - def __init__(self, do_lower_case=True): - """Constructs a BasicTokenizer. - - Args: - do_lower_case: Whether to lower case the input. - """ - self.do_lower_case = do_lower_case - - def tokenize(self, text): - """Tokenizes a piece of text.""" - text = convert_to_unicode(text) - text = self._clean_text(text) - - # This was added on November 1st, 2018 for the multilingual and Chinese - # models. This is also applied to the English models now, but it doesn't - # matter since the English models were not trained on any Chinese data - # and generally don't have any Chinese data in them (there are Chinese - # characters in the vocabulary because Wikipedia does have some Chinese - # words in the English Wikipedia.). 
- text = self._tokenize_chinese_chars(text) - - orig_tokens = whitespace_tokenize(text) - split_tokens = [] - for token in orig_tokens: - if self.do_lower_case: - token = token.lower() - token = self._run_strip_accents(token) - split_tokens.extend(self._run_split_on_punc(token)) - - output_tokens = whitespace_tokenize(" ".join(split_tokens)) - return output_tokens - - def _run_strip_accents(self, text): - """Strips accents from a piece of text.""" - text = unicodedata.normalize("NFD", text) - output = [] - for char in text: - cat = unicodedata.category(char) - if cat == "Mn": - continue - output.append(char) - return "".join(output) - - def _run_split_on_punc(self, text): - """Splits punctuation on a piece of text.""" - chars = list(text) - i = 0 - start_new_word = True - output = [] - while i < len(chars): - char = chars[i] - if _is_punctuation(char): - output.append([char]) - start_new_word = True - else: - if start_new_word: - output.append([]) - start_new_word = False - output[-1].append(char) - i += 1 - - return ["".join(x) for x in output] - - def _tokenize_chinese_chars(self, text): - """Adds whitespace around any CJK character.""" - output = [] - for char in text: - cp = ord(char) - if self._is_chinese_char(cp): - output.append(" ") - output.append(char) - output.append(" ") - else: - output.append(char) - return "".join(output) - - def _is_chinese_char(self, cp): - """Checks whether CP is the codepoint of a CJK character.""" - # This defines a "chinese character" as anything in the CJK Unicode block: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - # - # Note that the CJK Unicode block is NOT all Japanese and Korean characters, - # despite its name. The modern Korean Hangul alphabet is a different block, - # as is Japanese Hiragana and Katakana. Those alphabets are used to write - # space-separated words, so they are not treated specially and handled - # like the all of the other languages. - if ((cp >= 0x4E00 and cp <= 0x9FFF) or # - (cp >= 0x3400 and cp <= 0x4DBF) or # - (cp >= 0x20000 and cp <= 0x2A6DF) or # - (cp >= 0x2A700 and cp <= 0x2B73F) or # - (cp >= 0x2B740 and cp <= 0x2B81F) or # - (cp >= 0x2B820 and cp <= 0x2CEAF) or - (cp >= 0xF900 and cp <= 0xFAFF) or # - (cp >= 0x2F800 and cp <= 0x2FA1F)): # - return True - - return False - - def _clean_text(self, text): - """Performs invalid character removal and whitespace cleanup on text.""" - output = [] - for char in text: - cp = ord(char) - if cp == 0 or cp == 0xfffd or _is_control(char): - continue - if _is_whitespace(char): - output.append(" ") - else: - output.append(char) - return "".join(output) - - -class WordpieceTokenizer(object): - """Runs WordPiece tokenziation.""" - - def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=200): - self.vocab = vocab - self.unk_token = unk_token - self.max_input_chars_per_word = max_input_chars_per_word - - def tokenize(self, text): - """Tokenizes a piece of text into its word pieces. - - This uses a greedy longest-match-first algorithm to perform tokenization - using the given vocabulary. - - For example: - input = "unaffable" - output = ["un", "##aff", "##able"] - - Args: - text: A single token or whitespace separated tokens. This should have - already been passed through `BasicTokenizer. - - Returns: - A list of wordpiece tokens. 
- """ - - text = convert_to_unicode(text) - - output_tokens = [] - for token in whitespace_tokenize(text): - chars = list(token) - if len(chars) > self.max_input_chars_per_word: - output_tokens.append(self.unk_token) - continue - - is_bad = False - start = 0 - sub_tokens = [] - while start < len(chars): - end = len(chars) - cur_substr = None - while start < end: - substr = "".join(chars[start:end]) - if start > 0: - substr = "##" + substr - if substr in self.vocab: - cur_substr = substr - break - end -= 1 - if cur_substr is None: - is_bad = True - break - sub_tokens.append(cur_substr) - start = end - - if is_bad: - output_tokens.append(self.unk_token) - else: - output_tokens.extend(sub_tokens) - return output_tokens - - -def _is_whitespace(char): - """Checks whether `chars` is a whitespace character.""" - # \t, \n, and \r are technically contorl characters but we treat them - # as whitespace since they are generally considered as such. - if char == " " or char == "\t" or char == "\n" or char == "\r": - return True - cat = unicodedata.category(char) - if cat == "Zs": - return True - return False - - -def _is_control(char): - """Checks whether `chars` is a control character.""" - # These are technically control characters but we count them as whitespace - # characters. - if char == "\t" or char == "\n" or char == "\r": - return False - cat = unicodedata.category(char) - if cat in ("Cc", "Cf"): - return True - return False - - -def _is_punctuation(char): - """Checks whether `chars` is a punctuation character.""" - cp = ord(char) - # We treat all non-letter/number ASCII as punctuation. - # Characters such as "^", "$", and "`" are not in the Unicode - # Punctuation class but we treat them as punctuation anyways, for - # consistency. - if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or - (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): - return True - cat = unicodedata.category(char) - if cat.startswith("P"): - return True - return False diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tune_squad.py b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tune_squad.py deleted file mode 100644 index 3d94b8d87d9..00000000000 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tune_squad.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python - -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Run BERT on SQuAD 1.1 and SQuAD 2.0.""" - -import tensorflow as tf -import numpy as np -from neural_compressor.experimental.data.datasets.bert_dataset import TensorflowModelZooBertDataset -from neural_compressor.experimental.data.dataloaders.tensorflow_dataloader import TensorflowModelZooBertDataLoader - -flags = tf.compat.v1.flags -FLAGS = flags.FLAGS - -## Required parameters -flags.DEFINE_string( - 'input_model', None, 'Run inference with specified pb graph.') - -flags.DEFINE_string( - 'output_model', None, 'The output model of the quantized model.') - -flags.DEFINE_string( - 'mode', 'performance', 'define benchmark mode for accuracy or performance') - -flags.DEFINE_bool( - 'tune', False, 'whether to tune the model') - -flags.DEFINE_bool( - 'benchmark', False, 'whether to benchmark the model') - -flags.DEFINE_string( - 'config', 'bert.yaml', 'yaml configuration of the model') - -def main(_): - tf.compat.v1.disable_eager_execution() - tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) - if FLAGS.benchmark: - from neural_compressor.experimental import Benchmark - evaluator = Benchmark(FLAGS.config) - evaluator.model = FLAGS.input_model - evaluator(FLAGS.mode) - - elif FLAGS.tune: - from neural_compressor.experimental import Quantization - quantizer = Quantization(FLAGS.config) - quantizer.model = FLAGS.input_model - q_model = quantizer.fit() - q_model.save(FLAGS.output_model) - -if __name__ == "__main__": - tf.compat.v1.app.run() \ No newline at end of file diff --git a/examples/tensorflow/nlp/distilbert_base/quantization/ptq/README.md b/examples/tensorflow/nlp/distilbert_base/quantization/ptq/README.md index 3425b53e951..dffcfe4ba4f 100644 --- a/examples/tensorflow/nlp/distilbert_base/quantization/ptq/README.md +++ b/examples/tensorflow/nlp/distilbert_base/quantization/ptq/README.md @@ -34,7 +34,8 @@ Intel® Extension for TensorFlow is mandatory to be installed for quantizing the ```shell pip install --upgrade intel-extension-for-tensorflow[gpu] ``` -For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_gpu.md#install-gpu-drivers). +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers). #### Quantizing the model on Intel CPU (Experimental): Intel® Extension for TensorFlow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. 
@@ -55,7 +56,6 @@ bash run_tuning.sh \ --input_model=$INPUT_MODEL \ --dataset_location=$DATASET_DIR \ --output_model=$OUTPUT_MODEL \ - --config=$CONFIG_FILE \ --batch_size=$BATCH_SIZE \ --max_seq_length=$MAX_SEQ \ --warmup_steps=$WARMUPS \ @@ -64,11 +64,11 @@ bash run_tuning.sh \ ``` ### Run Benchmark: ```shell -# benchmark mode: only get performance +# performance mode: get performance bash run_benchmark.sh \ --input_model=$INPUT_MODEL \ --dataset_location=$DATASET_DIR \ - --mode=benchmark \ + --mode=performance \ --batch_size=$BATCH_SIZE \ --max_seq_length=$MAX_SEQ \ --iters=$ITERS \ @@ -78,7 +78,7 @@ bash run_benchmark.sh \ ``` ```shell -# accuracy mode: get performance and accuracy +# accuracy mode: get accuracy bash run_benchmark.sh \ --input_model=$INPUT_MODEL \ --dataset_location=$DATASET_DIR \ @@ -94,7 +94,6 @@ Where (Default values are shown in the square brackets): * $INPUT_MODEL ["./distilbert_base_fp32.pb"]-- The path to input FP32 frozen model .pb file to load * $DATASET_DIR ["./sst2_validation_dataset"]-- The path to input dataset directory * $OUTPUT_MODEL ["./output_distilbert_base_int8.pb"]-- The user-specified export path to the output INT8 quantized model - * $CONFIG_FILE ["./distilbert_base.yaml"]-- The path to quantization configuration .yaml file to load for tuning * $BATCH_SIZE [128]-- The batch size for model inference * $MAX_SEQ [128]-- The maximum total sequence length after tokenization * $ITERS [872]-- The number of iterations to run in benchmark mode, maximum value is 872 @@ -108,7 +107,7 @@ Details of enabling Intel® Neural Compressor on DistilBERT base for TensorFlow This is a tutorial of how to enable DistilBERT base model with Intel® Neural Compressor. ## User Code Analysis -1. User specifies fp32 *model*, calibration dataloader *q_dataloader*, evaluation dataloader *eval_dataloader* and metric in tuning.metric field of model-specific yaml config file. +1. User specifies fp32 *model*, calibration dataloader *q_dataloader*, evaluation dataloader *eval_dataloader* and metric. 2. User specifies fp32 *model*, calibration dataloader *q_dataloader* and a custom *eval_func* which encapsulates the evaluation dataloader and metric by itself. @@ -138,45 +137,43 @@ class Dataloader(object): yield feed_dict, labels ``` -### Write Yaml Config File -In examples directory, there is a distilbert_base.yaml for tuning the model on Intel CPUs. The 'framework' in the yaml is set to 'tensorflow'. If running this example on Intel GPUs, the 'framework' should be set to 'tensorflow_itex' and the device in yaml file should be set to 'gpu'. The distilbert_base_itex.yaml is prepared for the GPU case. We could remove most of items and only keep mandatory item for tuning. We also implement a calibration dataloader and have evaluation field for creation of evaluation function at internal neural_compressor. - -```yaml -model: - name: distilbert_base - framework: tensorflow - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: - calibration: - sampling_size: 500 - model_wise: - weight: - granularity: per_channel - -tuning: - accuracy_criterion: - relative: 0.02 - exit_policy: - timeout: 0 - max_trials: 100 - performance_only: False - random_seed: 9527 -``` +### Quantization Config +The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. 
-In this case we calibrate and quantize the model, and use our user-defined calibration dataloader. +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + ... + ) +``` ### Code Update After prepare step is done, we add the code for quantization tuning to generate quantized model. +#### Tune +```python + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig, AccuracyCriterion + accuracy_criterion = AccuracyCriterion(tolerable_loss=0.02) + config = PostTrainingQuantConfig(calibration_sampling_size=[500], + accuracy_criterion=accuracy_criterion) + q_model = quantization.fit(model=graph, conf=config, calib_dataloader=self.dataloader, + eval_func=self.eval_func) + try: + q_model.save(ARGS.output_graph) + except Exception as e: + tf.compat.v1.logging.error("Failed to save model due to {}".format(str(e))) +``` +#### Benchmark ```python -from neural_compressor.experimental import Quantization, common -quantizer = Quantization(ARGS.config) -quantizer.calib_dataloader = self.dataloader -quantizer.model = common.Model(graph) -quantizer.eval_func = self.eval_func -q_model = quantizer.fit() + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + if ARGS.mode == 'performance': + conf = BenchmarkConfig(cores_per_instance=28, num_of_instance=1) + fit(graph, conf, b_func=self.eval_func) + elif ARGS.mode == 'accuracy': + self.eval_func(graph) ``` -The Intel® Neural Compressor quantizer.fit() function will return a best quantized model under time constraint. +The Intel® Neural Compressor quantization.fit() function will return a best quantized model under time constraint. diff --git a/examples/tensorflow/nlp/distilbert_base/quantization/ptq/distilbert_base.yaml b/examples/tensorflow/nlp/distilbert_base/quantization/ptq/distilbert_base.yaml deleted file mode 100644 index b94d3c8cbae..00000000000 --- a/examples/tensorflow/nlp/distilbert_base/quantization/ptq/distilbert_base.yaml +++ /dev/null @@ -1,41 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -version: 1.0 - -model: - name: distilbert_base - framework: tensorflow - -device: cpu # optional. default value is cpu, other value is gpu. 
- -quantization: - calibration: - sampling_size: 500 - model_wise: - weight: - granularity: per_channel - -tuning: - accuracy_criterion: - relative: 0.02 - exit_policy: - timeout: 0 - max_trials: 100 - performance_only: False - random_seed: 9527 diff --git a/examples/tensorflow/nlp/distilbert_base/quantization/ptq/distilbert_base_itex.yaml b/examples/tensorflow/nlp/distilbert_base/quantization/ptq/distilbert_base_itex.yaml deleted file mode 100644 index 887f0f5ad99..00000000000 --- a/examples/tensorflow/nlp/distilbert_base/quantization/ptq/distilbert_base_itex.yaml +++ /dev/null @@ -1,41 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -version: 1.0 - -model: - name: distilbert_base - framework: tensorflow_itex - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: - calibration: - sampling_size: 500 - model_wise: - weight: - granularity: per_channel - -tuning: - accuracy_criterion: - relative: 0.02 - exit_policy: - timeout: 0 - max_trials: 100 - performance_only: False - random_seed: 9527 diff --git a/examples/tensorflow/nlp/distilbert_base/quantization/ptq/run_benchmark.sh b/examples/tensorflow/nlp/distilbert_base/quantization/ptq/run_benchmark.sh index f02470b85c3..733b2be988e 100644 --- a/examples/tensorflow/nlp/distilbert_base/quantization/ptq/run_benchmark.sh +++ b/examples/tensorflow/nlp/distilbert_base/quantization/ptq/run_benchmark.sh @@ -14,20 +14,18 @@ function init_params { # set default value input_model="./distilbert_base_fp32.pb" dataset_location="./sst2_validation_dataset" - mode="accuracy" + mode="performance" batch_size=128 max_seq_length=128 iters=872 warmup_steps=10 num_inter=2 num_intra=28 + benchmark=True for var in "$@" do case $var in - --topology=*) - topology=$(echo $var |cut -f2 -d=) - ;; --input_model=*) input_model=$(echo "$var" |cut -f2 -d=) ;; @@ -55,9 +53,8 @@ function init_params { --num_intra=*) num_intra=$(echo ${var} |cut -f2 -d=) ;; - *) - echo "Parameter error: ${var}" - exit 1 + --benchmark=*) + benchmark=$(echo ${var} |cut -f2 -d=) ;; esac done @@ -70,6 +67,7 @@ function run_benchmark { python run_inference.py \ --in-graph=${input_model} \ --data-location=${dataset_location} \ + --benchmark=${benchmark} \ --mode=${mode} \ --steps=${iters} \ --warmup-steps=${warmup_steps} \ diff --git a/examples/tensorflow/nlp/distilbert_base/quantization/ptq/run_inference.py b/examples/tensorflow/nlp/distilbert_base/quantization/ptq/run_inference.py index 92fcaf5338d..5d854a0c64d 100644 --- a/examples/tensorflow/nlp/distilbert_base/quantization/ptq/run_inference.py +++ b/examples/tensorflow/nlp/distilbert_base/quantization/ptq/run_inference.py @@ -1,3 +1,4 @@ + # # -*- coding: utf-8 -*- # @@ -25,7 +26,6 @@ from transformers import AutoTokenizer from datasets import load_from_disk from tensorflow.core.protobuf import saved_model_pb2 -from neural_compressor.experimental import 
Quantization, common from neural_compressor.utils.utility import dump_elapsed_time from neural_compressor.utils import logger @@ -55,10 +55,20 @@ default="output_distilbert_base_int8.pb" ) arg_parser.add_argument("-m", "--mode", type=str, - choices=["benchmark", "accuracy", "tune"], - help="One of three options: 'benchmark'/'accuracy'/'tune'.", + choices=['performance', 'accuracy'], + help="One of two options: 'performance'/'accuracy'.", dest="mode", - default="tune" + default="performance" + ) +arg_parser.add_argument("--tune", type=bool, + help="whether to apply quantization", + dest="tune", + default=False + ) +arg_parser.add_argument("--benchmark", type=bool, + help="whether to do benchmark", + dest="benchmark", + default=False ) arg_parser.add_argument('-e', "--num-inter-threads", type=int, help="The number of inter-thread.", @@ -181,9 +191,9 @@ def validate_args(self): logger.warning("Warmup steps greater than max possible value of 22." + \ " Setting to max value of ", MAX_WARMUP_STEPS) ARGS.warmup_steps = MAX_WARMUP_STEPS - if ARGS.mode in ["tune", "accuracy"]: + if ARGS.tune or (ARGS.benchmark and ARGS.mode == "accuracy"): ARGS.steps = MAX_STEPS - elif ARGS.mode == "benchmark": + elif ARGS.benchmark: if ARGS.steps > (MAX_STEPS - MAX_WARMUP_STEPS): logger.warning("Steps greater than max possible value of {}.".format(MAX_STEPS - MAX_WARMUP_STEPS)) logger.warning("Setting to max value of {}".format(MAX_STEPS - MAX_WARMUP_STEPS)) @@ -244,11 +254,11 @@ def eval_func(self, graph): start_time = time.time() pred = sess.run(output, feed_dict=feed_dict) run_time = time.time() - start_time - if ARGS.mode in ["tune", "accuracy"]: + if ARGS.tune or (ARGS.benchmark and ARGS.mode == "accuracy"): total_correct_predictions += self.get_correct_predictions(pred, labels) total_time += run_time time_per_batch = total_time / float(ARGS.steps / ARGS.batch_size) - if ARGS.mode in ["tune", "accuracy"]: + if ARGS.tune or (ARGS.benchmark and ARGS.mode == "accuracy"): accuracy = total_correct_predictions / ARGS.steps logger.info("Accuracy: {:.4f}".format(accuracy)) if self.dataloader.batch_size == 1: @@ -258,18 +268,28 @@ def eval_func(self, graph): def run(self): graph = self.load_graph() - if ARGS.mode == "tune": - quantizer = Quantization(ARGS.config) - quantizer.calib_dataloader = self.dataloader - quantizer.model = common.Model(graph) - quantizer.eval_func = self.eval_func - q_model = quantizer.fit() + if ARGS.tune: + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig, AccuracyCriterion + accuracy_criterion = AccuracyCriterion(tolerable_loss=0.02) + config = PostTrainingQuantConfig(calibration_sampling_size=[500], + accuracy_criterion=accuracy_criterion) + q_model = quantization.fit(model=graph, conf=config, calib_dataloader=self.dataloader, + eval_func=self.eval_func) try: q_model.save(ARGS.output_graph) except Exception as e: tf.compat.v1.logging.error("Failed to save model due to {}".format(str(e))) - else: - self.eval_func(graph) + elif ARGS.benchmark: + assert ARGS.mode == 'performance' or ARGS.mode == 'accuracy', \ + "Benchmark only supports performance or accuracy mode." 
+ from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + if ARGS.mode == 'performance': + conf = BenchmarkConfig(cores_per_instance=28, num_of_instance=1) + fit(graph, conf, b_func=self.eval_func) + elif ARGS.mode == 'accuracy': + self.eval_func(graph) if __name__ == "__main__": diff --git a/examples/tensorflow/nlp/distilbert_base/quantization/ptq/run_tuning.sh b/examples/tensorflow/nlp/distilbert_base/quantization/ptq/run_tuning.sh index 7695c3a1151..1a9b8dea7e5 100644 --- a/examples/tensorflow/nlp/distilbert_base/quantization/ptq/run_tuning.sh +++ b/examples/tensorflow/nlp/distilbert_base/quantization/ptq/run_tuning.sh @@ -15,19 +15,16 @@ function init_params { input_model="./distilbert_base_fp32.pb" dataset_location="./sst2_validation_dataset" output_model="./output_distilbert_base_int8.pb" - config="./distilbert_base.yaml" batch_size=128 max_seq_length=128 warmup_steps=10 num_inter=2 num_intra=28 + tune=True for var in "$@" do case $var in - --topology=*) - topology=$(echo $var |cut -f2 -d=) - ;; --input_model=*) input_model=$(echo ${var} |cut -f2 -d=) ;; @@ -37,9 +34,6 @@ function init_params { --output_model=*) output_model=$(echo ${var} |cut -f2 -d=) ;; - --config=*) - config=$(echo ${var} |cut -f2 -d=) - ;; --batch_size=*) batch_size=$(echo ${var} |cut -f2 -d=) ;; @@ -55,9 +49,8 @@ function init_params { --num_intra=*) num_intra=$(echo ${var} |cut -f2 -d=) ;; - *) - echo "Parameter error: ${var}" - exit 1 + --tune=*) + tune=$(echo ${var} |cut -f2 -d=) ;; esac done @@ -71,8 +64,7 @@ function run_tuning { --in-graph=${input_model} \ --data-location=${dataset_location} \ --output-graph=${output_model} \ - --config=${config} \ - --mode=tune \ + --tune=${tune} \ --warmup-steps=${warmup_steps} \ --batch-size=${batch_size} \ --max-seq-length=${max_seq_length} \ diff --git a/examples/tensorflow/nlp/transformer_lt/quantization/ptq/README.md b/examples/tensorflow/nlp/transformer_lt/quantization/ptq/README.md index 1510a4daff2..d0de7dd592a 100644 --- a/examples/tensorflow/nlp/transformer_lt/quantization/ptq/README.md +++ b/examples/tensorflow/nlp/transformer_lt/quantization/ptq/README.md @@ -11,9 +11,9 @@ This document is used to list steps of reproducing TensorFlow Intel® Neural Com pip install neural-compressor ``` -### 2. Install Intel Tensorflow +### 2. Install Tensorflow ```shell -pip install intel-tensorflow +pip install tensorflow ``` > Note: Supported Tensorflow [Version](../../../../../../README.md#supported-frameworks). @@ -25,7 +25,8 @@ Intel Extension for Tensorflow is mandatory to be installed for quantizing the m ```shell pip install --upgrade intel-extension-for-tensorflow[gpu] ``` -For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers) +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers). #### Quantizing the model on Intel CPU(Experimental) Intel Extension for Tensorflow for Intel CPUs is experimental currently. 
It's not mandatory for quantizing the model on Intel CPUs. @@ -56,7 +57,7 @@ bash prepare_dataset_model.sh ## Run Command ```shell -python main.py --input_graph=/path/to/fp32_graphdef.pb --inputs_file=/path/to/newstest2014.en --reference_file=/path/to/newstest2014.de --vocab_file=/path/to/vocab.txt --config=./transformer_lt.yaml --tune +python main.py --input_graph=/path/to/fp32_graphdef.pb --inputs_file=/path/to/newstest2014.en --reference_file=/path/to/newstest2014.de --vocab_file=/path/to/vocab.txt --tune ``` Details of enabling Intel® Neural Compressor on transformer-lt for Tensorflow. @@ -76,66 +77,66 @@ Below dataset class uses getitem to provide the model with input. ```python class Dataset(object): - def __init__(self, *args): - # initialize dataset related info here - ... + def __init__(self, *args): + # initialize dataset related info here + ... - def __getitem__(self, index): - data = self.batch[index] - label = self.ref_lines[index] - return data[0], label + def __getitem__(self, index): + data = self.batch[index] + label = self.ref_lines[index] + return data[0], label - def __len__(self): - return len(self.batch) + def __len__(self): + return len(self.batch) ``` ### Evaluation Part Adaption We evaluate the model with BLEU score, its source: https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py -### Write Yaml config file -In examples directory, there is a transformer_lt.yaml for tuning the model on Intel CPUs. The 'framework' in the yaml is set to 'tensorflow'. If running this example on Intel GPUs, the 'framework' should be set to 'tensorflow_itex' and the device in yaml file should be set to 'gpu'. The transformer_lt_itex.yaml is prepared for the GPU case. We could remove most of items and only keep mandatory item for tuning. We also implement a calibration dataloader and have evaluation field for creation of evaluation function at internal neural_compressor. - -```yaml -model: - name: transformer_lt - framework: tensorflow - inputs: input_tensor - outputs: model/Transformer/strided_slice_19 - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: - calibration: - sampling_size: 500 - model_wise: - weight: - granularity: per_channel - -tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - max_trials: 100 - random_seed: 9527 +### Quantization Config +The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + inputs=['input_tensor'], + outputs=['model/Transformer/strided_slice_19'], + ... + ) ``` -Here we set the input tensor and output tensors name into *inputs* and *outputs* field. +Here we set the input tensor and output tensors name into *inputs* and *outputs* args. In this case we calibrate and quantize the model, and use our calibration dataloader initialized from a 'Dataset' object. ### Code update After prepare step is done, we add tune code to generate quantized model. 
+#### Tune ```python - from neural_compressor.experimental import Quantization - from neural_compressor.adaptor.tf_utils.util import write_graph - quantizer = Quantization(FLAGS.config) - ds = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file) - quantizer.calib_dataloader = common.DataLoader(ds, collate_fn=collate_fn, batch_size=FLAGS.batch_size) - quantizer.model = common.Model(graph) - quantizer.eval_func = eval_func - q_model = quantizer.fit() - q_model.save(FLAGS.output_model) + from neural_compressor import quantization + from neural_compressor.data.dataloaders.dataloader import DataLoader + from neural_compressor.config import PostTrainingQuantConfig + ds = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file) + calib_dataloader = DataLoader(dataset=ds, collate_fn=collate_fn, \ + batch_size=FLAGS.batch_size, framework='tensorflow') + conf = PostTrainingQuantConfig(inputs=['input_tensor'], + outputs=['model/Transformer/strided_slice_19'], + calibration_sampling_size=[500]) + q_model = quantization.fit(graph, conf=conf, calib_dataloader=calib_dataloader, + eval_func=eval_func) + try: + q_model.save(FLAGS.output_model) + except Exception as e: + print("Failed to save model due to {}".format(str(e))) ``` - -The Intel® Neural Compressor quantizer.fit() function will return a best quantized model under time constraint. +#### Benchmark +```python + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + if FLAGS.mode == 'performance': + fit(graph, conf=BenchmarkConfig(), b_func=eval_func) + elif FLAGS.mode == 'accuracy': + eval_func(graph) +``` +The Intel® Neural Compressor quantization.fit() function will return a best quantized model under time constraint. diff --git a/examples/tensorflow/nlp/transformer_lt/quantization/ptq/main.py b/examples/tensorflow/nlp/transformer_lt/quantization/ptq/main.py index 6f1949c5cd4..5761ee51732 100644 --- a/examples/tensorflow/nlp/transformer_lt/quantization/ptq/main.py +++ b/examples/tensorflow/nlp/transformer_lt/quantization/ptq/main.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2021 Intel Corporation +# Copyright (c) 2022 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -49,16 +49,19 @@ flags.DEFINE_string("vocab_file", None, "Path to subtoken vocabulary file.") -flags.DEFINE_string("config", None, - "Config json file") - flags.DEFINE_string("output_model", None, "The output model of the quantized model.") -flags.DEFINE_string("mode", "tune", - "One of three options: 'benchmark'/'accuracy'/'tune'.") +flags.DEFINE_bool('tune', False, + 'whether to tune the model') + +flags.DEFINE_bool('benchmark', False, + 'whether to benchmark the model') + +flags.DEFINE_string("mode", 'performance', + "One of three options: 'performance'/'accuracy'.") -flags.DEFINE_integer("iters", -1, +flags.DEFINE_integer("iters", 100, "The iteration used for benchmark.") class UnicodeRegex(object): @@ -145,13 +148,16 @@ def eval_func(infer_graph, iteration=-1): config.use_per_session_threads = 1 config.inter_op_parallelism_threads = 1 sess = tf.compat.v1.Session(graph=infer_graph, config=config) + iteration=-1 time_list = [] bleu_eval = bleu() predictions = [] labels = [] warmup = 10 - if iteration != -1: - assert iteration >= warmup, 'iteration must be larger than warmup' + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = FLAGS.iters + assert iteration >= warmup, 'iteration must be larger than warmup' + for idx, (input_data, label) in enumerate(dataloader): if idx < iteration or iteration == -1: time_start = time.time() @@ -162,10 +168,12 @@ def eval_func(infer_graph, iteration=-1): labels.extend(label) else: break + latency = np.array(time_list[warmup: ]).mean() / FLAGS.batch_size - print('Batch size = {}'.format(FLAGS.batch_size)) - print('Latency: {:.3f} ms'.format(latency * 1000)) - print('Throughput: {:.3f} items/sec'.format(1./ latency)) + if FLAGS.benchmark and FLAGS.mode == 'performance': + print('Batch size = {}'.format(FLAGS.batch_size)) + print('Latency: {:.3f} ms'.format(latency * 1000)) + print('Throughput: {:.3f} items/sec'.format(1./ latency)) # only calculate accuracy when running out all predictions if iteration == -1: @@ -224,23 +232,33 @@ def __len__(self): def main(_): graph = load_graph(FLAGS.input_graph) - if FLAGS.mode == 'tune': - from neural_compressor.experimental import Quantization, common - quantizer = Quantization(FLAGS.config) + if FLAGS.tune: + from neural_compressor import quantization + from neural_compressor.data.dataloaders.dataloader import DataLoader + from neural_compressor.config import PostTrainingQuantConfig ds = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file) - quantizer.calib_dataloader = common.DataLoader(ds, collate_fn=collate_fn, \ - batch_size=FLAGS.batch_size) - quantizer.model = common.Model(graph) - quantizer.eval_func = eval_func - q_model = quantizer.fit() + calib_dataloader = DataLoader(dataset=ds, collate_fn=collate_fn, \ + batch_size=FLAGS.batch_size, framework='tensorflow') + conf = PostTrainingQuantConfig(inputs=['input_tensor'], + outputs=['model/Transformer/strided_slice_19'], + calibration_sampling_size=[500]) + q_model = quantization.fit(graph, conf=conf, calib_dataloader=calib_dataloader, + eval_func=eval_func) try: q_model.save(FLAGS.output_model) except Exception as e: print("Failed to save model due to {}".format(str(e))) - elif FLAGS.mode == 'benchmark': - eval_func(graph, FLAGS.iters) - elif FLAGS.mode == 'accuracy': - eval_func(graph, -1) + + if FLAGS.benchmark: + assert FLAGS.mode == 'performance' or FLAGS.mode == 'accuracy', \ + "Benchmark only supports performance or accuracy mode." 
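In contrast to the DistilBERT script, the transformer_lt example declares its switches with `flags.DEFINE_bool` (absl flags via `tf.compat.v1.flags`), which is why the updated run scripts can pass bare `--tune` / `--benchmark` with no value. A small illustration using plain absl is shown here; the script and flag names are examples only.

```python
# Illustration only: absl boolean flags accept --tune, --notune and --tune=false.
from absl import app, flags

flags.DEFINE_bool("tune", False, "whether to tune the model")
FLAGS = flags.FLAGS

def main(_):
    print("tune =", FLAGS.tune)

if __name__ == "__main__":
    app.run(main)  # e.g. `python flag_demo.py --notune` prints "tune = False"
```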
+ from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + if FLAGS.mode == 'performance': + conf = BenchmarkConfig(cores_per_instance=28, num_of_instance=1) + fit(graph, conf, b_func=eval_func) + elif FLAGS.mode == 'accuracy': + eval_func(graph) if __name__ == "__main__": tf.compat.v1.app.run() diff --git a/examples/tensorflow/nlp/transformer_lt/quantization/ptq/run_benchmark.sh b/examples/tensorflow/nlp/transformer_lt/quantization/ptq/run_benchmark.sh index da7f6fc5f5f..87bc4c7d5c1 100644 --- a/examples/tensorflow/nlp/transformer_lt/quantization/ptq/run_benchmark.sh +++ b/examples/tensorflow/nlp/transformer_lt/quantization/ptq/run_benchmark.sh @@ -15,9 +15,6 @@ function init_params { for var in "$@" do case $var in - --topology=*) - topology=$(echo $var |cut -f2 -d=) - ;; --dataset_location=*) dataset_location=$(echo "$var" |cut -f2 -d=) ;; @@ -33,10 +30,6 @@ function init_params { --iters=*) iters=$(echo ${var} |cut -f2 -d=) ;; - *) - echo "Error: No such parameter: ${var}" - exit 1 - ;; esac done @@ -46,8 +39,8 @@ function define_mode { if [[ ${mode} == "accuracy" ]]; then mode="accuracy" - elif [[ ${mode} == "benchmark" ]]; then - mode="benchmark" + elif [[ ${mode} == "performance" ]]; then + mode="performance" else echo "Error: No such mode: ${mode}" exit 1 @@ -56,13 +49,12 @@ function define_mode { # run_benchmark function run_benchmark { - config=$topology'.yaml' python main.py \ --input_graph=${input_model} \ --inputs_file=${dataset_location}/newstest2014.en \ --reference_file=${dataset_location}/newstest2014.de \ --vocab_file=${dataset_location}/vocab.txt \ - --config=${config} \ + --benchmark \ --mode=${mode} \ --iters=${iters} \ --batch_size=${batch_size} diff --git a/examples/tensorflow/nlp/transformer_lt/quantization/ptq/run_tuning.sh b/examples/tensorflow/nlp/transformer_lt/quantization/ptq/run_tuning.sh index ed210ea91eb..2f2075cf346 100644 --- a/examples/tensorflow/nlp/transformer_lt/quantization/ptq/run_tuning.sh +++ b/examples/tensorflow/nlp/transformer_lt/quantization/ptq/run_tuning.sh @@ -11,13 +11,9 @@ function main { # init params function init_params { - for var in "$@" do case $var in - --topology=*) - topology=$(echo $var |cut -f2 -d=) - ;; --dataset_location=*) dataset_location=$(echo "$var" |cut -f2 -d=) ;; @@ -34,15 +30,13 @@ function init_params { # run_tuning function run_tuning { - config=$topology'.yaml' python main.py \ --input_graph=${input_model} \ --inputs_file=${dataset_location}/newstest2014.en \ --reference_file=${dataset_location}/newstest2014.de \ --vocab_file=${dataset_location}/vocab.txt \ - --config=${config} \ --output_model=${output_model} \ - --mode=tune + --tune } main "$@" diff --git a/examples/tensorflow/nlp/transformer_lt/quantization/ptq/transformer_lt.yaml b/examples/tensorflow/nlp/transformer_lt/quantization/ptq/transformer_lt.yaml deleted file mode 100644 index f8de7826c45..00000000000 --- a/examples/tensorflow/nlp/transformer_lt/quantization/ptq/transformer_lt.yaml +++ /dev/null @@ -1,39 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: - name: transformer_lt - framework: tensorflow - inputs: input_tensor - outputs: model/Transformer/strided_slice_19 - -device: cpu # optional. default value is cpu, other value is gpu. - -quantization: - calibration: - sampling_size: 500 - model_wise: - weight: - granularity: per_channel - -tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - max_trials: 100 - random_seed: 9527 diff --git a/examples/tensorflow/nlp/transformer_lt/quantization/ptq/transformer_lt_itex.yaml b/examples/tensorflow/nlp/transformer_lt/quantization/ptq/transformer_lt_itex.yaml deleted file mode 100644 index 1e285c55dd9..00000000000 --- a/examples/tensorflow/nlp/transformer_lt/quantization/ptq/transformer_lt_itex.yaml +++ /dev/null @@ -1,37 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -model: - name: transformer_lt - framework: tensorflow_itex - inputs: input_tensor - outputs: model/Transformer/strided_slice_19 - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: - calibration: - sampling_size: 500 - model_wise: - weight: - granularity: per_channel - -tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - max_trials: 100 - random_seed: 9527 diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/README.md b/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/README.md index a8789bbd37c..730feb89cb0 100644 --- a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/README.md +++ b/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/README.md @@ -26,7 +26,8 @@ Intel Extension for Tensorflow is mandatory to be installed for quantizing the m ```shell pip install --upgrade intel-extension-for-tensorflow[gpu] ``` -For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers) +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers). #### Quantizing the model on Intel CPU(Experimental) Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. 
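The `transformer_lt*.yaml` files removed above carried tuning knobs (a 1% relative accuracy criterion, `timeout: 0`, `max_trials: 100`) that the rewritten READMEs no longer spell out, since only `inputs`, `outputs` and `calibration_sampling_size` are passed to `PostTrainingQuantConfig`. A hedged sketch of how those yaml fields map onto the 2.x config objects follows; `TuningCriterion` and the keyword names are assumptions based on `neural_compressor.config` and should be checked against the installed release.

```python
# Sketch only: expressing the deleted yaml 'tuning' section with config objects.
from neural_compressor.config import (
    AccuracyCriterion,
    PostTrainingQuantConfig,
    TuningCriterion,
)

accuracy_criterion = AccuracyCriterion(
    criterion="relative",   # yaml: accuracy_criterion.relative
    tolerable_loss=0.01,
)
tuning_criterion = TuningCriterion(
    timeout=0,              # yaml: exit_policy.timeout
    max_trials=100,         # yaml: exit_policy.max_trials
)
conf = PostTrainingQuantConfig(
    inputs=["input_tensor"],
    outputs=["model/Transformer/strided_slice_19"],
    calibration_sampling_size=[500],
    accuracy_criterion=accuracy_criterion,
    tuning_criterion=tuning_criterion,
)
```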
@@ -49,7 +50,6 @@ bash run_tuning.sh \ --dataset_location=$DATASET_DIR \ --output_model=$OUTPUT_MODEL \ --file_out=$OUTPUT_TRANSLATION_FILE \ - --config=$CONFIG_FILE \ --batch_size=$BATCH_SIZE \ --warmup_steps=$WARMUPS \ --bleu_variant=$VARIANT \ @@ -58,12 +58,12 @@ bash run_tuning.sh \ ``` ### Run Benchmark: ``` -# benchmark mode: only get performance +# performance mode: get performance bash run_benchmark.sh \ --input_model=$INPUT_MODEL \ --dataset_location=$DATASET_DIR \ --file_out=$OUTPUT_TRANSLATION_FILE \ - --mode=benchmark \ + --mode=performance \ --batch_size=$BATCH_SIZE \ --iters=$ITERATIONS \ --warmup_steps=$WARMUPS \ @@ -73,7 +73,7 @@ bash run_benchmark.sh \ ``` ``` -# accuracy mode: get performance and accuracy +# accuracy mode: get accuracy bash run_benchmark.sh \ --input_model=$INPUT_MODEL \ --dataset_location=$DATASET_DIR \ @@ -91,7 +91,6 @@ Where (Default values are shown in the square brackets): * $DATASET_DIR ["./transformer_uniform_data"]-- The path to input dataset directory, which should include newstest2014.en, newstest2014.de and vocab.ende.32768 * $OUTPUT_MODEL ["./output_transformer_mlperf_int8.pb"]-- The user-specified export path to the output INT8 quantized model * $OUTPUT_TRANSLATION_FILE ["./output_translation_result.txt"] -- The file path to save model translation result, only the most recent translation result will be saved - * $CONFIG_FILE ["./transformer_lt_mlperf.yaml"]-- The path to quantization configuration .yaml file to load for tuning * $BATCH_SIZE [64]-- The batch size for model inference * $ITERATIONS [-1]-- The user-defined total inference iterations in benchmark mode. If it is -1, it means to run the entire dataset * $WARMUPS [10]-- The number of warmup steps before benchmarking the model @@ -105,7 +104,7 @@ Details of enabling Intel® Neural Compressor on Transformer_LT_mlperf for Tenso This is a tutorial of how to enable Transformer_LT_mlperf model with Intel® Neural Compressor. ## User Code Analysis -1. User specifies fp32 *model*, calibration dataset *q_dataloader*, evaluation dataset *eval_dataloader* and metric in tuning.metric field of model-specific yaml config file. +1. User specifies fp32 *model*, calibration dataset *q_dataloader*, evaluation dataset *eval_dataloader* and metric. 2. User specifies fp32 *model*, calibration dataset *q_dataloader* and a custom *eval_func* which encapsulates the evaluation dataset and metric by itself. @@ -131,51 +130,54 @@ class Dataset(object): ### Evaluation Part Adaption We evaluate the model with BLEU score, its source: https://github.com/IntelAI/models/blob/master/models/language_translation/tensorflow/transformer_mlperf/inference/fp32/transformer/compute_bleu.py -### Write Yaml config file -In examples directory, there is a transformer_lt_mlperf.yaml for tuning the model on Intel CPUs. The 'framework' in the yaml is set to 'tensorflow'. If running this example on Intel GPUs, the 'framework' should be set to 'tensorflow_itex' and the device in yaml file should be set to 'gpu'. The transformer_lt_mlperf_itex.yaml is prepared for the GPU case. We could remove most of items and only keep mandatory item for tuning. We also implement a calibration dataloader and have evaluation field for creation of evaluation function at internal neural_compressor. - -```yaml -model: - name: transformer_lt_mlperf - framework: tensorflow - inputs: input_tokens - outputs: model/Transformer/strided_slice_15 - -device: cpu # optional. default value is cpu, other value is gpu. 
- -quantization: - calibration: - sampling_size: 500 - model_wise: - weight: - granularity: per_channel - -tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - max_trials: 100 - random_seed: 9527 +### Quantization Config +The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + inputs=['input_tokens'], + outputs=['model/Transformer/strided_slice_15'], + ... + ) ``` -Here we set the input tensor and output tensors name into *inputs* and *outputs* field. +Here we set the input tensor and output tensors name into *inputs* and *outputs* args. In this case we calibrate and quantize the model, and use our calibration dataloader initialized from a 'Dataset' object. ### Code update After prepare step is done, we add tune code to generate quantized model. +#### Tune ```python -from neural_compressor.experimental import Quantization, common -quantizer = Quantization(FLAGS.config) -dataset = Dataset(FLAGS.input_file, FLAGS.vocab_file) -quantizer.calib_dataloader = common.DataLoader(dataset, - collate_fn = collate_fn, - batch_size = FLAGS.batch_size) -quantizer.model = common.Model(graph) -quantizer.eval_func = eval_func -q_model = quantizer.fit() -q_model.save(FLAGS.output_model) + graph = load_graph(FLAGS.input_graph) + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.data.dataloaders.dataloader import DataLoader + dataset = Dataset(FLAGS.input_file, FLAGS.vocab_file) + calib_dataloader = DataLoader(dataset = dataset, + framework ='tensorflow', + collate_fn = collate_fn, + batch_size = FLAGS.batch_size) + + conf = PostTrainingQuantConfig(inputs=['input_tokens'], + outputs=['model/Transformer/strided_slice_15'], + calibration_sampling_size=[500]) + q_model = quantization.fit(graph, conf=conf, calib_dataloader=calib_dataloader, + eval_func=eval_func) + try: + q_model.save(FLAGS.output_model) + except Exception as e: + tf.compat.v1.logging.error("Failed to save model due to {}".format(str(e))) ``` - -The Intel® Neural Compressor quantizer.fit() function will return a best quantized model under time constraint. +#### Benchmark +```python + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + if FLAGS.mode == 'performance': + fit(graph, conf=BenchmarkConfig(), b_func=eval_func) + elif FLAGS.mode == 'accuracy': + eval_func(graph) +``` +The Intel® Neural Compressor quantization.fit() function will return a best quantized model under time constraint. 
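The benchmark snippet above builds a bare `BenchmarkConfig()` and lets `eval_func` handle warmup and iteration counting itself. For completeness, a hedged sketch of the config-driven alternative is given here; the keyword arguments (`warmup`, `iteration`, `cores_per_instance`, `num_of_instance`) and the dataloader-based call are assumptions about `neural_compressor.benchmark.fit` and should be verified before use.

```python
# Sketch only: letting BenchmarkConfig drive the measurement loop via a
# dataloader instead of a user-provided eval_func.
from neural_compressor.benchmark import fit
from neural_compressor.config import BenchmarkConfig

conf = BenchmarkConfig(
    warmup=10,              # counterpart of --warmup_steps in this example
    iteration=100,          # counterpart of --iters
    cores_per_instance=28,
    num_of_instance=1,
)
fit(model="output_transformer_mlperf_int8.pb", conf=conf, b_dataloader=calib_dataloader)
```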
\ No newline at end of file diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_benchmark.sh b/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_benchmark.sh index a2b27dfb5f8..65a8db67a23 100644 --- a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_benchmark.sh +++ b/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_benchmark.sh @@ -12,11 +12,9 @@ function main { # init params function init_params { # set default value - topology="transformer_lt_mlperf" input_model="./transformer_mlperf_fp32.pb" dataset_location="./transformer_uniform_data" file_out="./output_translation_result.txt" - mode="accuracy" batch_size=64 iters=-1 warmup_steps=10 @@ -27,9 +25,6 @@ function init_params { for var in "$@" do case $var in - --topology=*) - topology=$(echo $var |cut -f2 -d=) - ;; --input_model=*) input_model=$(echo "$var" |cut -f2 -d=) ;; @@ -60,10 +55,6 @@ function init_params { --num_intra=*) num_intra=$(echo ${var} |cut -f2 -d=) ;; - *) - echo "Parameter error: ${var}" - exit 1 - ;; esac done @@ -78,6 +69,7 @@ function run_benchmark { --reference_file=${dataset_location}/newstest2014.de \ --vocab_file=${dataset_location}/vocab.ende.32768 \ --file_out=${file_out} \ + --benchmark \ --mode=${mode} \ --iters=${iters} \ --warmup_steps=${warmup_steps} \ diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_inference.py b/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_inference.py index 55b654e0b64..98c7e5e68d8 100644 --- a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_inference.py +++ b/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_inference.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Copyright (c) 2021 Intel Corporation +# Copyright (c) 2022 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -31,7 +31,6 @@ from utils import tokenizer from utils.tokenizer import Subtokenizer from utils import metrics -from neural_compressor.experimental import Quantization, common from neural_compressor.data import DATALOADERS from neural_compressor.utils.utility import dump_elapsed_time from neural_compressor.utils import logger @@ -40,48 +39,63 @@ INPUT_TENSOR_NAMES = ['input_tokens:0'] OUTPUT_TENSOR_NAMES = ["model/Transformer/strided_slice_15:0"] -flags.DEFINE_string( - "config", "transformer_lt_mlperf.yaml", - """Quantization configuration file to load.""") flags.DEFINE_string( "input_graph", "transformer_mlperf_fp32.pb", """TensorFlow 'GraphDef' file to load.""") + flags.DEFINE_string( "output_model", "output_transformer_mlperf_int8.pb", """The output model of the quantized model.""") + flags.DEFINE_bool( "input_binary", True, """Whether the input files are in binary format.""") + flags.DEFINE_string( "vocab_file", "vocab.ende.32768", """Path to subtoken vocabulary file.""") + flags.DEFINE_string( "input_file", "newstest2014.en", """File containing text to translate.""") + flags.DEFINE_string( "reference_file", "newstest2014.de", """File containing reference translation.""") + flags.DEFINE_string( "file_out", "output_translation_result.txt", - """Save latest translation to this file when using 'accuracy'/'tune' mode.""") + """Save latest translation to this file when using 'accuracy' mode.""") + flags.DEFINE_integer( "batch_size", 64, """The validation batch size.""") + flags.DEFINE_integer( "num_inter", 2, """Number of inter op parallelism thread to use.""") + flags.DEFINE_integer( "num_intra", 28, """Number of intra op parallelism thread to use.""") + flags.DEFINE_integer( "warmup_steps", 10, """Number of warmup steps before benchmarking the model.""") + flags.DEFINE_integer( - "iters", -1, - "The iteration used for 'benchmark' mode.") -flags.DEFINE_string( - "mode", "tune", - """One of three options: 'benchmark'/'accuracy'/'tune'.""") + "iters", 100, + "The iteration used for performance mode.") + +flags.DEFINE_bool('tune', False, + 'whether to tune the model') + +flags.DEFINE_bool('benchmark', False, + 'whether to benchmark the model') + +flags.DEFINE_string("mode", None, + "One of three options: 'performance'/'accuracy'.") + flags.DEFINE_string( "bleu_variant", "uncased", """One of two options: 'uncased'/'cased'.""") @@ -203,8 +217,6 @@ def collate_fn(batch): @dump_elapsed_time(customized_msg="Customized eval_func") def eval_func(infer_graph): - assert FLAGS.mode in ["benchmark", "accuracy", "tune"], \ - "'mode' must be one of three options: 'benchmark'/'accuracy'/'tune'." dataset = Dataset(FLAGS.input_file, FLAGS.vocab_file) sorted_keys = dataset.sorted_keys dataloader = DATALOADERS['tensorflow'] \ @@ -215,47 +227,50 @@ def eval_func(infer_graph): session_config = tf.compat.v1.ConfigProto( inter_op_parallelism_threads = FLAGS.num_inter, intra_op_parallelism_threads = FLAGS.num_intra) + with tf.compat.v1.Session(config=session_config, graph=infer_graph) as sess: time_list = [] translations = [] warmup = FLAGS.warmup_steps if FLAGS.warmup_steps > 0 else 0 - iteration = FLAGS.iters \ - if FLAGS.iters > -1 and FLAGS.mode == "benchmark" else len(dataloader) - assert iteration != 0, \ - "'iteration' cannot be zero." - assert iteration >= warmup, \ + iteration = -1 + if FLAGS.benchmark and FLAGS.mode == "performance": + iteration = FLAGS.iters + assert iteration >= warmup, \ "'iteration' must be greater than or equal to warmup." 
- assert iteration <= len(dataloader), \ - "'iteration' must be less than or equal to len(dataloader)." - if FLAGS.mode == "benchmark": logger.info('******** Start to get performance of the model ********') else: logger.info('******** Start to get accuracy and performance of the model ********') + assert iteration != 0, \ + "'iteration' cannot be zero." + assert iteration <= len(dataloader), \ + "'iteration' must be less than or equal to len(dataloader)." + if warmup > 0: logger.info('Start to do warm-up with {}/{} (steps/total_iterations) before getting performance.' \ .format(warmup, iteration)) else: logger.info('Start to get performance with {} iterations.'.format(iteration)) for idx, (input_data, _) in enumerate(dataloader): - if idx < iteration: - if idx == warmup and warmup > 0: - logger.info('The warm-up is over.') - logger.info('Start to get performance with {}/{} (steps/total_iterations).' \ - .format(iteration - warmup, iteration)) - feed_dict = {input_tensors[0]: input_data} - time_start = time.time() - dec_tensor = sess.run(output_tensors, feed_dict) - duration = time.time() - time_start - time_list.append(duration) - translations.append(dec_tensor) - else: + if idx == warmup and warmup > 0: + logger.info('The warm-up is over.') + logger.info('Start to get performance with {}/{} (steps/total_iterations).' \ + .format(iteration - warmup, iteration)) + feed_dict = {input_tensors[0]: input_data} + time_start = time.time() + dec_tensor = sess.run(output_tensors, feed_dict) + duration = time.time() - time_start + time_list.append(duration) + translations.append(dec_tensor) + if iteration == idx + 1: break + latency = np.array(time_list[warmup:]).mean() / FLAGS.batch_size - logger.info('Batch-size = {}'.format(FLAGS.batch_size)) - logger.info('Latency: {:.3f} ms'.format(latency * 1000)) - logger.info('Throughput: {:.3f} items/sec'.format(1./ latency)) + if FLAGS.benchmark and FLAGS.mode == "performance": + logger.info('Batch-size = {}'.format(FLAGS.batch_size)) + logger.info('Latency: {:.3f} ms'.format(latency * 1000)) + logger.info('Throughput: {:.3f} items/sec'.format(1./ latency)) - if FLAGS.mode != "benchmark": + if FLAGS.mode != "performance": """Write translations to file and calculate BLEU score.""" translation_count = 0 decoded_translations=[] @@ -288,22 +303,37 @@ def eval_func(infer_graph): def main(unused_args): graph = load_graph(FLAGS.input_graph) - if FLAGS.mode == 'tune': - quantizer = Quantization(FLAGS.config) + if FLAGS.tune: + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.data.dataloaders.dataloader import DataLoader dataset = Dataset(FLAGS.input_file, FLAGS.vocab_file) - quantizer.calib_dataloader = common.DataLoader(dataset, - collate_fn = collate_fn, - batch_size = FLAGS.batch_size) - quantizer.model = common.Model(graph) - quantizer.eval_func = eval_func - q_model = quantizer.fit() + calib_dataloader = DataLoader(dataset = dataset, + framework ='tensorflow', + collate_fn = collate_fn, + batch_size = FLAGS.batch_size) + + conf = PostTrainingQuantConfig(inputs=['input_tokens'], + outputs=['model/Transformer/strided_slice_15'], + calibration_sampling_size=[500]) + q_model = quantization.fit(graph, conf=conf, calib_dataloader=calib_dataloader, + eval_func=eval_func) try: q_model.save(FLAGS.output_model) except Exception as e: tf.compat.v1.logging.error("Failed to save model due to {}".format(str(e))) - else: - eval_func(graph) + if FLAGS.benchmark: + assert FLAGS.mode == 
'performance' or FLAGS.mode == 'accuracy', \ + "Benchmark only supports performance or accuracy mode." + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + if FLAGS.mode == 'performance': + conf = BenchmarkConfig(cores_per_instance=28, num_of_instance=1) + fit(graph, conf, b_func=eval_func) + elif FLAGS.mode == 'accuracy': + eval_func(graph) + if __name__ == "__main__": app.run() diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_tuning.sh b/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_tuning.sh index 3f27cea72f5..87fffb0f6c1 100644 --- a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_tuning.sh +++ b/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_tuning.sh @@ -12,12 +12,10 @@ function main { # init params function init_params { # set default value - topology="transformer_lt_mlperf" input_model="./transformer_mlperf_fp32.pb" dataset_location="./transformer_uniform_data" output_model="./output_transformer_mlperf_int8.pb" file_out="./output_translation_result.txt" - config="./transformer_lt_mlperf.yaml" batch_size=64 warmup_steps=10 bleu_variant="uncased" @@ -27,9 +25,6 @@ function init_params { for var in "$@" do case $var in - --topology=*) - topology=$(echo $var |cut -f2 -d=) - ;; --input_model=*) input_model=$(echo ${var} |cut -f2 -d=) ;; @@ -42,9 +37,6 @@ function init_params { --file_out=*) file_out=$(echo ${var} |cut -f2 -d=) ;; - --config=*) - config=$(echo ${var} |cut -f2 -d=) - ;; --batch_size=*) batch_size=$(echo ${var} |cut -f2 -d=) ;; @@ -60,10 +52,6 @@ function init_params { --num_intra=*) num_intra=$(echo ${var} |cut -f2 -d=) ;; - *) - echo "Parameter error: ${var}" - exit 1 - ;; esac done @@ -79,8 +67,7 @@ function run_tuning { --vocab_file=${dataset_location}/vocab.ende.32768 \ --output_model=${output_model} \ --file_out=${file_out} \ - --config=${config} \ - --mode=tune \ + --tune \ --warmup_steps=${warmup_steps} \ --batch_size=${batch_size} \ --bleu_variant=${bleu_variant} \ diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/transformer_lt_mlperf.yaml b/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/transformer_lt_mlperf.yaml deleted file mode 100644 index 8973f47e33d..00000000000 --- a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/transformer_lt_mlperf.yaml +++ /dev/null @@ -1,39 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: - name: transformer_lt_mlperf - framework: tensorflow - inputs: input_tokens - outputs: model/Transformer/strided_slice_15 - -device: cpu # optional. default value is cpu, other value is gpu. 
- -quantization: - calibration: - sampling_size: 500 - model_wise: - weight: - granularity: per_channel - -tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - max_trials: 100 - random_seed: 9527 diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/transformer_lt_mlperf_itex.yaml b/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/transformer_lt_mlperf_itex.yaml deleted file mode 100644 index ead560394db..00000000000 --- a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/transformer_lt_mlperf_itex.yaml +++ /dev/null @@ -1,39 +0,0 @@ -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 1.0 - -model: - name: transformer_lt_mlperf - framework: tensorflow_itex - inputs: input_tokens - outputs: model/Transformer/strided_slice_15 - -device: gpu # optional. set cpu if installed intel-extension-for-tensorflow[cpu], set gpu if installed intel-extension-for-tensorflow[gpu]. - -quantization: - calibration: - sampling_size: 500 - model_wise: - weight: - granularity: per_channel - -tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - max_trials: 100 - random_seed: 9527 diff --git a/examples/tensorflow/object_detection/yolo_v3/quantization/ptq/utils.py b/examples/tensorflow/object_detection/yolo_v3/quantization/ptq/utils.py deleted file mode 100644 index c8b3f417dfb..00000000000 --- a/examples/tensorflow/object_detection/yolo_v3/quantization/ptq/utils.py +++ /dev/null @@ -1,205 +0,0 @@ -import tensorflow as tf -import numpy as np - -from PIL import Image -from tensorflow.core.framework import graph_pb2 -from tensorflow.python.platform import gfile -from tensorflow.python.data.experimental import parallel_interleave -from tensorflow.python.data.experimental import map_and_batch - -def read_graph(input_graph): - if not gfile.Exists(input_graph): - print("Input graph file '" + input_graph + "' does not exist!") - exit(-1) - - input_graph_def = graph_pb2.GraphDef() - with gfile.Open(input_graph, "rb") as f: - data = f.read() - input_graph_def.ParseFromString(data) - - return input_graph_def - -def parse_and_preprocess(serialized_example): - # Dense features in Example proto. - feature_map = { - 'image/encoded': tf.compat.v1.FixedLenFeature([], dtype=tf.string, - default_value=''), - 'image/object/class/text': tf.compat.v1.VarLenFeature(dtype=tf.string), - 'image/source_id': tf.compat.v1.FixedLenFeature([], dtype=tf.string, - default_value=''), - } - sparse_float32 = tf.compat.v1.VarLenFeature(dtype=tf.float32) - # Sparse features in Example proto. 
- feature_map.update( - {k: sparse_float32 for k in ['image/object/bbox/xmin', - 'image/object/bbox/ymin', - 'image/object/bbox/xmax', - 'image/object/bbox/ymax']}) - - features = tf.compat.v1.parse_single_example(serialized_example, feature_map) - - xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) - ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) - xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) - ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) - - # Note that we impose an ordering of (y, x) just to make life difficult. - bbox = tf.concat([ymin, xmin, ymax, xmax], 0) - - # Force the variable number of bounding boxes into the shape - # [1, num_boxes, coords]. - bbox = tf.expand_dims(bbox, 0) - bbox = tf.transpose(bbox, [0, 2, 1]) - - encoded_image = features['image/encoded'] - image_tensor = tf.image.decode_image(encoded_image, channels=3) - image_tensor.set_shape([None, None, 3]) - - label = features['image/object/class/text'].values - - image_id = features['image/source_id'] - - return image_tensor, bbox[0], label, image_id - -def get_input(data_location, batch_size=1): - tfrecord_paths = [data_location] - ds = tf.data.TFRecordDataset.list_files(tfrecord_paths) - - ds = ds.apply( - parallel_interleave( - tf.data.TFRecordDataset, cycle_length=28, block_length=5, - sloppy=True, - buffer_output_elements=10000, prefetch_input_elements=10000)) - ds = ds.prefetch(buffer_size=10000) - ds = ds.apply( - map_and_batch( - map_func=parse_and_preprocess, - batch_size=batch_size, - num_parallel_batches=28, - num_parallel_calls=None)) - ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) - ds_iter = tf.compat.v1.data.make_one_shot_iterator(ds) - images, bbox, label, image_id = ds_iter.get_next() - - return images, bbox, label, image_id - -def letter_box_image(image, output_height, output_width, fill_value): - """ - Fit image with final image with output_width and output_height. - :param image: PILLOW Image object. - :param output_height: width of the final image. - :param output_width: height of the final image. - :param fill_value: fill value for empty area. Can be uint8 or np.ndarray - :return: numpy image fit within letterbox. 
dtype=uint8, shape=(output_height, output_width) - """ - - height_ratio = float(output_height) / image.size[1] - width_ratio = float(output_width) / image.size[0] - fit_ratio = min(width_ratio, height_ratio) - fit_height = int(image.size[1] * fit_ratio) - fit_width = int(image.size[0] * fit_ratio) - fit_image = np.asarray(image.resize((fit_width, fit_height), resample=Image.BILINEAR)) - - if isinstance(fill_value, int): - fill_value = np.full(fit_image.shape[2], fill_value, fit_image.dtype) - - to_return = np.tile(fill_value, (output_height, output_width, 1)) - pad_top = int(0.5 * (output_height - fit_height)) - pad_left = int(0.5 * (output_width - fit_width)) - to_return[pad_top: pad_top+fit_height, pad_left: pad_left+fit_width] = fit_image - - return to_return - -def _iou(box1, box2): - """ - Computes Intersection over Union value for 2 bounding boxes - - :param box1: array of 4 values (top left and bottom right coords): [x0, y0, x1, x2] - :param box2: same as box1 - :return: IoU - """ - b1_x0, b1_y0, b1_x1, b1_y1 = box1 - b2_x0, b2_y0, b2_x1, b2_y1 = box2 - - int_x0 = max(b1_x0, b2_x0) - int_y0 = max(b1_y0, b2_y0) - int_x1 = min(b1_x1, b2_x1) - int_y1 = min(b1_y1, b2_y1) - - int_area = max(int_x1 - int_x0, 0) * max(int_y1 - int_y0, 0) - - b1_area = (b1_x1 - b1_x0) * (b1_y1 - b1_y0) - b2_area = (b2_x1 - b2_x0) * (b2_y1 - b2_y0) - - # we add small epsilon of 1e-05 to avoid division by 0 - iou = int_area / (b1_area + b2_area - int_area + 1e-05) - return iou - -def non_max_suppression(predictions_with_boxes, confidence_threshold, iou_threshold=0.4): - """ - Applies Non-max suppression to prediction boxes. - - :param predictions_with_boxes: 3D numpy array, first 4 values in 3rd dimension are bbox attrs, 5th is confidence - :param confidence_threshold: the threshold for deciding if prediction is valid - :param iou_threshold: the threshold for deciding if two boxes overlap - :return: dict: class -> [(box, score)] - """ - conf_mask = np.expand_dims( - (predictions_with_boxes[:, :, 4] > confidence_threshold), -1) - predictions = predictions_with_boxes * conf_mask - - result = {} - for i, image_pred in enumerate(predictions): - shape = image_pred.shape - tmp = image_pred - sum_t = np.sum(tmp, axis=1) - non_zero_idxs = sum_t != 0 - image_pred = image_pred[non_zero_idxs, :] - image_pred = image_pred.reshape(-1, shape[-1]) - - bbox_attrs = image_pred[:, :5] - classes = image_pred[:, 5:] - classes = np.argmax(classes, axis=-1) - - unique_classes = list(set(classes.reshape(-1))) - - for cls in unique_classes: - cls_mask = classes == cls - cls_boxes = bbox_attrs[np.nonzero(cls_mask)] - cls_boxes = cls_boxes[cls_boxes[:, -1].argsort()[::-1]] - cls_scores = cls_boxes[:, -1] - cls_boxes = cls_boxes[:, :-1] - - while len(cls_boxes) > 0: - box = cls_boxes[0] - score = cls_scores[0] - if cls not in result: - result[cls] = [] - result[cls].append((box, score)) - cls_boxes = cls_boxes[1:] - cls_scores = cls_scores[1:] - ious = np.array([_iou(box, x) for x in cls_boxes]) - iou_mask = ious < iou_threshold - cls_boxes = cls_boxes[np.nonzero(iou_mask)] - cls_scores = cls_scores[np.nonzero(iou_mask)] - - return result - -def letter_box_pos_to_original_pos(letter_pos, current_size, ori_image_size)-> np.ndarray: - """ - Parameters should have same shape and dimension space. (Width, Height) or (Height, Width) - :param letter_pos: The current position within letterbox image including fill value area. - :param current_size: The size of whole image including fill value area. 
- :param ori_image_size: The size of image before being letter boxed. - :return: - """ - letter_pos = np.asarray(letter_pos, dtype=np.float32) - current_size = np.asarray(current_size, dtype=np.float32) - ori_image_size = np.asarray(ori_image_size, dtype=np.float32) - final_ratio = min(current_size[0] / ori_image_size[0], current_size[1] / ori_image_size[1]) - pad = 0.5 * (current_size - final_ratio * ori_image_size) - pad = pad.astype(np.int32) - to_return_pos = (letter_pos - pad) / final_ratio - - return to_return_pos - diff --git a/neural_compressor/adaptor/tf_utils/util.py b/neural_compressor/adaptor/tf_utils/util.py index 750900c4ab8..b0598d289d5 100644 --- a/neural_compressor/adaptor/tf_utils/util.py +++ b/neural_compressor/adaptor/tf_utils/util.py @@ -232,7 +232,7 @@ def iterator_sess_run(sess, iter_op, feed_dict, output_tensor, iteration=-1, mea sess.run(iter_op, feed_dict) preds = [] idx = 0 - while idx < iteration or iteration == -1: + while idx+1 != iteration: try: if measurer: measurer.start()
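The final hunk simplifies the loop condition in `iterator_sess_run` from `idx < iteration or iteration == -1` to `idx + 1 != iteration`. The standalone sketch below illustrates why the `-1` ("run the whole dataset") case still terminates; the body here, a counter that stops when the data runs out, is a stand-in for the real `sess.run` loop and its iterator-exhaustion handling, which the hunk does not show in full.

```python
# Stand-in for the iterator_sess_run termination logic (assumption: the real
# loop increments idx per batch and breaks when the TF iterator is exhausted).
def simulated_run(iteration, available_batches):
    idx = executed = 0
    while idx + 1 != iteration:            # condition introduced by the patch
        if executed == available_batches:  # plays the role of OutOfRangeError
            break
        executed += 1
        idx += 1
    return executed

# iteration == -1 never matches idx + 1, so the loop stops only when the data
# is exhausted, which is the behaviour the old `or iteration == -1` clause
# expressed explicitly.
assert simulated_run(-1, 5) == 5
assert simulated_run(3, 5) == 2   # stops once idx + 1 == iteration (in this sketch)
```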