diff --git a/.azure-pipelines/scripts/models/update_yaml_config.py b/.azure-pipelines/scripts/models/update_yaml_config.py index b594bd2c7f5..a96f262449d 100644 --- a/.azure-pipelines/scripts/models/update_yaml_config.py +++ b/.azure-pipelines/scripts/models/update_yaml_config.py @@ -134,14 +134,6 @@ def update_yaml_config_tuning( prev_strategy = tuning_config.get("strategy", {}) strategy_name = prev_strategy.get("name", None) prev_strategy.update({"name": strategy}) - if strategy == "sigopt": - prev_strategy.update( - { - "sigopt_api_token": strategy_token, - "sigopt_project_id": "lpot", - "sigopt_experiment_name": "lpot-tune", - } - ) if strategy == "hawq": prev_strategy.update({"loss": "CrossEntropyLoss"}) print(f"Changed {strategy_name} to {strategy}") diff --git a/.github/checkgroup.yml b/.github/checkgroup.yml index 90eea65b49a..809a83958c2 100644 --- a/.github/checkgroup.yml +++ b/.github/checkgroup.yml @@ -36,7 +36,7 @@ subprojects: paths: - "neural_compressor/common/**" - "neural_compressor/torch/**" - - "examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/**" + - "examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/**" - "setup.py" - "requirements_pt.txt" - ".azure-pipelines/scripts/models/**" diff --git a/.github/workflows/pr-link-scan.yml b/.github/workflows/pr-link-scan.yml new file mode 100644 index 00000000000..33938fe4812 --- /dev/null +++ b/.github/workflows/pr-link-scan.yml @@ -0,0 +1,159 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Check hyperlinks and relative path validity + +permissions: + contents: read + +on: + pull_request: + branches: [master] + types: [opened, reopened, ready_for_review, synchronize] + +jobs: + check-the-validity-of-hyperlinks-in-README: + runs-on: ubuntu-latest + steps: + - name: Clean Up Working Directory + run: sudo rm -rf ${{github.workspace}}/* + + - name: Checkout Repo + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 + with: + fetch-depth: 0 + + - name: Check the Validity of Hyperlinks + env: + BASE_SHA: ${{ github.event.pull_request.base.sha }} + run: | + cd ${{github.workspace}} + delay=1 + fail="FALSE" + merged_commit=$(git log -1 --format='%H') + changed_files="$(git diff --name-status --diff-filter=ARM $BASE_SHA ${merged_commit} | awk '/\.md$/ {print $NF}')" + if [ -n "$changed_files" ]; then + for changed_file in $changed_files; do + # echo $changed_file + url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file") || true + if [ -n "$url_lines" ]; then + for url_line in $url_lines; do + # echo $url_line + url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//') + path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-) + if [[ "$url" == "https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html" || "$url" == "https://ai.cloud.intel.com/" ]]; then + echo "Link "$url" from ${{github.workspace}}/$path needs to be verified by real person." 
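# The allowlisted URLs above are intentionally not curl-checked; they cannot be validated reliably by a script and are left for manual (human) review.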
+ else + sleep $delay + response=$(curl -L -s -o /dev/null -w "%{http_code}" -A "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" -H "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8" -H "Accept-Language: en-US,en;q=0.5" "$url")|| true + if [ "$response" -ne 200 ]; then + echo "**********Validation $url failed ($response), try again**********" + response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") || true + if [ "$response_retry" -eq 200 ]; then + echo "*****Retry successfully*****" + else + echo "******Retry $url failed ($response_retry), add simulated browser requests******" + response_browser=$(curl -s -o /dev/null -w "%{http_code}" -A "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" -H "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8" -H "Accept-Language: en-US,en;q=0.5" "$url")|| true + if [ "$response_browser" -eq 200 ]; then + echo "*****Retry successfully*****" + else + echo -e "::error:: Invalid link ($response_retry) from ${{github.workspace}}/$(echo "$url_line"|cut -d':' -f1): $url" + fail="TRUE" + fi + fi + fi + fi + done + fi + done + else + echo "No changed .md file." + fi + + if [[ "$fail" == "TRUE" ]]; then + exit 1 + else + echo "All hyperlinks are valid." + fi + shell: bash + + check-the-validity-of-relative-path: + runs-on: ubuntu-latest + steps: + - name: Clean up Working Directory + run: sudo rm -rf ${{github.workspace}}/* + + - name: Checkout Repo + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 + with: + fetch-depth: 0 + + - name: Checking Relative Path Validity + env: + REPO_NAME: ${{ github.event.pull_request.head.repo.full_name }} + HEAD_REF: ${{ github.event.pull_request.head.ref }} + BASE_SHA: ${{ github.event.pull_request.base.sha }} + run: | + cd ${{github.workspace}} + delay=1 + fail="FALSE" + branch="https://github.com/$REPO_NAME/blob/$HEAD_REF" + + merged_commit=$(git log -1 --format='%H') + changed_files="$(git diff --name-status --diff-filter=ARM $BASE_SHA ${merged_commit} | awk '/\.md$/ {print $NF}')" + png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r .|grep -Ev 'http' | grep -Ev 'shape=' | grep -Ev 'mailto:inc.maintainers@intel.com') + if [ -n "$png_lines" ]; then + for png_line in $png_lines; do + # echo "No.1----->png_line is $png_line" + refer_path=$(echo "$png_line"|cut -d':' -f1 | cut -d'/' -f2-) + png_path=$(echo "$png_line"|cut -d '(' -f2 | cut -d ')' -f1) + # echo "No.2----->refer_path is $refer_path, png_path is $png_path" + + if [[ "${png_path:0:1}" == "/" ]]; then + # absolute path + check_path=$(echo "${png_path:1}" | cut -d '#' -f1) + # echo "No.3----->check_path is $check_path" + else + # relative path + check_path=${refer_path} + relative_path=$(echo "$png_path" | cut -d '#' -f1) + if [ -n "$relative_path" ]; then check_path=$(dirname "$refer_path")/$relative_path; fi + # echo "No.4----->check_path is $check_path" + fi + + if [ -e "$check_path" ]; then + real_path=$(realpath $check_path) + # echo "No.5----->real_path is $real_path" + if [[ "$png_path" == *#* ]]; then + if [ -n "$changed_files" ] && echo "$changed_files" | grep -q "^${refer_path}$"; then + url_dev=$branch$(echo "$real_path" | sed 's|.*/neural-compressor||')#$(echo "$png_path" | cut -d '#' -f2) + # echo "No.6----->url_dev is $url_dev" + sleep $delay + response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev") + if [ "$response" 
-ne 200 ]; then + echo "**********Validation failed ($response), try again**********" + response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url_dev") + if [ "$response_retry" -eq 200 ]; then + echo "*****Retry successfully*****" + else + echo -e "::error:: Invalid path ($response_retry) from ${{github.workspace}}/$refer_path: $png_path" + fail="TRUE" + fi + else + echo "Validation succeed $png_line" + fi + fi + fi + else + echo -e "::error:: ${{github.workspace}}/$refer_path:$png_path does not exist." + fail="TRUE" + fi + done + fi + + if [[ "$fail" == "TRUE" ]]; then + exit 1 + else + echo "All relative path are valid." + fi + shell: bash diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bd4a375a980..1f79b9475d1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -134,7 +134,7 @@ repos: exclude: | (?x)^( examples/.*(txt|patch)| - examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prompt.json| + examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prompt.json| neural_compressor/torch/algorithms/fp8_quant/internal/diffusion_evaluation/SR_evaluation/imagenet1000_clsidx_to_labels.txt| neural_compressor/evaluation/hf_eval/datasets/cnn_validation.json| neural_compressor/torch/algorithms/fp8_quant/.+| diff --git a/README.md b/README.md index 5fc674a942b..3fd5d3f6029 100644 --- a/README.md +++ b/README.md @@ -19,12 +19,12 @@ Intel® Neural Compressor aims to provide popular model compression techniques s as well as Intel extensions such as [Intel Extension for TensorFlow](https://github.com/intel/intel-extension-for-tensorflow) and [Intel Extension for PyTorch](https://github.com/intel/intel-extension-for-pytorch). In particular, the tool provides the key features, typical examples, and open collaborations as below: -* Support a wide range of Intel hardware such as [Intel Gaudi Al Accelerators](https://www.intel.com/content/www/us/en/products/details/processors/ai-accelerators/gaudi-overview.html), [Intel Core Ultra Processors](https://www.intel.com/content/www/us/en/products/details/processors/core-ultra.html), [Intel Xeon Scalable Processors](https://www.intel.com/content/www/us/en/products/details/processors/xeon/scalable.html), [Intel Xeon CPU Max Series](https://www.intel.com/content/www/us/en/products/details/processors/xeon/max-series.html), [Intel Data Center GPU Flex Series](https://www.intel.com/content/www/us/en/products/details/discrete-gpus/data-center-gpu/flex-series.html), and [Intel Data Center GPU Max Series](https://www.intel.com/content/www/us/en/products/details/discrete-gpus/data-center-gpu/max-series.html) with extensive testing; +* Support a wide range of Intel hardware such as [Intel Gaudi Al Accelerators](https://www.intel.com/content/www/us/en/products/details/processors/ai-accelerators/gaudi.html), [Intel Core Ultra Processors](https://www.intel.com/content/www/us/en/products/details/processors/core-ultra.html), [Intel Xeon Scalable Processors](https://www.intel.com/content/www/us/en/products/details/processors/xeon/scalable.html), [Intel Xeon CPU Max Series](https://www.intel.com/content/www/us/en/products/details/processors/xeon/max-series.html), [Intel Data Center GPU Flex Series](https://www.intel.com/content/www/us/en/products/overview.html), and [Intel Data Center GPU Max Series](https://www.intel.com/content/www/us/en/products/overview.html) with extensive testing; support AMD CPU, ARM CPU, and NVidia GPU through ONNX Runtime with limited testing; 
support NVidia GPU for some WOQ algorithms like AutoRound and HQQ. * Validate popular LLMs such as [LLama2](/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [Falcon](/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [GPT-J](/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [Bloom](/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [OPT](/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), and more than 10,000 broad models such as [Stable Diffusion](/examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization), [BERT-Large](/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx), and [ResNet50](/examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx) from popular model hubs such as [Hugging Face](https://huggingface.co/), [Torch Vision](https://pytorch.org/vision/stable/index.html), and [ONNX Model Zoo](https://github.com/onnx/models#models), with automatic [accuracy-driven](/docs/source/design.md#workflow) quantization strategies -* Collaborate with cloud marketplaces such as [Google Cloud Platform](https://console.cloud.google.com/marketplace/product/bitnami-launchpad/inc-tensorflow-intel?project=verdant-sensor-286207), [Amazon Web Services](https://aws.amazon.com/marketplace/pp/prodview-yjyh2xmggbmga#pdp-support), and [Azure](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/bitnami.inc-tensorflow-intel), software platforms such as [Alibaba Cloud](https://www.intel.com/content/www/us/en/developer/articles/technical/quantize-ai-by-oneapi-analytics-on-alibaba-cloud.html), [Tencent TACO](https://new.qq.com/rain/a/20221202A00B9S00) and [Microsoft Olive](https://github.com/microsoft/Olive), and open AI ecosystem such as [Hugging Face](https://huggingface.co/blog/intel), [PyTorch](https://pytorch.org/tutorials/recipes/intel_neural_compressor_for_pytorch.html), [ONNX](https://github.com/onnx/models#models), [ONNX Runtime](https://github.com/microsoft/onnxruntime), and [Lightning AI](https://github.com/Lightning-AI/lightning/blob/master/docs/source-pytorch/advanced/post_training_quantization.rst) +* Collaborate with cloud marketplaces such as [Google Cloud Platform](https://console.cloud.google.com/marketplace/product/bitnami-launchpad/inc-tensorflow-intel?project=verdant-sensor-286207), [Amazon Web Services](https://aws.amazon.com/marketplace/pp/prodview-yjyh2xmggbmga#pdp-support), and [Azure](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/bitnami.inc-tensorflow-intel), software platforms such as [Tencent TACO](https://new.qq.com/rain/a/20221202A00B9S00) and [Microsoft Olive](https://github.com/microsoft/Olive), and open AI ecosystem such as [Hugging Face](https://huggingface.co/blog/intel), [PyTorch](https://pytorch.org/tutorials/recipes/intel_neural_compressor_for_pytorch.html), [ONNX](https://github.com/onnx/models#models), [ONNX Runtime](https://github.com/microsoft/onnxruntime), and [Lightning AI](https://github.com/Lightning-AI/lightning/blob/master/docs/source-pytorch/advanced/post_training_quantization.rst) ## What's New * [2025/10] [MXFP8 / MXFP4 quantization](./docs/source/3x/PT_MXQuant.md) experimental support @@ -115,8 +115,8 @@ model = load( Architecture Workflow APIs - LLMs Recipes - Examples + LLMs Recipes + Examples @@ -190,7 +190,6 @@ model = load( ## Additional 
Content -* [Release Information](./docs/source/releases_info.md) * [Contribution Guidelines](./docs/source/CONTRIBUTING.md) * [Legal Information](./docs/source/legal_information.md) * [Security Policy](SECURITY.md) diff --git a/docs/source/3x/PT_MixedPrecision.md b/docs/source/3x/PT_MixedPrecision.md index 3fbd1db6bbf..c52617f02a1 100644 --- a/docs/source/3x/PT_MixedPrecision.md +++ b/docs/source/3x/PT_MixedPrecision.md @@ -18,7 +18,7 @@ The 4th Gen Intel® Xeon® Scalable processor supports FP16 instruction set arch Further details can be found in the [Intel AVX512 FP16 Guide](https://www.intel.com/content/www/us/en/content-details/669773/intel-avx-512-fp16-instruction-set-for-intel-xeon-processor-based-products-technology-guide.html) published by Intel. The latest Intel Xeon processors deliver flexibility of Intel Advanced Matrix Extensions (Intel AMX) ,an accelerator that improves the performance of deep learning(DL) training and inference, making it ideal for workloads like NLP, recommender systems, and image recognition. Developers can code AI functionality to take advantage of the Intel AMX instruction set, and they can code non-AI functionality to use the processor instruction set architecture (ISA). Intel has integrated the Intel® oneAPI Deep Neural Network Library (oneDNN), its oneAPI DL engine, into Pytorch. -Further details can be found in the [Intel AMX Document](https://www.intel.com/content/www/us/en/content-details/785250/accelerate-artificial-intelligence-ai-workloads-with-intel-advanced-matrix-extensions-intel-amx.html) published by Intel. +Further details can be found in the [Intel AMX Document](https://www.intel.com/content/www/us/en/content-details/785250/accelerate-artificial-intelligence-workloads-with-intel-advanced-matrix-extensions.html) published by Intel.
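As a quick sanity check before expecting FP16 or Intel AMX speedups, the snippet below is one way to confirm on Linux that the CPU actually reports these ISA features. This is an illustrative sketch, not part of Intel Neural Compressor; the feature names are the flags exposed in `/proc/cpuinfo`.

```python
# Illustrative check: list which FP16/AMX ISA features this Linux CPU reports.
relevant = {"avx512_fp16", "amx_tile", "amx_bf16", "amx_int8"}

flags = set()
with open("/proc/cpuinfo") as f:
    for line in f:
        if line.startswith("flags"):
            flags = set(line.split(":", 1)[1].split())
            break

for feature in sorted(relevant):
    print(f"{feature}: {'available' if feature in flags else 'not reported'}")
```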

Architecture @@ -107,5 +107,5 @@ best_model = autotune(model=build_torch_model(), tune_config=custom_tune_config, ## Examples -Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/cv/mixed_precision +Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/pytorch/cv/mixed_precision ) on how to quantize a model with Mixed Precision. diff --git a/docs/source/3x/PT_SmoothQuant.md b/docs/source/3x/PT_SmoothQuant.md index e3a7262dcde..4aee8a979f0 100644 --- a/docs/source/3x/PT_SmoothQuant.md +++ b/docs/source/3x/PT_SmoothQuant.md @@ -46,7 +46,7 @@ run_fn(prepared_model) q_model = convert(prepared_model) ``` -To get more information, please refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant). +To get more information, please refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant). ## Validated Models @@ -99,8 +99,6 @@ A list of models that achieved a <1% accuracy drop is shown below. | databricks/dolly-v2-3b* | 0.6297 | 0.6247 | alpha=0.5, Ipex 2.1 | | tiiuae/falcon-7b-instruct | 0.6437 | 0.6392 | alpha=0.7, Pytorch | -Please refer to the step-by-step [instruction](../../examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/ipex/README.md) for details. - Please note that for models with asterisk(*), we have set all add ops to FP32 during quantization step to achieve desirable results. diff --git a/docs/source/3x/PT_StaticQuant.md b/docs/source/3x/PT_StaticQuant.md index d687e83c1f6..ffd5f809b6d 100644 --- a/docs/source/3x/PT_StaticQuant.md +++ b/docs/source/3x/PT_StaticQuant.md @@ -68,7 +68,7 @@ q_model = convert(prepared_model) #### Model Examples -Users could refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex) on how to quantize a new model. +Users could refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex) on how to quantize a new model. ### Static Quantization with PT2E Backend @@ -105,4 +105,4 @@ opt_model = torch.compile(q_model) #### Model Examples with PT2E -Users could refer to [cv examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/cv/static_quant) and [llm examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e) on how to quantize a new model. +Users could refer to [cv examples](https://github.com/intel/neural-compressor/blob/master/examples/pytorch/cv/static_quant) and [llm examples](https://github.com/intel/neural-compressor/blob/master/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e) on how to quantize a new model. diff --git a/docs/source/3x/PT_WeightOnlyQuant.md b/docs/source/3x/PT_WeightOnlyQuant.md index c2ee800fc82..2406d8d4150 100644 --- a/docs/source/3x/PT_WeightOnlyQuant.md +++ b/docs/source/3x/PT_WeightOnlyQuant.md @@ -77,7 +77,7 @@ WeightOnlyQuant quantization for PyTorch is using prepare and convert [APIs](./P Notes: - *group_size = -1* refers to **per output channel quantization**. 
Taking a linear layer (input channel = $C_{in}$, output channel = $C_{out}$) for instance, when *group size = -1*, quantization will calculate total $C_{out}$ quantization parameters. Otherwise, when *group_size = gs* quantization parameters are calculate with every $gs$ elements along with the input channel, leading to total $C_{out} \times (C_{in} / gs)$ quantization parameters. -- 4-bit NormalFloat(NF4) is proposed in QLoRA[7]. 'fp4' includes [fp4_e2m1](../../neural_compressor/adaptor/torch_utils/weight_only.py#L37) and [fp4_e2m1_bnb](https://github.com/TimDettmers/bitsandbytes/blob/18e827d666fa2b70a12d539ccedc17aa51b2c97c/bitsandbytes/functional.py#L735). By default, fp4 refers to fp4_e2m1_bnb. +- 4-bit NormalFloat(NF4) is proposed in QLoRA[7]. 'fp4' includes [fp4_e2m1](/neural_compressor/adaptor/torch_utils/weight_only.py) and [fp4_e2m1_bnb](https://github.com/TimDettmers/bitsandbytes/blob/18e827d666fa2b70a12d539ccedc17aa51b2c97c/bitsandbytes/functional.py#L735). By default, fp4 refers to fp4_e2m1_bnb. - *quant_lm_head* defaults to False. This means that, except for transformer blocks, the last layer in transformer models will not be quantized by default. The last layer may be named "lm_head", "output_layer" or "embed_out". - Only RTN and GPTQ support double quant. @@ -339,7 +339,7 @@ For client machines with limited RAM and cores, we offer optimizations to reduce ## Examples -Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only) on how to quantize a model with WeightOnlyQuant. +Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only) on how to quantize a model with WeightOnlyQuant. ## Reference diff --git a/docs/source/3x/TF_Quant.md b/docs/source/3x/TF_Quant.md index 9314a3c8200..f43e4ad7e85 100644 --- a/docs/source/3x/TF_Quant.md +++ b/docs/source/3x/TF_Quant.md @@ -120,4 +120,4 @@ quant_config.set_local("conv1", conv2d_config) ## Examples -Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/tensorflow) on how to quantize a TensorFlow model with `neural_compressor.tensorflow`. +Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/tensorflow) on how to quantize a TensorFlow model with `neural_compressor.tensorflow`. diff --git a/docs/source/3x/TF_SQ.md b/docs/source/3x/TF_SQ.md index 1d3a08836b5..5c0f7c5c305 100644 --- a/docs/source/3x/TF_SQ.md +++ b/docs/source/3x/TF_SQ.md @@ -12,7 +12,7 @@ Quantization is a common compression operation to reduce memory and accelerate inference by converting the floating point matrix to an integer matrix. For large language models (LLMs) with gigantic parameters, the systematic outliers make quantification of activations difficult. [SmoothQuant](https://arxiv.org/abs/2211.10438), a training free post-training quantization (PTQ) solution, offline migrates this difficulty from activations to weights with a mathematically equivalent transformation. -Please refer to the document of [Smooth Quant](../quantization.md/#smooth-quant) for detailed fundamental knowledge. +Please refer to the document of [Smooth Quant](../smooth_quant.md) for detailed fundamental knowledge. 
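To make the transformation concrete, below is a minimal NumPy sketch of the mathematically equivalent rescaling SmoothQuant performs; it is illustrative only (the variable names and the per-input-channel formulation with migration strength `alpha` follow the SmoothQuant paper, not the `neural_compressor.tensorflow` API).

```python
import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((8, 16))          # activations: tokens x input channels
X[:, 3] *= 50.0                           # inject an outlier channel
W = rng.standard_normal((16, 32)) * 0.05  # weights: input channels x output channels

alpha = 0.5                               # migration strength
act_max = np.abs(X).max(axis=0)           # per-input-channel activation max
wgt_max = np.abs(W).max(axis=1)           # per-input-channel weight max
scale = act_max**alpha / wgt_max**(1 - alpha)

X_smooth = X / scale                      # activation outliers are flattened
W_smooth = W * scale[:, None]             # quantization difficulty moves into the weights

# The transformation is equivalent: the layer output is unchanged.
assert np.allclose(X @ W, X_smooth @ W_smooth)
print("outlier channel max before/after:", np.abs(X[:, 3]).max(), np.abs(X_smooth[:, 3]).max())
```

In practice the scales are folded offline into the preceding layer's parameters (for example a LayerNorm), so no extra operation is added at inference time.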
## Usage @@ -50,4 +50,4 @@ best_model = autotune( ## Examples -Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant) on how to apply smooth quant to a TensorFlow model with `neural_compressor.tensorflow`. +Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant) on how to apply smooth quant to a TensorFlow model with `neural_compressor.tensorflow`. diff --git a/docs/source/3x/autotune.md b/docs/source/3x/autotune.md index abba48d02f8..fc19808c156 100644 --- a/docs/source/3x/autotune.md +++ b/docs/source/3x/autotune.md @@ -10,7 +10,7 @@ AutoTune ## Overview -Intel® Neural Compressor aims to help users quickly deploy low-precision models by leveraging popular compression techniques, such as post-training quantization and weight-only quantization algorithms. Despite having a variety of these algorithms, finding the appropriate configuration for a model can be difficult and time-consuming. To address this, we built the `autotune` module based on the [strategy](./tuning_strategies.md) in 2.x for accuracy-aware tuning, which identifies the best algorithm configuration for models to achieve optimal performance under the certain accuracy criteria. This module allows users to easily use predefined tuning recipes and customize the tuning space as needed. +Intel® Neural Compressor aims to help users quickly deploy low-precision models by leveraging popular compression techniques, such as post-training quantization and weight-only quantization algorithms. Despite having a variety of these algorithms, finding the appropriate configuration for a model can be difficult and time-consuming. To address this, we built the `autotune` module based on the [strategy](../tuning_strategies.md) in 2.x for accuracy-aware tuning, which identifies the best algorithm configuration for models to achieve optimal performance under the certain accuracy criteria. This module allows users to easily use predefined tuning recipes and customize the tuning space as needed. ## How it Works diff --git a/docs/source/3x/llm_recipes.md b/docs/source/3x/llm_recipes.md deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/docs/source/3x/transformers_like_api.md b/docs/source/3x/transformers_like_api.md index 55e8d964072..3aecae675d1 100644 --- a/docs/source/3x/transformers_like_api.md +++ b/docs/source/3x/transformers_like_api.md @@ -200,7 +200,7 @@ gen_text = tokenizer.batch_decode(gen_ids, skip_special_tokens=True) print(gen_text) ``` -5. You can directly use [example script](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_gpu_woq.py) +5. 
You can directly use [example script](https://github.com/intel/neural-compressor/blob/master/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_gpu_woq.py) ```bash python run_generation_gpu_woq.py --woq --benchmark --model save_dir ``` @@ -213,4 +213,4 @@ python run_generation_gpu_woq.py --woq --benchmark --model save_dir ## Examples -Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation) on how to quantize a model with transformers-like api. +Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation) on how to quantize a model with transformers-like api. diff --git a/docs/source/adaptor.md b/docs/source/adaptor.md index b0bd7d62456..fc39063f7c1 100644 --- a/docs/source/adaptor.md +++ b/docs/source/adaptor.md @@ -74,7 +74,7 @@ following information on the current runtime framework. * The supported sequence of each quantizable op. * The instance of each sequence. -In the past, the above information was generally defined and hidden in every corner of the code which made effective maintenance difficult. With the Query API, we only need to create one unified yaml file and call the corresponding API to get the information. For example, the [tensorflow.yaml](../neural_compressor/adaptor/tensorflow.yaml) keeps the current Tensorflow framework ability. We recommend that the end user not make modifications if requirements are not clear. +In the past, the above information was generally defined and hidden in every corner of the code which made effective maintenance difficult. With the Query API, we only need to create one unified yaml file and call the corresponding API to get the information. For example, the [tensorflow.yaml](/neural_compressor/adaptor/tensorflow.yaml) keeps the current Tensorflow framework ability. We recommend that the end user not make modifications if requirements are not clear. Below is a fragment of the Tensorflow configuration file. @@ -86,7 +86,7 @@ Below is a fragment of the Tensorflow configuration file. #### Query API Introduction -The abstract class `QueryBackendCapability` is defined in [query.py](../neural_compressor/adaptor/query.py#L21). Each framework should inherit it and implement the member function if needed. Refer to Tensorflow implementation [TensorflowQuery](../neural_compressor/adaptor/tensorflow.py#L628). +The abstract class `QueryBackendCapability` is defined in [query.py](/neural_compressor/adaptor/query.py). Each framework should inherit it and implement the member function if needed. Refer to Tensorflow implementation [TensorflowQuery](/neural_compressor/adaptor/tensorflow.py). ## Example of Adding a New Backend Support @@ -107,11 +107,9 @@ Onnxruntime already has [quantization tools](https://github.com/microsoft/onnxru * nodes_to_quantize, nodes_to_exclude * op_types_to_quantize - We define three configuration files to describe the capability of ONNXRT. Please refer to [onnxrt_qlinear.yaml](../neural_compressor/adaptor/onnxrt_qlinear.yaml), [onnxrt_integer.yaml](../neural_compressor/adaptor/onnxrt_integer.yaml) and [onnxrt_qdq.yaml](../neural_compressor/adaptor/onnxrt_qdq.yaml). - ### Implement ONNXRTAdaptor Class - The base class ONNXRTAdaptor inherits from the Adaptor class. 
Please refer to [onnxrt.py](../neural_compressor/adaptor/onnxrt.py). + The base class ONNXRTAdaptor inherits from the Adaptor class. ```python @adaptor_registry diff --git a/docs/source/dataloader.md b/docs/source/dataloader.md index ca13ec7c7e3..4adf4ca261d 100644 --- a/docs/source/dataloader.md +++ b/docs/source/dataloader.md @@ -23,7 +23,7 @@ With the importance of a dataloader, different frameworks can have their own Dat - Neural Compressor treats batch size as a tuning parameter which means it can dynamically change the batch size to reach the accuracy goal. -The unified `DataLoader` API takes a [dataset](./dataset.md) as the input parameter and loads data from the dataset when needed. In special cases, users can also define their own dataloader classes, which must have `batch_size` attribute and `__iter__` function. +The unified `DataLoader` API takes a dataset as the input parameter and loads data from the dataset when needed. In special cases, users can also define their own dataloader classes, which must have `batch_size` attribute and `__iter__` function. Of cause, users can also use frameworks own dataloader in Neural Compressor. @@ -93,6 +93,6 @@ q_model = quantization.fit(model, config, calib_dataloader=dataloader, eval_func ## Examples -- Refer to this [example](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static) for how to define a customised dataloader. +- Refer to this [example](https://github.com/intel/neural-compressor/blob/master/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static) for how to define a customised dataloader. -- Refer to this [example](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/nlp/bert/quantization/ptq_static) for how to use internal dataloader. +- Refer to this [example](https://github.com/intel/neural-compressor/blob/master/examples/deprecated/onnxrt/nlp/bert/quantization/ptq_static) for how to use internal dataloader. diff --git a/docs/source/incompatible_changes.md b/docs/source/incompatible_changes.md deleted file mode 100644 index 4ffcac0a1c1..00000000000 --- a/docs/source/incompatible_changes.md +++ /dev/null @@ -1,43 +0,0 @@ -# Incompatible changes between v1.2 and v1.1 - -## User-facing APIs - -The user-facing APIs are changed between v1.2 and v1.1. The major changes are: - -1. v1.2 abstracts `neural_compressor.common.Model` concept to cover those cases whose weight and graph files are stored separately. - -2. v1.2 unifies the calling style by setting model, calibration dataloader, evaluation dataloader, and metric through `quantizer` attributes rather than passing as function inputs. - -Refer to below examples for details. - -```python -# user facing API example in v1.1 -quantizer = Quantization("/path/to/user.yaml") -ds = dataset("/path/to/dataset") -dataloader = quantizer.dataloader(ds, batch_size=100) -quantizer.metric("metric", metric) -q_model = quantizer( - "/path/to/model", - q_dataloader=dataloader, - eval_dataloader=dataloader, -) -... 
# user to write framework specific code to save q_model -``` - -```python -# user facing API example in v1.2 -quantizer = Quantization(conf.yaml) -quantizer.model = "/path/to/model" -dl = dataset("/path/to/dataset") -quantizer.calib_dataloader = common.DataLoader(dl, batch_size=32) -quantizer.eval_dataloader = common.DataLoader(dl, batch_size=32) -quantizer.metric = common.Metric(custom_metric) -q_model = quantizer.fit() -q_model.save("/path/to/output/dir") # explicitly call to save q_model -``` - -## Built-in transform/dataset/metric APIs - -v1.2 refines Neural Compressor built-in transform/dataset/metric to unify APIs cross different framework backends. - -Refer to [dataset](./dataset.md), [transform](./transform.md), and [metric](./metric.md) to learn how to use them in yaml or code. diff --git a/docs/source/metric.md b/docs/source/metric.md index bacc993f46e..961fa575e87 100644 --- a/docs/source/metric.md +++ b/docs/source/metric.md @@ -25,21 +25,21 @@ Neural Compressor supports some built-in metrics that are popularly used in indu ### TensorFlow -| Metric | Parameters | Inputs | Comments | -| :------ | :------ | :------ | :------ | -| topk(k) | **k** (int, default=1): Number of top elements to look at for computing accuracy | preds, labels | Computes top k predictions accuracy. | -| Accuracy() | None | preds, labels | Computes accuracy classification score. | -| Loss() | None | preds, labels | A dummy metric for directly printing loss, it calculates the average of predictions.
Please refer to [MXNet docs](https://mxnet.apache.org/versions/1.7.0/api/python/docs/_modules/mxnet/metric.html#Loss) for details. | -| MAE(compare_label) | **compare_label** (bool, default=True): Whether to compare label. False if there are no labels and will use FP32 preds as labels | preds, labels | Computes Mean Absolute Error (MAE) loss. | -| RMSE(compare_label) | **compare_label** (bool, default=True): Whether to compare label. False if there are no labels and will use FP32 preds as labels | preds, labels | Computes Root Mean Square Error (RMSE) loss. | -| MSE(compare_label) | **compare_label** (bool, default=True): Whether to compare label. False if there are no labels and will use FP32 preds as labels | preds, labels | Computes Mean Squared Error (MSE) loss. | -| F1() | None | preds, labels | Computes the F1 score of a binary classification problem. | -| mAP(anno_path, iou_thrs, map_points) | **anno_path** (str): Annotation path. The annotation file should be a yaml file, please refer to [label_map](../examples/tensorflow/object_detection/tensorflow_models/quantization/ptq/label_map.yaml) for its format.
**iou_thrs** (float or str, default=0.5): Minimal value for intersection over union that allows to make decision that prediction bounding box is true positive. You can specify one float value between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds.
**map_points** (int, default=0): The way to calculate mAP. 101 for 101-point interpolated AP, 11 for 11-point interpolated AP, 0 for area under PR curve. | preds, labels | preds is a tuple which supports 2 length: 3 and 4.
If its length is 3, it should contain boxes, scores, classes in turn.
If its length is 4, it should contain target_boxes_num, boxes, scores, classes in turn
labels is a tuple which contains bbox, str_label, int_label, image_id inturn
the length of one of str_label and int_label can be 0 | -| COCOmAP(anno_path, iou_thrs, map_points) | **anno_path** (str): Annotation path. The annotation file should be a yaml file, please refer to [label_map](../examples/tensorflow/object_detection/tensorflow_models/quantization/ptq/label_map.yaml) for its format.
**iou_thrs** (float or str): Intersection over union threshold. Set to "0.5:0.05:0.95" for standard COCO thresholds.
**map_points** (int): The way to calculate mAP. Set to 101 for 101-point interpolated AP. | preds, labels | preds is a tuple which supports 2 length: 3 and 4.
If its length is 3, it should contain boxes, scores, classes in turn.
If its length is 4, it should contain target_boxes_num, boxes, scores, classes in turn
labels is a tuple which contains bbox, str_label, int_label, image_id inturn
the length of one of str_label and int_label can be 0 | -| VOCmAP(anno_path, iou_thrs, map_points) | **anno_path** (str): Annotation path. The annotation file should be a yaml file, please refer to [label_map](../examples/tensorflow/object_detection/tensorflow_models/quantization/ptq/label_map.yaml) for its format.
**iou_thrs**(float or str): Intersection over union threshold. Set to 0.5.
**map_points**(int): The way to calculate mAP. The way to calculate mAP. Set to 0 for area under PR curve. | preds, labels | preds is a tuple which supports 2 length: 3 and 4.
If its length is 3, it should contain boxes, scores, classes in turn.
If its length is 4, it should contain target_boxes_num, boxes, scores, classes in turn
labels is a tuple which contains bbox, str_label, int_label, image_id inturn
the length of one of str_label and int_label can be 0 | -| COCOmAPv2(anno_path, iou_thrs, map_points, output_index_mapping) | **anno_path** (str): Annotation path. The annotation file should be a yaml file, please refer to [label_map](../examples/tensorflow/object_detection/tensorflow_models/quantization/ptq/label_map.yaml) for its format.
**iou_thrs** (float or str): Intersection over union threshold. Set to "0.5:0.05:0.95" for standard COCO thresholds.
**map_points** (int): The way to calculate mAP. Set to 101 for 101-point interpolated AP.
**output_index_mapping** (dict, default={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): Specifies the index of outputs in model raw prediction, -1 means this output does not exist. | preds, labels | preds is a tuple which supports 2 length: 3 and 4.
If its length is 3, it should contain boxes, scores, classes in turn.
If its length is 4, it should contain target_boxes_num, boxes, scores, classes in turn
labels is a tuple which contains bbox, str_label, int_label, image_id inturn
the length of one of str_label and int_label can be 0 | -| BLEU() | None | preds, labels | BLEU score computation between labels and predictions. An approximate BLEU scoring method since we do not glue word pieces or decode the ids and tokenize the output. By default, we use ngram order of 4 and use brevity penalty. Also, this does not have beam search | -| SquadF1() | None | preds, labels | Evaluate v1.1 of the SQuAD dataset | +| Metric | Parameters | Inputs | Comments | +| :------ |:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| :------ | :------ | +| topk(k) | **k** (int, default=1): Number of top elements to look at for computing accuracy | preds, labels | Computes top k predictions accuracy. | +| Accuracy() | None | preds, labels | Computes accuracy classification score. | +| Loss() | None | preds, labels | A dummy metric for directly printing loss, it calculates the average of predictions.
Please refer to [MXNet docs](https://mxnet.apache.org/versions/1.7.0/api/python/docs/_modules/mxnet/metric.html#Loss) for details. | +| MAE(compare_label) | **compare_label** (bool, default=True): Whether to compare label. False if there are no labels and will use FP32 preds as labels | preds, labels | Computes Mean Absolute Error (MAE) loss. | +| RMSE(compare_label) | **compare_label** (bool, default=True): Whether to compare label. False if there are no labels and will use FP32 preds as labels | preds, labels | Computes Root Mean Square Error (RMSE) loss. | +| MSE(compare_label) | **compare_label** (bool, default=True): Whether to compare label. False if there are no labels and will use FP32 preds as labels | preds, labels | Computes Mean Squared Error (MSE) loss. | +| F1() | None | preds, labels | Computes the F1 score of a binary classification problem. | +| mAP(anno_path, iou_thrs, map_points) | **anno_path** (str): Annotation path. The annotation file should be a yaml file.
**iou_thrs** (float or str, default=0.5): Minimal intersection-over-union value required for a predicted bounding box to be counted as a true positive. You can specify one float value between 0 and 1 or the string "0.5:0.05:0.95" for standard COCO thresholds.<br>
**map_points** (int, default=0): The way to calculate mAP. 101 for 101-point interpolated AP, 11 for 11-point interpolated AP, 0 for area under PR curve. | preds, labels | preds is a tuple which supports 2 lengths: 3 and 4.<br>
If its length is 3, it should contain boxes, scores, classes in turn.<br>
If its length is 4, it should contain target_boxes_num, boxes, scores, classes in turn.<br>
labels is a tuple which contains bbox, str_label, int_label, image_id in turn.<br>
the length of one of str_label and int_label can be 0. | +| COCOmAP(anno_path, iou_thrs, map_points) | **anno_path** (str): Annotation path. The annotation file should be a yaml file.<br>
**iou_thrs** (float or str): Intersection over union threshold. Set to "0.5:0.05:0.95" for standard COCO thresholds.
**map_points** (int): The way to calculate mAP. Set to 101 for 101-point interpolated AP. | preds, labels | preds is a tuple which supports 2 lengths: 3 and 4.<br>
If its length is 3, it should contain boxes, scores, classes in turn.<br>
If its length is 4, it should contain target_boxes_num, boxes, scores, classes in turn.<br>
labels is a tuple which contains bbox, str_label, int_label, image_id in turn.<br>
the length of one of str_label and int_label can be 0. | +| VOCmAP(anno_path, iou_thrs, map_points) | **anno_path** (str): Annotation path. The annotation file should be a yaml file.<br>
**iou_thrs** (float or str): Intersection over union threshold. Set to 0.5.<br>
**map_points** (int): The way to calculate mAP. Set to 0 for area under PR curve. | preds, labels | preds is a tuple which supports 2 lengths: 3 and 4.<br>
If its length is 3, it should contain boxes, scores, classes in turn.<br>
If its length is 4, it should contain target_boxes_num, boxes, scores, classes in turn.<br>
labels is a tuple which contains bbox, str_label, int_label, image_id in turn.<br>
the length of one of str_label and int_label can be 0. | +| COCOmAPv2(anno_path, iou_thrs, map_points, output_index_mapping) | **anno_path** (str): Annotation path. The annotation file should be a yaml file.<br>
**iou_thrs** (float or str): Intersection over union threshold. Set to "0.5:0.05:0.95" for standard COCO thresholds.
**map_points** (int): The way to calculate mAP. Set to 101 for 101-point interpolated AP.
**output_index_mapping** (dict, default={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): Specifies the index of outputs in the model's raw prediction; -1 means this output does not exist. | preds, labels | preds is a tuple which supports 2 lengths: 3 and 4.<br>
If its length is 3, it should contain boxes, scores, classes in turn.<br>
If its length is 4, it should contain target_boxes_num, boxes, scores, classes in turn.<br>
labels is a tuple which contains bbox, str_label, int_label, image_id in turn.<br>
the length of one of str_label and int_label can be 0 | +| BLEU() | None | preds, labels | BLEU score computation between labels and predictions. An approximate BLEU scoring method since we do not glue word pieces or decode the ids and tokenize the output. By default, we use ngram order of 4 and use brevity penalty. Also, this does not have beam search | +| SquadF1() | None | preds, labels | Evaluate v1.1 of the SQuAD dataset | ### PyTorch @@ -57,20 +57,20 @@ Neural Compressor supports some built-in metrics that are popularly used in indu ### ONNXRT -| Metric | Parameters | Inputs | Comments | -| :------ | :------ | :------ | :------ | -| topk(k) | **k** (int, default=1): Number of top elements to look at for computing accuracy | preds, labels | Computes top k predictions accuracy. | -| Accuracy() | None | preds, labels |Computes accuracy classification score. | -| Loss() | None | preds, labels | A dummy metric for directly printing loss, it calculates the average of predictions.
Please refer to [MXNet docs](https://mxnet.apache.org/versions/1.7.0/api/python/docs/_modules/mxnet/metric.html#Loss) for details. | -| MAE(compare_label) | **compare_label** (bool, default=True): Whether to compare label. False if there are no labels and will use FP32 preds as labels. | preds, labels | Computes Mean Absolute Error (MAE) loss. | -| RMSE(compare_label) | **compare_label** (bool, default=True): Whether to compare label. False if there are no labels and will use FP32 preds as labels. | preds, labels | Computes Root Mean Squared Error (RMSE) loss. | -| MSE(compare_label) | **compare_label** (bool, default=True): Whether to compare label. False if there are no labels and will use FP32 preds as labels. | preds, labels | Computes Mean Squared Error (MSE) loss. | -| F1() | None | preds, labels | Computes the F1 score of a binary classification problem. | -| mAP(anno_path, iou_thrs, map_points) | **anno_path** (str): Annotation path. The annotation file should be a yaml file, please refer to [label_map](../examples/tensorflow/object_detection/tensorflow_models/quantization/ptq/label_map.yaml) for its format. The annotation file should be a yaml file, please refer to [label_map](../examples/tensorflow/object_detection/tensorflow_models/quantization/ptq/label_map.yaml) for its format.
**iou_thrs** (float or str, default=0.5): Minimal value for intersection over union that allows to make decision that prediction bounding box is true positive. You can specify one float value between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds.
**map_points** (int, default=0): The way to calculate mAP. 101 for 101-point interpolated AP, 11 for 11-point interpolated AP, 0 for area under PR curve. | preds, labels | preds is a tuple which supports 2 length: 3 and 4.
If its length is 3, it should contain boxes, scores, classes in turn.
If its length is 4, it should contain target_boxes_num, boxes, scores, classes in turn
labels is a tuple which contains bbox, str_label, int_label, image_id inturn
the length of one of str_label and int_label can be 0 | -| COCOmAP(anno_path, iou_thrs, map_points) | **anno_path** (str): Annotation path. The annotation file should be a yaml file, please refer to [label_map](../examples/tensorflow/object_detection/tensorflow_models/quantization/ptq/label_map.yaml) for its format. The annotation file should be a yaml file, please refer to [label_map](../examples/tensorflow/object_detection/tensorflow_models/quantization/ptq/label_map.yaml) for its format.
**iou_thrs** (float or str): Intersection over union threshold. Set to "0.5:0.05:0.95" for standard COCO thresholds.
**map_points** (int): The way to calculate mAP. Set to 101 for 101-point interpolated AP. | preds, labels | preds is a tuple which supports 2 length: 3 and 4.
If its length is 3, it should contain boxes, scores, classes in turn.
If its length is 4, it should contain target_boxes_num, boxes, scores, classes in turn
labels is a tuple which contains bbox, str_label, int_label, image_id inturn
the length of one of str_label and int_label can be 0 | -| VOCmAP(anno_path, iou_thrs, map_points) | **anno_path** (str): Annotation path. The annotation file should be a yaml file, please refer to [label_map](../examples/tensorflow/object_detection/tensorflow_models/quantization/ptq/label_map.yaml) for its format. The annotation file should be a yaml file, please refer to [label_map](../examples/tensorflow/object_detection/tensorflow_models/quantization/ptq/label_map.yaml) for its format .
**iou_thrs** (float or str): Intersection over union threshold. Set to 0.5.
**map_points** (int): The way to calculate mAP. The way to calculate mAP. Set to 0 for area under PR curve. | preds, labels | preds is a tuple which supports 2 length: 3 and 4.
If its length is 3, it should contain boxes, scores, classes in turn.
If its length is 4, it should contain target_boxes_num, boxes, scores, classes in turn
labels is a tuple which contains bbox, str_label, int_label, image_id inturn
the length of one of str_label and int_label can be 0 | -| COCOmAPv2(anno_path, iou_thrs, map_points, output_index_mapping) | **anno_path** (str): Annotation path. The annotation file should be a yaml file, please refer to [label_map](../examples/tensorflow/object_detection/tensorflow_models/quantization/ptq/label_map.yaml) for its format. The annotation file should be a yaml file, please refer to [label_map](../examples/tensorflow/object_detection/tensorflow_models/quantization/ptq/label_map.yaml) for its format.
**iou_thrs** (float or str): Intersection over union threshold. Set to "0.5:0.05:0.95" for standard COCO thresholds.
**map_points** (int): The way to calculate mAP. Set to 101 for 101-point interpolated AP.
**output_index_mapping** (dict, default={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): Specifies the index of outputs in model raw prediction, -1 means this output does not exist. | preds, labels | preds is a tuple which supports 2 length: 3 and 4.
If its length is 3, it should contain boxes, scores, classes in turn.
If its length is 4, it should contain target_boxes_num, boxes, scores, classes in turn
labels is a tuple which contains bbox, str_label, int_label, image_id inturn
the length of one of str_label and int_label can be 0 | -| GLUE(task) | **task** (str, default=mrpc): The name of the task. Choices include mrpc, qqp, qnli, rte, sts-b, cola, mnli, wnli. | preds, labels | Computes GLUE score for bert model. | +| Metric | Parameters | Inputs | Comments | +| :------ |:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| :------ | :------ | +| topk(k) | **k** (int, default=1): Number of top elements to look at for computing accuracy | preds, labels | Computes top k predictions accuracy. | +| Accuracy() | None | preds, labels |Computes accuracy classification score. | +| Loss() | None | preds, labels | A dummy metric for directly printing loss, it calculates the average of predictions.
Please refer to [MXNet docs](https://mxnet.apache.org/versions/1.7.0/api/python/docs/_modules/mxnet/metric.html#Loss) for details. | +| MAE(compare_label) | **compare_label** (bool, default=True): Whether to compare label. False if there are no labels and will use FP32 preds as labels. | preds, labels | Computes Mean Absolute Error (MAE) loss. | +| RMSE(compare_label) | **compare_label** (bool, default=True): Whether to compare label. False if there are no labels and will use FP32 preds as labels. | preds, labels | Computes Root Mean Squared Error (RMSE) loss. | +| MSE(compare_label) | **compare_label** (bool, default=True): Whether to compare label. False if there are no labels and will use FP32 preds as labels. | preds, labels | Computes Mean Squared Error (MSE) loss. | +| F1() | None | preds, labels | Computes the F1 score of a binary classification problem. | +| mAP(anno_path, iou_thrs, map_points) | **anno_path** (str): Annotation path. The annotation file should be a yaml file.<br>
**iou_thrs** (float or str, default=0.5): Minimal intersection-over-union value required for a predicted bounding box to be counted as a true positive. You can specify one float value between 0 and 1 or the string "0.5:0.05:0.95" for standard COCO thresholds.<br>
**map_points** (int, default=0): The way to calculate mAP. 101 for 101-point interpolated AP, 11 for 11-point interpolated AP, 0 for area under PR curve. | preds, labels | preds is a tuple which supports 2 lengths: 3 and 4.<br>
If its length is 3, it should contain boxes, scores, classes in turn.<br>
If its length is 4, it should contain target_boxes_num, boxes, scores, classes in turn.<br>
labels is a tuple which contains bbox, str_label, int_label, image_id in turn.<br>
the length of one of str_label and int_label can be 0. | +| COCOmAP(anno_path, iou_thrs, map_points) | **anno_path** (str): Annotation path. The annotation file should be a yaml file.<br>
**iou_thrs** (float or str): Intersection over union threshold. Set to "0.5:0.05:0.95" for standard COCO thresholds.
**map_points** (int): The way to calculate mAP. Set to 101 for 101-point interpolated AP. | preds, labels | preds is a tuple which supports 2 lengths: 3 and 4.<br>
If its length is 3, it should contain boxes, scores, classes in turn.<br>
If its length is 4, it should contain target_boxes_num, boxes, scores, classes in turn.<br>
labels is a tuple which contains bbox, str_label, int_label, image_id in turn.<br>
the length of one of str_label and int_label can be 0. | +| VOCmAP(anno_path, iou_thrs, map_points) | **anno_path** (str): Annotation path. The annotation file should be a yaml file.<br>
**iou_thrs** (float or str): Intersection over union threshold. Set to 0.5.
**map_points** (int): The way to calculate mAP. Set to 0 for area under PR curve. | preds, labels | preds is a tuple which supports 2 lengths: 3 and 4.<br>
If its length is 3, it should contain boxes, scores, classes in turn.<br>
If its length is 4, it should contain target_boxes_num, boxes, scores, classes in turn.<br>
labels is a tuple which contains bbox, str_label, int_label, image_id in turn.<br>
the length of one of str_label and int_label can be 0. | +| COCOmAPv2(anno_path, iou_thrs, map_points, output_index_mapping) | **anno_path** (str): Annotation path. The annotation file should be a yaml file.<br>
**iou_thrs** (float or str): Intersection over union threshold. Set to "0.5:0.05:0.95" for standard COCO thresholds.
**map_points** (int): The way to calculate mAP. Set to 101 for 101-point interpolated AP.
**output_index_mapping** (dict, default={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): Specifies the index of outputs in the model's raw prediction; -1 means this output does not exist. | preds, labels | preds is a tuple which supports 2 lengths: 3 and 4.<br>
If its length is 3, it should contain boxes, scores, classes in turn.<br>
If its length is 4, it should contain target_boxes_num, boxes, scores, classes in turn.<br>
labels is a tuple which contains bbox, str_label, int_label, image_id in turn.<br>
the length of one of str_label and int_label can be 0 | +| GLUE(task) | **task** (str, default=mrpc): The name of the task. Choices include mrpc, qqp, qnli, rte, sts-b, cola, mnli, wnli. | preds, labels | Computes GLUE score for bert model. | @@ -124,6 +124,6 @@ q_model = fit(model, config, calib_dataloader=calib_dataloader, eval_dataloader= ## Example -- Refer to this [example](https://github.com/intel/neural-compressor/tree/master/examples/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static) for how to define a customised metric. +- Refer to this [example](https://github.com/intel/neural-compressor/tree/master/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static) for how to define a customised metric. -- Refer to this [example](https://github.com/intel/neural-compressor/blob/master/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq) for how to use internal metric. +- Refer to this [example](https://github.com/intel/neural-compressor/tree/master/examples/deprecated/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq) for how to use internal metric. diff --git a/docs/source/mx_quantization.md b/docs/source/mx_quantization.md index 99af3ea8d1d..33ace844641 100644 --- a/docs/source/mx_quantization.md +++ b/docs/source/mx_quantization.md @@ -115,11 +115,6 @@ from neural_compressor.torch.quantization import MXQuantConfig, quantize quant_config = MXQuantConfig(w_dtype=args.w_dtype, act_dtype=args.act_dtype, weight_only=args.woq) user_model = quantize(model=user_model, quant_config=quant_config) ``` - -## Examples - -- PyTorch [huggingface models](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mx) - ## Reference diff --git a/docs/source/objective.md b/docs/source/objective.md index 34fc7c577ac..93059f10835 100644 --- a/docs/source/objective.md +++ b/docs/source/objective.md @@ -66,6 +66,3 @@ from neural_compressor.config import TuningCriterion tuning_criterion = TuningCriterion(objective=["performance", "accuracy"]) ``` - -## Example -Refer to [example](../neural_compressor/template/ptq.yaml) as an example. diff --git a/docs/source/pruning.md b/docs/source/pruning.md index 4a94d868de1..b698271af11 100644 --- a/docs/source/pruning.md +++ b/docs/source/pruning.md @@ -107,7 +107,7 @@ Pruning patterns defines the rules of pruned weights' arrangements in space. Int - Multi-head Attention Pruning - Multi-head attention mechanism boosts transformer models' capability of contextual information analysis. However, different heads' contribution to the final output varies. In most situation, a number of heads can be removed without causing accuracy drop. Head pruning can be applied in a wide range of scenes including BERT, GPT as well as other large language models. **We haven't support it in pruning, but we have provided experimental feature in Model Auto Slim**. Please refer to [multi-head attention auto slim examples](https://github.com/intel/neural-compressor/blob/master/examples/pytorch/nlp/huggingface_models/question-answering/model_slim) + Multi-head attention mechanism boosts transformer models' capability of contextual information analysis. However, different heads' contribution to the final output varies. In most situation, a number of heads can be removed without causing accuracy drop. Head pruning can be applied in a wide range of scenes including BERT, GPT as well as other large language models. 
**We haven't support it in pruning, but we have provided experimental feature in Model Auto Slim**. Please refer to [multi-head attention auto slim examples](https://github.com/intel/neural-compressor/tree/master/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/model_slim) @@ -474,7 +474,7 @@ The pruning technique is validated on typical models across various domains (in - Language Modeling - Sparsity is effectively implemented through various pruning patterns in Causal language modeling (CLM) tasks. [Language-modeling examples](../../../examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager). + Sparsity is effectively implemented through various pruning patterns in Causal language modeling (CLM) tasks. [Language-modeling examples](/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager). - Text Classification @@ -500,11 +500,11 @@ Please refer to [pruning examples](../../examples/deprecated/README.md#Pruning-1 ## Sparse Model Deployment -Particular hardware/software like [Intel Extension for Transformer](https://github.com/intel/intel-extension-for-transformers) are required to obtain inference speed and footprints' optimization for most sparse models. However, using [model slim](#click) for some special structures can obtain significant inference speed improvements and footprint reduction without the post-pruning deployment. In other words, you can achieve model acceleration directly under your training framework (PyTorch, etc.) +Particular hardware/software like [Intel Extension for Transformer](https://github.com/intel/intel-extension-for-transformers) are required to obtain inference speed and footprints' optimization for most sparse models. However, using model slim for some special structures can obtain significant inference speed improvements and footprint reduction without the post-pruning deployment. In other words, you can achieve model acceleration directly under your training framework (PyTorch, etc.) ## Pruning with Hyperparameter Optimization Intel® Neural Compressor currently support grid search, random, bayesian optimization and xgboost search algorithms for pruning with HPO. -For more details, please refer to [HPO document](../../neural_compressor/compression/hpo/README.md) +For more details, please refer to [HPO](/neural_compressor/compression/hpo) ## Reference diff --git a/docs/source/quantization_weight_only.md b/docs/source/quantization_weight_only.md index 7534b69fb3c..09562a5b280 100644 --- a/docs/source/quantization_weight_only.md +++ b/docs/source/quantization_weight_only.md @@ -34,8 +34,6 @@ There are many excellent works for weight only quantization to improve its accur | GPTQ | ✔ | ✔ | | TEQ | ✔ | stay tuned | -**Note:** To get the validated accuracy results on popular models, please refer to [PyTorch Models with Torch 2.0.1+cpu in WOQ Mode](./validated_model_list.md/#pytorch-models-with-torch-201cpu-in-woq-mode) - > **RTN:** A quantification method that we can think of very intuitively. It does not require additional datasets and is a very fast quantization method. Generally speaking, RTN will convert the weight into a uniformly distributed integer data type, but some algorithms, such as Qlora, propose a non-uniform NF4 data type and prove its theoretical optimality. > **GPTQ:** A new one-shot weight quantization method based on approximate second-order information, that is both highly-accurate and highly efficient[4]. 
The weights of each column are updated based on the fixed-scale pseudo-quantization error and the inverse of the Hessian matrix calculated from the activations. The updated columns sharing the same scale may generate a new max/min value, so the scale needs to be saved for restoration. diff --git a/docs/source/sigopt_strategy.md b/docs/source/sigopt_strategy.md deleted file mode 100644 index 49b0a25d9e3..00000000000 --- a/docs/source/sigopt_strategy.md +++ /dev/null @@ -1,83 +0,0 @@ -SigOpt Strategy -============ - -1. [Introduction](#introduction) - - 1.1 [Preparation](#preparation) - - 1.2 [SigOpt Platform](#sigopt-platform) - - 1.3 [Neural Compressor Configuration](#neural-compressor-configuration) - -2. [Performance](#performance) - - 2.1 [Benefit of SigOpt Strategy](#benefit-of-sigopt-strategy) - - 2.2 [Performance Comparison of Different Strategies](#performance-comparison-of-different-strategies) - -## Introduction - -[SigOpt](https://app.sigopt.com/) is an online model development platform that makes it easy to track runs, visualize training, and scale hyperparameter optimization for any type of model. [Optimization Loop](https://app.sigopt.com/docs/overview/optimization) is the backbone of using SigOpt. We can set metrics and realize the interaction between the online platform and tuning configurations based on this mechanism. - -### Preparation - -Before using the `SigOpt` strategy, a SigOpt account is necessary. -- Each account has its own API token. Find your API token and then fill it in the `sigopt_api_token` field. -- Create a new project and fill the corresponding name into the `sigopt_project_id` field. -- Set the name of this experiment in `sigopt_experiment_id` field optionally. The default name is "nc-tune". - -### SigOpt Platform - -If you are using the SigOpt products for the first time, please [sign-up](https://app.sigopt.com/signup), if not, please [login](https://app.sigopt.com/login). It is free to apply for an account. Although there are certain restrictions on the model parameters and the number of experiments created, it is sufficient for ordinary customers. If you want higher capacity, please contact support@sigopt.com. - -After logging in, you can use `the token api` to connect the local code to the online platform, corresponding to `sigopt_api_token`. It can be obtained [here](https://app.sigopt.com/tokens/info). - -SigOpt has two concepts: [project](https://app.sigopt.com/projects) and [experiment](https://app.sigopt.com/experiments). Create a project before experimenting, corresponding to `sigopt_project_id` and `sigopt_experiment_name`. Multiple experiments can be created on each project. After creating the experiment, SigOpt will execute three simple steps below in a loop: - -- Receive a Suggestion from SigOpt; -- Evaluate your metrics; -- Report an Observation to SigOpt; - -In our built-in sigopt strategy, the metrics add accuracy as a constraint and optimize for latency. - -### Neural Compressor Configuration - -Compare to `Basic` strategy, `sigopt_api_token` and `sigopt_project_id` is necessary for `SigOpt` strategy. Before using the strategy, it is required to create the project corresponding to `sigopt_project_id` in your account. 
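As an illustration of the round-to-nearest (RTN) behaviour described in the weight-only quantization notes above, a minimal NumPy sketch is given below. It is only a rough illustration of symmetric, per-output-channel RTN fake-quantization, not Neural Compressor's actual implementation; the function name and defaults are invented for the example.

```python
import numpy as np


def rtn_fake_quantize(weight: np.ndarray, num_bits: int = 4) -> np.ndarray:
    """Symmetric round-to-nearest weight quantization sketch (one scale per output row)."""
    qmax = 2 ** (num_bits - 1) - 1                      # e.g. 7 for signed 4-bit
    w = weight.reshape(weight.shape[0], -1)             # flatten everything but the output channel
    scale = np.abs(w).max(axis=1, keepdims=True) / qmax
    scale = np.where(scale == 0, 1.0, scale)            # guard against all-zero rows
    q = np.clip(np.round(w / scale), -qmax - 1, qmax)   # snap to the integer grid
    return (q * scale).reshape(weight.shape)            # dequantized ("fake-quant") weights
```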
- -```python -from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion - -conf = PostTrainingQuantConfig( - tuning_criterion=TuningCriterion( - strategy="sigopt", - strategy_kwargs={ - "sigopt_api_token": "YOUR-ACCOUNT-API-TOKEN", - "sigopt_project_id": "PROJECT-ID", - "sigopt_experiment_name": "nc-tune", - }, - ), -) -``` - -## Performance - -### Benefit of SigOpt Strategy - -- Metric based SigOpt is better than self-defining and easy to use. You can read the details [here](https://app.sigopt.com/docs/overview/metric_constraints). -- With the token api, results of each experiment are recorded in your account. You can use the SigOpt data analysis function to analyze the results, such as drawing a chart, calculating the F1 score, etc. - -### Performance Comparison of Different Strategies - -- MobileNet_v1(tensorflow) - - |strategy|FP32 baseline|int8 accuracy|int8 duration(s)| - |--------|-------------|-------------|----------------| - | basic | 0.8266 | 0.8372 | 88.2132 | - | sigopt | 0.8266 | 0.8372 | 83.7495 | - -- ResNet50_v1(tensorflow) - - |strategy|FP32 baseline|int8 accuracy|int8 duration(s)| - |--------|-------------|-------------|----------------| - | basic | 0.8299 | 0.8294 | 85.0837 | - | sigopt | 0.8299 | 0.8291 | 83.4469 | diff --git a/docs/source/smooth_quant.md b/docs/source/smooth_quant.md index 2a54f967c49..fbcaef2c16d 100644 --- a/docs/source/smooth_quant.md +++ b/docs/source/smooth_quant.md @@ -375,7 +375,7 @@ A list of models that achieved a <1% accuracy drop is shown below. | databricks/dolly-v2-3b* | 0.6297 | 0.6247 | alpha=0.5, Ipex 2.1 | | tiiuae/falcon-7b-instruct | 0.6437 | 0.6392 | alpha=0.7, Pytorch | -The results listed below are achieved using IPEX optimize_transformers in model initialization for better performance. Please refer to the step-by-step [instruction](../../examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/ipex/README.md) for details. +The results listed below are achieved using IPEX optimize_transformers in model initialization for better performance. | Model/Last token accuracy | FP32 Accuracy | INT8 (w/ SmoothQuant) | Notes | |:----------:|:------:|:------:|-----------------------------------| | LLaMa-2-7b-hf* | 0.7392 | 0.7332 | alpha=Auto, Ipex 2.1 | @@ -446,7 +446,7 @@ recipes = {"smooth_quant": True, conf = PostTrainingQuantConfig(recipes=recipes) ``` -To get more information, please refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm). +To get more information, please refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm). ## Supported Framework Matrix diff --git a/docs/source/tuning_strategies.md b/docs/source/tuning_strategies.md index 2822fafb5ba..aac3d791cac 100644 --- a/docs/source/tuning_strategies.md +++ b/docs/source/tuning_strategies.md @@ -33,9 +33,7 @@ Tuning Strategies 3.9. [Random](#random) - 3.10. [SigOpt](#sigopt) - - 3.11. [TPE](#tpe) + 3.10. [TPE](#tpe) 4. [Distributed Tuning](#distributed-tuning) @@ -44,18 +42,18 @@ Tuning Strategies ## Introduction Intel® Neural Compressor aims to help users quickly deploy -the low-precision inference solution on popular Deep Learning frameworks such as TensorFlow, PyTorch and ONNX. 
With built-in strategies, it automatically optimizes low-precision recipes for deep learning models to achieve optimal product objectives, such as inference performance and memory usage, with expected accuracy criteria. Currently, several tuning strategies, including `auto`, `O0`, `O1`, `Basic`, `MSE`, `MSE_V2`, `HAWQ_V2`, `Bayesian`, `Exhaustive`, `Random`, `SigOpt`, `TPE`, etc are supported. By default, the [`quant_level="auto"`](./tuning_strategies.md#auto) is used for tuning. +the low-precision inference solution on popular Deep Learning frameworks such as TensorFlow, PyTorch and ONNX. With built-in strategies, it automatically optimizes low-precision recipes for deep learning models to achieve optimal product objectives, such as inference performance and memory usage, with expected accuracy criteria. Currently, several tuning strategies, including `auto`, `O0`, `O1`, `Basic`, `MSE`, `MSE_V2`, `HAWQ_V2`, `Bayesian`, `Exhaustive`, `Random`, `TPE`, etc. are supported. By default, the [`quant_level="auto"`](./tuning_strategies.md#auto) is used for tuning. ## Strategy Design Before tuning, the `tuning space` was constructed according to the framework capability and user configuration. Then the selected strategy generates the next quantization configuration according to its traverse process and the previous tuning record. The tuning process stops when meeting the exit policy. The function of strategies is shown below: -![Tuning Strategy](./imgs/strategy.png "Strategy Framework") +![Tuning Strategy](./imgs/strategy.png) ### Tuning Space Intel® Neural Compressor supports multiple quantization modes such as Post Training Static Quantization (PTQ static), Post Training Dynamic Quantization (PTQ dynamic), Quantization Aware Training, etc. One operator (OP) with a specific quantization mode has multiple ways to quantize, for example it may have multiple quantization scheme(symmetric/asymmetric), calibration algorithm(Min-Max/KL Divergence), etc. We use the [`framework capability`](./framework_yaml.md) to represent the methods that we have already supported. The `tuning space` includes all tuning items and their options. For example, the tuning items and options of the `Conv2D` (PyTorch) supported by Intel® Neural Compressor are as follows: -![Conv2D_PyTorch_Cap](./imgs/Conv2D_PyTorch_Cap.png "Conv2D PyTorch Capability") +![Conv2D_PyTorch_Cap](./imgs/Conv2D_PyTorch_Cap.png) To incorporate the human experience and reduce the tuning time, user can reduce the tuning space by specifying the `op_name_dict` and `op_type_dict` in `PostTrainingQuantConfig` (`QuantizationAwareTrainingConfig`). Before tuning, the strategy will merge these configurations with framework capability to create the final tuning space. @@ -386,37 +384,6 @@ conf = PostTrainingQuantConfig( ) ``` - -### SigOpt - -#### Design - -`SigOpt` strategy is to use [SigOpt Optimization Loop](https://app.sigopt.com/docs/overview/optimization) method to accelerate and visualize the traversal of the tuning configurations from the tuning space. The metrics add accuracy as a constraint and optimize for latency to improve performance. [SigOpt Projects](https://app.sigopt.com/) can show the result of each tuning experiment. - -#### Usage - -Compared to `Basic`, `sigopt_api_token` and `sigopt_project_id` are necessary for `SigOpt`. -For details, [how to use sigopt strategy in neural_compressor](./sigopt_strategy.md) is available. 
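Relating to the tuning-space discussion above, the sketch below shows one way `op_name_dict` can shrink the tuning space by pinning a layer to FP32. The operator name `conv1` is only a placeholder; the exact keys accepted depend on the framework backend.

```python
from neural_compressor.config import PostTrainingQuantConfig

# Keep a hypothetical "conv1" layer in FP32 so the strategy never tunes it.
op_name_dict = {
    "conv1": {
        "activation": {"dtype": ["fp32"]},
        "weight": {"dtype": ["fp32"]},
    },
}
conf = PostTrainingQuantConfig(op_name_dict=op_name_dict)
```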
- -Note that the `sigopt_api_token`, `sigopt_project_id`, and `sigopt_experiment_name` should be set inside the `strategy_kwargs`. - -```python -from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion - -conf = PostTrainingQuantConfig( - quant_level=1, - tuning_criterion=TuningCriterion( - strategy="sigopt", - strategy_kwargs={ - "sigopt_api_token": "YOUR-ACCOUNT-API-TOKEN", - "sigopt_project_id": "PROJECT-ID", - "sigopt_experiment_name": "nc-tune", - }, - ), -) -``` - - ### TPE #### Design @@ -485,7 +452,7 @@ An example of customizing a new tuning strategy can be reached at [TPE Strategy] Intel® Neural Compressor provides distributed tuning to speed up the tuning process by leveraging the multi-node cluster. It seamlessly parallelizes the tuning process across multi nodes by using the MPI. In distributed tuning, the `fp32` model is replicated on every node, and each original model replica is fed with a different quantization configuration. The master handler coordinates the tuning process and synchronizes the tuning result of each stage to every slave handler. The distributed tuning allows the tuning process to scale up significantly to the number of nodes, which translates into faster results and more efficient utilization of computing resources. The diagram below provides an overview of the distributed tuning process. -![distributed tuning](./imgs/distributed_tuning_intro.png "Distributed Tuning") +![distributed tuning](./imgs/distributed_tuning_intro.png) ### Usage diff --git a/examples/deprecated/onnxrt/nlp/huggingface_model/text_classification/mix_precision/README.md b/examples/deprecated/onnxrt/nlp/huggingface_model/text_classification/mix_precision/README.md index f6e36dcda2e..c5d4ebae6be 100644 --- a/examples/deprecated/onnxrt/nlp/huggingface_model/text_classification/mix_precision/README.md +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/text_classification/mix_precision/README.md @@ -11,7 +11,7 @@ git clone -b dnnl_ep --depth 1 https://github.com/intel/neural-compressor.git cd neural-compressor pip install -e ./ -cd examples/onnxrt/nlp/huggingface_model/text_classification/mix_precision/ +cd examples/deprecated/onnxrt/nlp/huggingface_model/text_classification/mix_precision/ pip install -r requirements.txt ``` diff --git a/examples/deprecated/pytorch/image_recognition/resnest/quantization/ptq/fx/ResNest_README.md b/examples/deprecated/pytorch/image_recognition/resnest/quantization/ptq/fx/ResNest_README.md index f675e8b0abe..92c13749a17 100644 --- a/examples/deprecated/pytorch/image_recognition/resnest/quantization/ptq/fx/ResNest_README.md +++ b/examples/deprecated/pytorch/image_recognition/resnest/quantization/ptq/fx/ResNest_README.md @@ -17,8 +17,6 @@ # ResNeSt Split-Attention Network, A New ResNet Variant. It significantly boosts the performance of downstream models such as Mask R-CNN, Cascade R-CNN and DeepLabV3. -![](./miscs/abstract.jpg) - ### Table of Contents 0. [Pretrained Models](#pretrained-models) 0. [Transfer Learning Models](#transfer-learning-models) @@ -50,8 +48,6 @@ pip install resnest --pre - **3rd party implementations** are available: [Tensorflow](https://github.com/QiaoranC/tf_ResNeSt_RegNet_model), [Caffe](https://github.com/NetEase-GameAI/ResNeSt-caffe). 
-- Extra ablation study models are available in [link](./ablation.md) - ### PyTorch Models - Load using Torch Hub @@ -339,7 +335,6 @@ python verify.py --model resnest50 --crop-size 224 ### ImageNet Models -- Training with MXNet Gluon: Please visit [Gluon folder](./scripts/gluon/). - Training with PyTorch: Please visit [PyTorch Encoding Toolkit](https://hangzhang.org/PyTorch-Encoding/model_zoo/imagenet.html) (slightly worse than Gluon implementation). ### Detectron Models diff --git a/examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/README.md index b63c2a736dc..532356ef324 100644 --- a/examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/README.md +++ b/examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/README.md @@ -46,7 +46,7 @@ image recognition ## 3. Prepare Dataset TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. -We also prepared related scripts in [TF image_recognition example](/examples/deprecated/tensorflow/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq#3-prepare-dataset). +We also prepared related scripts in [TF image_recognition example](/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq#3-prepare-dataset). # Run Command diff --git a/examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/README.md index 20adba9545e..0457bb1e96b 100644 --- a/examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/README.md +++ b/examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/README.md @@ -46,7 +46,7 @@ image recognition ## 3. Prepare Dataset TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. -We also prepared related scripts in [TF image_recognition example](/examples/deprecated/tensorflow/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq#3-prepare-dataset). +We also prepared related scripts in [TF image_recognition example](/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq#3-prepare-dataset). # Run Command diff --git a/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/README.md index 12a2b332247..f3666687423 100644 --- a/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/README.md +++ b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/README.md @@ -61,7 +61,7 @@ We can get the pb file by convert the checkpoint file. 
https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl > Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. - 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `InceptionResnetV2/Logits/Predictions` + 3. Use [Netron](https://netron.app/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `InceptionResnetV2/Logits/Predictions` 4. Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo ```shell diff --git a/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/README.md index f9312259eaa..9a30c0b8833 100644 --- a/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/README.md +++ b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/README.md @@ -61,7 +61,7 @@ We can get the pb file by convert the checkpoint file. https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl > Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. - 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `InceptionV1/Logits/Predictions/Reshape_1` + 3. Use [Netron](https://netron.app/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `InceptionV1/Logits/Predictions/Reshape_1` 4. Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo ```shell diff --git a/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/README.md index 9678315a18c..d1cffb85f4c 100644 --- a/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/README.md +++ b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/README.md @@ -61,7 +61,7 @@ We can get the pb file by convert the checkpoint file. https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl > Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. - 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `InceptionV2/Predictions/Reshape_1` + 3. 
Use [Netron](https://netron.app/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `InceptionV2/Predictions/Reshape_1` 4. Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo ```shell diff --git a/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/README.md index f1cf4c38e76..7949fc08ca0 100644 --- a/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/README.md +++ b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/README.md @@ -62,7 +62,7 @@ We can get the pb file by convert the checkpoint file. https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl > Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. - 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `MobilenetV2/Predictions/Reshape_1` + 3. Use [Netron](https://netron.app/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `MobilenetV2/Predictions/Reshape_1` 4. Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo ```shell diff --git a/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/README.md index 16dc8f92e89..d083d5a3cf8 100644 --- a/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/README.md +++ b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/README.md @@ -61,7 +61,7 @@ We can get the pb file by convert the checkpoint file. https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl > Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. - 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `resnet_v2_101/predictions/Reshape_1` + 3. Use [Netron](https://netron.app/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `resnet_v2_101/predictions/Reshape_1` 4. 
Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo ```shell diff --git a/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/README.md index 7292979f83a..f3618fea4b5 100644 --- a/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/README.md +++ b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/README.md @@ -61,7 +61,7 @@ We can get the pb file by convert the checkpoint file. https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl > Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. - 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `resnet_v2_152/predictions/Reshape_1` + 3. Use [Netron](https://netron.app/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `resnet_v2_152/predictions/Reshape_1` 4. Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo ```shell diff --git a/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/README.md index dddfc46f020..cc336a1bffb 100644 --- a/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/README.md +++ b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/README.md @@ -61,7 +61,7 @@ We can get the pb file by convert the checkpoint file. https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl > Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. - 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `resnet_v2_50/predictions/Reshape_1` + 3. Use [Netron](https://netron.app/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `resnet_v2_50/predictions/Reshape_1` 4. 
Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo ```shell diff --git a/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/README.md index 4276e731660..39e6c716c43 100644 --- a/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/README.md +++ b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/README.md @@ -62,7 +62,7 @@ We can get the pb file by convert the checkpoint file. https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl > Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. - 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `vgg_16/fc8/squeezed` + 3. Use [Netron](https://netron.app/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `vgg_16/fc8/squeezed` 4. Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo ```shell diff --git a/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/README.md index cf676a98798..0248e445862 100644 --- a/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/README.md +++ b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/README.md @@ -62,7 +62,7 @@ We can get the pb file by convert the checkpoint file. https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl > Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. - 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for vgg_19 the output layer name is `vgg_19/fc8/squeezed` + 3. Use [Netron](https://netron.app/) to get the input/output layer name of inference graph pb, for vgg_19 the output layer name is `vgg_19/fc8/squeezed` 4. Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo ```shell diff --git a/examples/pytorch/cv/mixed_precision/README.md b/examples/pytorch/cv/mixed_precision/README.md index ede1837b57a..1dd04c2199a 100644 --- a/examples/pytorch/cv/mixed_precision/README.md +++ b/examples/pytorch/cv/mixed_precision/README.md @@ -10,7 +10,7 @@ This document describes the step-by-step instructions for reproducing PyTorch Re PyTorch 1.8 or higher version is needed with pytorch_fx backend. 
```Shell -cd examples/3.x_api/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18 +cd examples/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18 pip install -r requirements.txt ``` > Note: Validated PyTorch [Version](/docs/source/installation_guide.md#validated-software-environment). diff --git a/examples/pytorch/multimodal-modeling/quantization/auto_round/llama4/README.md b/examples/pytorch/multimodal-modeling/quantization/auto_round/llama4/README.md index d08cf714c34..57dff91b799 100644 --- a/examples/pytorch/multimodal-modeling/quantization/auto_round/llama4/README.md +++ b/examples/pytorch/multimodal-modeling/quantization/auto_round/llama4/README.md @@ -10,7 +10,7 @@ This example quantizes and validates the accuracy of Llama4. docker run -d --gpus all -v ... --shm-size=100g --name llama4 -it nvcr.io/nvidia/pytorch:25.05-py3 /bin/bash docker exec -it llama4 bash git clone https://github.com/intel/neural-compressor.git -cd neural-compressor/examples/3.x_api/pytorch/multimodal-modeling/quantization/auto_round/llama4 +cd neural-compressor/examples/pytorch/multimodal-modeling/quantization/auto_round/llama4 # Use `INC_PT_ONLY=1 pip install git+https://github.com/intel/neural-compressor.git@v3.6rc` for the latest updates before neural-compressor v3.6 release pip install neural-compressor-pt==3.6 # Use `pip install git+https://github.com/intel/auto-round.git@v0.8.0rc2` for the latest updates before auto-round v0.8.0 release diff --git a/examples/tensorflow/image_recognition/inception_v3/quantization/ptq/README.md b/examples/tensorflow/image_recognition/inception_v3/quantization/ptq/README.md index 34eb64fcf74..4ae8ca7f668 100644 --- a/examples/tensorflow/image_recognition/inception_v3/quantization/ptq/README.md +++ b/examples/tensorflow/image_recognition/inception_v3/quantization/ptq/README.md @@ -44,10 +44,10 @@ pip install --upgrade intel-extension-for-tensorflow[cpu] ## 3. Prepare Dataset TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in ` examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. + We also prepared related scripts in ` examples/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. 
```shell - cd examples/3.x_api/tensorflow/cv + cd examples/tensorflow/cv # convert validation subset bash prepare_dataset.sh --output_dir=./inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation # convert train subset diff --git a/examples/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/README.md b/examples/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/README.md index 25755074a06..f4ea8457338 100644 --- a/examples/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/README.md +++ b/examples/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/README.md @@ -62,7 +62,7 @@ We can get the pb file by convert the checkpoint file. https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl > Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. - 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `MobilenetV2/Predictions/Reshape_1` + 3. Use [Netron](https://netron.app/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `MobilenetV2/Predictions/Reshape_1` 4. Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo ```shell @@ -77,10 +77,10 @@ We can get the pb file by convert the checkpoint file. ## 3. Prepare Dataset TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in ` examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. + We also prepared related scripts in ` examples/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. ```shell - cd examples/3.x_api/tensorflow/cv + cd examples/tensorflow/cv # convert validation subset bash prepare_dataset.sh --output_dir=./mobilenet_v2/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation # convert train subset diff --git a/examples/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/README.md b/examples/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/README.md index bc07e651f96..e97c95ef3cc 100644 --- a/examples/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/README.md +++ b/examples/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/README.md @@ -61,7 +61,7 @@ We can get the pb file by convert the checkpoint file. 
https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl > Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. - 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `resnet_v2_50/predictions/Reshape_1` + 3. Use [Netron](https://netron.app/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `resnet_v2_50/predictions/Reshape_1` 4. Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo ```shell @@ -76,10 +76,10 @@ We can get the pb file by convert the checkpoint file. ## 3. Prepare Dataset TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in ` examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. + We also prepared related scripts in ` examples/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. ```shell - cd examples/3.x_api/tensorflow/cv + cd examples/tensorflow/cv # convert validation subset bash prepare_dataset.sh --output_dir=./resnet_v2_50/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation # convert train subset diff --git a/examples/tensorflow/image_recognition/vgg16/quantization/ptq/README.md b/examples/tensorflow/image_recognition/vgg16/quantization/ptq/README.md index 00e00c7846d..28ca4c0251c 100644 --- a/examples/tensorflow/image_recognition/vgg16/quantization/ptq/README.md +++ b/examples/tensorflow/image_recognition/vgg16/quantization/ptq/README.md @@ -62,7 +62,7 @@ We can get the pb file by convert the checkpoint file. https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl > Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. - 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `vgg_16/fc8/squeezed` + 3. Use [Netron](https://netron.app/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `vgg_16/fc8/squeezed` 4. 
Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo ```shell @@ -77,10 +77,10 @@ We can get the pb file by convert the checkpoint file. ## 3. Prepare Dataset TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in `examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. + We also prepared related scripts in `examples/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. ```shell - cd examples/3.x_api/tensorflow/cv + cd examples/tensorflow/cv # convert validation subset bash prepare_dataset.sh --output_dir=./vgg16/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation # convert train subset diff --git a/examples/tensorflow/image_recognition/vision_transformer/quantization/ptq/README.md b/examples/tensorflow/image_recognition/vision_transformer/quantization/ptq/README.md index 0d4fa041690..48e49713a0c 100644 --- a/examples/tensorflow/image_recognition/vision_transformer/quantization/ptq/README.md +++ b/examples/tensorflow/image_recognition/vision_transformer/quantization/ptq/README.md @@ -40,10 +40,10 @@ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/2_11_0/HF- ## 3. Prepare Dataset TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in `examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. + We also prepared related scripts in `examples/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. 
```shell - cd examples/3.x_api/tensorflow/cv + cd examples/tensorflow/cv # convert validation subset bash prepare_dataset.sh --output_dir=./vision_transformer/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation # convert train subset diff --git a/examples/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/README.md b/examples/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/README.md index a9275ab13ce..4b078605437 100644 --- a/examples/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/README.md +++ b/examples/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/README.md @@ -38,7 +38,7 @@ python prepare_model.py --output_model=./inception_v3_keras We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. ```shell - cd examples/3.x_api/tensorflow/keras/cv/ + cd examples/tensorflow/keras/cv/ # convert validation subset bash prepare_dataset.sh --output_dir=./inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation # convert train subset diff --git a/examples/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/README.md b/examples/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/README.md index a276ef7cd0d..d2f689a397b 100644 --- a/examples/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/README.md +++ b/examples/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/README.md @@ -37,7 +37,7 @@ python prepare_model.py --output_model=./resnetv2_50_keras We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. ```shell - cd examples/3.x_api/tensorflow/keras/cv/ + cd examples/tensorflow/keras/cv/ # convert validation subset bash prepare_dataset.sh --output_dir=./resnetv2_50/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation # convert train subset diff --git a/examples/tensorflow/nlp/transformer_lt/quantization/ptq/README.md b/examples/tensorflow/nlp/transformer_lt/quantization/ptq/README.md index 544e954371e..d4431a5fd4a 100644 --- a/examples/tensorflow/nlp/transformer_lt/quantization/ptq/README.md +++ b/examples/tensorflow/nlp/transformer_lt/quantization/ptq/README.md @@ -50,10 +50,10 @@ tar -zxvf transformer_lt_official_fp32_pretrained_model.tar.gz Dataset is in data folder, pretrained model is in graph folder. #### Automatic dataset & model download -Run the `prepare_dataset_model.sh` script located in `examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq`. +Run the `prepare_dataset_model.sh` script located in `examples/tensorflow/nlp/transformer_lt/quantization/ptq`. 
```shell -cd examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq +cd examples/tensorflow/nlp/transformer_lt/quantization/ptq bash prepare_dataset_model.sh ``` diff --git a/examples/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md b/examples/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md index b7b90b6f8ec..7be2ae333a4 100644 --- a/examples/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md +++ b/examples/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md @@ -22,7 +22,7 @@ pip install tensorflow ### Installation Dependency packages ```shell -cd examples/3.x_api/tensorflow/object_detection +cd examples/tensorflow/object_detection pip install -r requirements.txt cd faster_rcnn_resnet50/quantization/ptq ``` @@ -65,11 +65,11 @@ tar -xvf faster_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz > **_Note: `prepare_dataset.sh` script works with TF version 1.x._** -Run the `prepare_dataset.sh` script located in `examples/3.x_api/tensorflow/object_detection`. +Run the `prepare_dataset.sh` script located in `examples/tensorflow/object_detection`. Usage: ```shell -cd examples/3.x_api/tensorflow/object_detection +cd examples/tensorflow/object_detection . prepare_dataset.sh cd faster_rcnn_resnet50/quantization/ptq ``` diff --git a/examples/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md b/examples/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md index 9ec8ae2ad78..fca95564e17 100644 --- a/examples/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md +++ b/examples/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md @@ -22,7 +22,7 @@ pip install intel-tensorflow ### Installation Dependency packages ```shell -cd examples/3.x_api/tensorflow/object_detection +cd examples/tensorflow/object_detection pip install -r requirements.txt cd mask_rcnn_inception_v2/quantization/ptq ``` @@ -65,11 +65,11 @@ tar -xvzf mask_rcnn_inception_v2_coco_2018_01_28.tar.gz > **_Note: `prepare_dataset.sh` script works with TF version 1.x._** -Run the `prepare_dataset.sh` script located in `examples/3.x_api/tensorflow/object_detection`. +Run the `prepare_dataset.sh` script located in `examples/tensorflow/object_detection`. Usage: ```shell -cd examples/3.x_api/tensorflow/object_detection/ +cd examples/tensorflow/object_detection/ . prepare_dataset.sh cd mask_rcnn_inception_v2/quantization/ptq ``` diff --git a/examples/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md b/examples/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md index 1b52ecf8b17..4791cd68034 100644 --- a/examples/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md +++ b/examples/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md @@ -22,7 +22,7 @@ pip install intel-tensorflow ### Installation Dependency packages ```shell -cd examples/3.x_api//tensorflow/object_detection +cd examples/tensorflow/object_detection pip install -r requirements.txt cd ssd_mobilenet_v1/quantization/ptq ``` @@ -55,7 +55,7 @@ pip install --upgrade intel-extension-for-tensorflow[cpu] ## 2. Prepare Model ### Automated approach -Run the `prepare_model.py` script located in `examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq`. +Run the `prepare_model.py` script located in `examples/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq`. 
``` python prepare_model.py --model_name=ssd_mobilenet_v1 --model_path=./ @@ -83,11 +83,11 @@ tar -xvzf ssd_mobilenet_v1_coco_2018_01_28.tar.gz > **_Note: `prepare_dataset.sh` script works with TF version 1.x._** -Run the `prepare_dataset.sh` script located in `examples/3.x_api/tensorflow/object_detection`. +Run the `prepare_dataset.sh` script located in `examples/tensorflow/object_detection`. Usage: ```shell -cd examples/3.x_api/tensorflow/object_detection +cd examples/tensorflow/object_detection . prepare_dataset.sh cd ssd_mobilenet_v1/quantization/ptq ``` diff --git a/examples/tensorflow/object_detection/yolo_v5/quantization/ptq/README.md b/examples/tensorflow/object_detection/yolo_v5/quantization/ptq/README.md index 845e383cd59..68d03a93dc2 100644 --- a/examples/tensorflow/object_detection/yolo_v5/quantization/ptq/README.md +++ b/examples/tensorflow/object_detection/yolo_v5/quantization/ptq/README.md @@ -19,7 +19,7 @@ pip install tensorflow ### Installation Dependency packages ```shell -cd examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq +cd examples/tensorflow/object_detection/yolo_v5/quantization/ptq pip install -r requirements.txt ``` diff --git a/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md b/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md index 7bff08a2f84..bf98a637015 100644 --- a/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md +++ b/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md @@ -41,7 +41,7 @@ pip install --upgrade intel-extension-for-tensorflow[cpu] ### Install Additional Dependency packages ```shell -cd examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq +cd examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq pip install -r requirements.txt ``` diff --git a/neural_compressor/compression/pruner/README.md b/neural_compressor/compression/pruner/README.md index cfe6f988c71..078e0d7b368 100644 --- a/neural_compressor/compression/pruner/README.md +++ b/neural_compressor/compression/pruner/README.md @@ -107,7 +107,7 @@ Pruning patterns defines the rules of pruned weights' arrangements in space. Int - Multi-head Attention Pruning - Multi-head attention mechanism boosts transformer models' capability of contextual information analysis. However, different heads' contribution to the final output varies. In most situation, a number of heads can be removed without causing accuracy drop. Head pruning can be applied in a wide range of scenes including BERT, GPT as well as other large language models. **We haven't support it in pruning, but we have provided experimental feature in Model Auto Slim**. Please refer to [multi-head attention auto slim examples](https://github.com/intel/neural-compressor/blob/master/examples/pytorch/nlp/huggingface_models/question-answering/model_slim) + Multi-head attention mechanism boosts transformer models' capability of contextual information analysis. However, different heads' contribution to the final output varies. In most situation, a number of heads can be removed without causing accuracy drop. Head pruning can be applied in a wide range of scenes including BERT, GPT as well as other large language models. **We haven't support it in pruning, but we have provided experimental feature in Model Auto Slim**. 
Please refer to [multi-head attention auto slim examples](https://github.com/intel/neural-compressor/blob/master/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/model_slim) @@ -505,8 +505,7 @@ Please refer to [pruning examples](../../../examples/deprecated/README.md#Prunin Particular hardware/software like [Intel Extension for Transformer](https://github.com/intel/intel-extension-for-transformers) are required to obtain inference speed and footprints' optimization for most sparse models. However, using [model slim](#click) for some special structures can obtain significant inference speed improvements and footprint reduction without the post-pruning deployment. In other words, you can achieve model acceleration directly under your training framework (PyTorch, etc.) ## Pruning with Hyperparameter Optimization -Intel® Neural Compressor currently support grid search, random, bayesian optimization and xgboost search algorithms for pruning with HPO. -For more details, please refer to [HPO document](../../neural_compressor/compression/hpo/README.md) +Intel® Neural Compressor currently support grid search, random, bayesian optimization and xgboost search algorithms for pruning with HPO. ## Reference diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 95e97f8f974..3987fe374eb 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -694,7 +694,7 @@ def strategy(self, strategy): "strategy", strategy, str, - ["basic", "mse", "bayesian", "random", "exhaustive", "sigopt", "tpe", "mse_v2", "hawq_v2"], + ["basic", "mse", "bayesian", "random", "exhaustive", "tpe", "mse_v2", "hawq_v2"], ): self._strategy = strategy diff --git a/neural_compressor/contrib/strategy/sigopt.py b/neural_compressor/contrib/strategy/sigopt.py deleted file mode 100644 index 81b819c2899..00000000000 --- a/neural_compressor/contrib/strategy/sigopt.py +++ /dev/null @@ -1,312 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""The SigOpt Tuning Strategy provides support for the quantization process.""" -import copy -from collections import OrderedDict - -from neural_compressor.strategy.strategy import TuneStrategy, strategy_registry -from neural_compressor.strategy.utils.tuning_sampler import OpWiseTuningSampler -from neural_compressor.strategy.utils.tuning_structs import OpTuningConfig -from neural_compressor.utils import logger -from neural_compressor.utils.utility import LazyImport - -sigopt = LazyImport("sigopt") - - -@strategy_registry -class SigOptTuneStrategy(TuneStrategy): - """The tuning strategy using SigOpt HPO search in tuning space. - - Args: - model (object): The FP32 model specified for low precision tuning. - conf (Conf): The Conf class instance initialized from user yaml - config file. - q_dataloader (generator): Data loader for calibration, mandatory for - post-training quantization. 
- It is iterable and should yield a tuple (input, - label) for calibration dataset containing label, - or yield (input, _) for label-free calibration - dataset. The input could be a object, list, tuple or - dict, depending on user implementation, as well as - it can be taken as model input. - q_func (function, optional): Reserved for future use. - eval_dataloader (generator, optional): Data loader for evaluation. It is iterable - and should yield a tuple of (input, label). - The input could be a object, list, tuple or dict, - depending on user implementation, as well as it can - be taken as model input. The label should be able - to take as input of supported metrics. If this - parameter is not None, user needs to specify - pre-defined evaluation metrics through configuration - file and should set "eval_func" parameter as None. - Tuner will combine model, eval_dataloader and - pre-defined metrics to run evaluation process. - eval_func (function, optional): The evaluation function provided by user. - This function takes model as parameter, and - evaluation dataset and metrics should be - encapsulated in this function implementation and - outputs a higher-is-better accuracy scalar value. - - The pseudo code should be something like: - - def eval_func(model): - input, label = dataloader() - output = model(input) - accuracy = metric(output, label) - return accuracy - dicts (dict, optional): The dict containing resume information. - Defaults to None. - """ - - def __init__( - self, - model, - conf, - q_dataloader=None, - q_func=None, - eval_func=None, - eval_dataloader=None, - eval_metric=None, - resume=None, - q_hooks=None, - ): - """Initialize the SigOpt tuning strategy if the user specified to use it. - - Args: - model: The FP32 model specified for low precision tuning. - conf: The Conf class instance includes all user configurations. - q_dataloader: Data loader for calibration, mandatory for post-training quantization. Defaults to None. - q_func: Training function for quantization aware training. Defaults to None. Defaults to None. - eval_func: The evaluation function provided by user. This function takes model as parameter, and - evaluation dataset and metrics should be encapsulated in this function implementation and - outputs a higher-is-better accuracy scalar value. - eval_dataloader: Data loader for evaluation. Defaults to None. - eval_metric: Metric for evaluation. Defaults to None. - resume: The dict containing resume information. Defaults to None. - q_hooks: The dict of training hooks, supported keys are: on_epoch_begin, on_epoch_end, on_step_begin, - on_step_end. Their values are functions to be executed in adaptor layer.. Defaults to None. - """ - super().__init__( - model=model, - conf=conf, - q_dataloader=q_dataloader, - q_func=q_func, - eval_func=eval_func, - eval_dataloader=eval_dataloader, - eval_metric=eval_metric, - resume=resume, - q_hooks=q_hooks, - ) - logger.info("*** Initialize SigOpt tuning") - self.config = self._initialize_config(conf) - strategy_name = self.config.tuning_criterion.strategy - if strategy_name.lower() == "sigopt": - try: - import sigopt - except ImportError: - try: - import subprocess - import sys - - subprocess.check_call([sys.executable, "-m", "pip", "install", "sigopt"]) - import sigopt # pylint: disable=import-error - except: - assert False, "Unable to import sigopt from the local environment." 
- else: - pass - # SigOpt init - strategy_kwargs = self.config.tuning_criterion.strategy_kwargs - client_token = strategy_kwargs.get("sigopt_api_token", None) - self.project_id = strategy_kwargs.get("sigopt_project_id", None) - self.experiment_name = strategy_kwargs.get("sigopt_experiment_name", None) - try: - assert client_token is not None - except AssertionError: - logger.error( - "`sigopt_api_token` field in yaml file is required. " - "Please refer to details in /docs/sigopt_strategy.md." - ) - exit(0) - try: - assert self.project_id is not None - logger.warning( - "Project id is {}, " "Please check whether it is created in the sigopt account.".format(self.project_id) - ) - except AssertionError: - logger.error( - "`sigopt_project_id` field in yaml file is required. " - "Please refer to details in /docs/sigopt_strategy.md." - ) - exit(0) - if self.experiment_name == "nc-tune": - logger.info( - "Default experiment name `nc-tune` is used, " - "Please refer to details in /docs/sigopt_strategy.md " - "if user wants to modify it." - ) - else: - logger.info("Experiment name is {}.".format(self.experiment_name)) - - self.conn = sigopt.Connection(client_token) - self.experiment = None - - def params_to_tune_configs(self, params): - """Get the parameters of the tuning strategy.""" - op_tuning_cfg = {} - calib_sampling_size_lst = self.tuning_space.root_item.get_option_by_name("calib_sampling_size").options - for op_name_type, configs in self.op_configs.items(): - if len(configs) == 1: - op_tuning_cfg[op_name_type] = configs[0] - else: - op_tuning_cfg[op_name_type] = configs[min(len(configs) - 1, int(params[op_name_type[0]]))] - calib_sampling_size = calib_sampling_size_lst[min(len(configs) - 1, int(params["calib_sampling_size"]))] - op_tuning_cfg["calib_sampling_size"] = calib_sampling_size - return op_tuning_cfg - - def next_tune_cfg(self): - """Yielding the tuning config to traverse by concreting strategies according to last tuning result.""" - while self.experiment.progress.observation_count < self.experiment.observation_budget: - suggestion = self.conn.experiments(self.experiment.id).suggestions().create() - yield self.params_to_tune_configs(suggestion.assignments) - values = [ - dict(name="accuracy", value=self.last_tune_result[0]), - dict(name="latency", value=self.last_tune_result[1]), - ] - obs = ( - self.conn.experiments(self.experiment.id).observations().create(suggestion=suggestion.id, values=values) - ) - logger.debug("`suggestion_id` is {}, `observation_id` is {}.".format(suggestion.id, obs.id)) - self.experiment = self.conn.experiments(self.experiment.id).fetch() - - def get_acc_target(self, base_acc): - """Get the tuning target of the accuracy ceiterion.""" - accuracy_criterion_conf = self.config.accuracy_criterion - if accuracy_criterion_conf.criterion == "relative": - return base_acc * (1.0 - accuracy_criterion_conf.tolerable_loss) - else: - return base_acc - accuracy_criterion_conf.tolerable_loss - - def traverse(self): - """The main traverse logic, which could be override by some concrete strategy which needs more hooks. - - This is SigOpt version of traverse -- with additional constraints setting to HPO. 
- """ - self._prepare_tuning() - - baseline_msg = ( - "[Accuracy: {:.4f}".format(self.baseline[0]) - + "".join( - [ - ", {}: {:.4f}".format(x, y) - for x, y in zip(self.objectives.representation, self.baseline[1]) - if x != "Accuracy" - ] - ) - + "]" - if self.baseline - else "n/a" - ) - logger.info("FP32 baseline is: {}".format(baseline_msg)) - self.experiment = self.create_exp(acc_target=self.get_acc_target(self.baseline[0])) - trials_count = 0 - for tune_cfg in self.next_tune_cfg(): - # add tune_cfg here as quantize use tune_cfg - trials_count += 1 - tuning_history = self._find_tuning_history(tune_cfg) - if tuning_history and trials_count < self.config.tuning_criterion.max_trials: - self.last_tune_result = tuning_history["last_tune_result"] - self.best_tune_result = tuning_history["best_tune_result"] - logger.warn("Find evaluated tuning config, skip.") - continue - - logger.debug("Dump current tuning configuration:") - logger.debug(tune_cfg) - self.last_qmodel = self.adaptor.quantize(tune_cfg, self.model, self.calib_dataloader, self.q_func) - assert self.last_qmodel - # Return the last quantized model as a result. if performance only. - if self._not_tuning: - self.best_qmodel = self.last_qmodel - self._add_tuning_history(copy.deepcopy(tune_cfg), (-1, [0]), q_config=self.last_qmodel.q_config) - return - self.last_tune_cfg = copy.deepcopy(tune_cfg) - self.last_tune_result = self._evaluate(self.last_qmodel) - - need_stop = self.stop(self.config.tuning_criterion.timeout, trials_count) - - # record the tuning history - saved_tune_cfg = copy.deepcopy(tune_cfg) - saved_last_tune_result = copy.deepcopy(self.last_tune_result) - self._add_tuning_history(saved_tune_cfg, saved_last_tune_result) - - if need_stop: - break - - def create_exp(self, acc_target): - """Set the config for the experiment.""" - params = [] - from copy import deepcopy - - tuning_space = self.tuning_space - initial_op_tuning_cfg = {} - for item in tuning_space.root_item.options: - if item.item_type == "op": - op_name, op_type = item.name - initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, "fp32", tuning_space) - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name("calib_sampling_size").options - # step1. 
collect the ops that support static and dynamic - quant_mode_wise_items = OrderedDict() - query_order = ["static", "dynamic", "bf16", "fp16", "fp32"] - pre_items = set() - for quant_mode in query_order: - items = tuning_space.query_items_by_quant_mode(quant_mode) - filtered_items = [item for item in items if item not in pre_items] - pre_items = pre_items.union(set(items)) - quant_mode_wise_items[quant_mode] = filtered_items - - def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): - """Initialize the op tuning mode.""" - for item in items_lst: - op_item_dtype_dict[item.name] = target_quant_mode - - op_item_dtype_dict = OrderedDict() - for quant_mode, quant_mode_items in quant_mode_wise_items.items(): - initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) - - op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) - self.op_configs = op_wise_pool.get_opwise_candidate() - for op, configs in self.op_configs.items(): - if len(configs) > 1: - params.append(dict(name=op[0], type="int", bounds=dict(min=0, max=len(configs) - 1))) - params.append( - dict(name="calib_sampling_size", type="int", bounds=dict(min=0, max=len(calib_sampling_size_lst) - 1)) - ) - experiment = self.conn.experiments().create( - name=self.experiment_name, - parameters=params, - metrics=[ - dict(name="accuracy", objective="maximize", strategy="constraint", threshold=acc_target), - dict(name="latency", objective="minimize", strategy="optimize"), - ], - parallel_bandwidth=1, - # Define an Observation Budget for your experiment - observation_budget=100, - project=self.project_id, - ) - - logger.debug("Create experiment at https://app.sigopt.com/experiment/{}".format(experiment.id)) - - return experiment diff --git a/test/strategy/test_sigopt.py b/test/strategy/test_sigopt.py deleted file mode 100644 index 90e296ab016..00000000000 --- a/test/strategy/test_sigopt.py +++ /dev/null @@ -1,127 +0,0 @@ -"""Tests for quantization.""" - -import os -import shutil -import unittest - -import numpy as np - -if os.getenv("SIGOPT_API_TOKEN") is None or os.getenv("SIGOPT_PROJECT_ID") is None: - CONDITION = True -else: - CONDITION = False - - -def build_fake_model(): - import tensorflow as tf - - try: - graph = tf.Graph() - graph_def = tf.compat.v1.GraphDef() - with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") - y = tf.constant(np.random.random((2, 2, 1, 1)).astype(np.float32), name="y") - z = tf.constant(np.random.random((1, 1, 1, 1)).astype(np.float32), name="z") - op = tf.nn.conv2d(input=tf.nn.relu(x), filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") - op2 = tf.nn.conv2d( - input=tf.nn.relu(op), filters=z, strides=[1, 1, 1, 1], padding="VALID", name="op2_to_store" - ) - - sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( - sess, sess.graph_def, ["op2_to_store"] - ) - - graph_def.ParseFromString(constant_graph.SerializeToString()) - with graph.as_default(): - tf.import_graph_def(graph_def, name="") - except: - graph = tf.Graph() - graph_def = tf.compat.v1.GraphDef() - with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") - y = tf.constant(np.random.random((2, 2, 1, 1)).astype(np.float32), name="y") - z = tf.constant(np.random.random((1, 1, 1, 1)).astype(np.float32), name="z") - op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], 
padding="VALID", name="op_to_store") - op2 = tf.nn.conv2d(input=op, filters=z, strides=[1, 1, 1, 1], padding="VALID", name="op2_to_store") - - sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( - sess, sess.graph_def, ["op2_to_store"] - ) - - graph_def.ParseFromString(constant_graph.SerializeToString()) - with graph.as_default(): - tf.import_graph_def(graph_def, name="") - return graph - - -class TestSigoptTuningStrategy(unittest.TestCase): - @classmethod - def setUpClass(self): - sigopt_api_token = os.getenv("SIGOPT_API_TOKEN") - sigopt_project_id = os.getenv("SIGOPT_PROJECT_ID") - self.constant_graph = build_fake_model() - - @classmethod - def tearDownClass(self): - shutil.rmtree("saved", ignore_errors=True) - - @unittest.skipIf(CONDITION, "missing the env variables 'SIGOPT_API_TOKEN' or 'SIGOPT_PROJECT_ID'") - def test_run_sigopt_one_trial_new_api(self): - from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion - from neural_compressor.data import DATALOADERS, Datasets - from neural_compressor.quantization import fit - - # dataset and dataloader - dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) - dataloader = DATALOADERS["tensorflow"](dataset) - - # tuning and accuracy criterion - accuracy_criterion = AccuracyCriterion(criterion="relative") - strategy_kwargs = { - "sigopt_api_token": "sigopt_api_token_test", - "sigopt_project_id": "sigopt_project_id_test", - "sigopt_experiment_name": "nc-tune", - } - tuning_criterion = TuningCriterion(strategy="sigopt", strategy_kwargs=strategy_kwargs, max_trials=3) - conf = PostTrainingQuantConfig( - quant_level=1, approach="static", tuning_criterion=tuning_criterion, accuracy_criterion=accuracy_criterion - ) - self.assertEqual(conf.tuning_criterion.strategy_kwargs, strategy_kwargs) - - def fake_eval(model): - return 1 - - q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader=dataloader, eval_func=fake_eval) - - def test_run_sigopt_one_trial_fake_token(self): - from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion - from neural_compressor.data import DATALOADERS, Datasets - from neural_compressor.quantization import fit - - # dataset and dataloader - dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) - dataloader = DATALOADERS["tensorflow"](dataset) - - # tuning and accuracy criterion - accuracy_criterion = AccuracyCriterion(criterion="relative") - strategy_kwargs = { - "sigopt_api_token": "sigopt_api_token_test", - "sigopt_project_id": "sigopt_project_id_test", - "sigopt_experiment_name": "nc-tune", - } - tuning_criterion = TuningCriterion(strategy="sigopt", strategy_kwargs=strategy_kwargs, max_trials=3) - conf = PostTrainingQuantConfig( - quant_level=1, approach="static", tuning_criterion=tuning_criterion, accuracy_criterion=accuracy_criterion - ) - self.assertEqual(conf.tuning_criterion.strategy_kwargs, strategy_kwargs) - - def fake_eval(model): - return 1 - - q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader=dataloader, eval_func=fake_eval) - - -if __name__ == "__main__": - unittest.main()