-
Notifications
You must be signed in to change notification settings - Fork 192
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add onnx whisper-large quantization example (#920)
- Loading branch information
1 parent
683cdc9
commit 038be06
Showing
8 changed files
with
467 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
{ | ||
"whisper_large_static": { | ||
"working_dir": "huggingface/onnxruntime/speech-recognition/quantization", | ||
"tune":{ | ||
"cmd": "bash run_tuning.sh", | ||
"params": { | ||
"config": "/tf_dataset2/models/onnx/whisper_large", | ||
"approach": "static", | ||
"output_model": "whisper-large-with-past-static", | ||
"input_model": "/tf_dataset2/models/onnx/whisper_large", | ||
"dataset_location": "/tf_dataset2/datasets/datasets_cache" | ||
} | ||
}, | ||
"benchmark": { | ||
"cmd": "bash run_benchmark.sh", | ||
"params": { | ||
"config": "/tf_dataset2/models/onnx/whisper_large", | ||
"mode": "accuracy", | ||
"batch_size": "1", | ||
"iters": "100", | ||
"input_model": "whisper-large-with-past-static", | ||
"dataset_location": "/tf_dataset2/datasets/datasets_cache", | ||
"int8": "false" | ||
} | ||
} | ||
}, | ||
"whisper_large_dynamic": { | ||
"working_dir": "huggingface/onnxruntime/speech-recognition/quantization", | ||
"tune":{ | ||
"cmd": "bash run_tuning.sh", | ||
"params": { | ||
"config": "/tf_dataset2/models/onnx/whisper_large", | ||
"approach": "dynamic", | ||
"output_model": "whisper-large-with-past-dynamic", | ||
"input_model": "/tf_dataset2/models/onnx/whisper_large", | ||
"dataset_location": "/tf_dataset2/datasets/datasets_cache" | ||
} | ||
}, | ||
"benchmark": { | ||
"cmd": "bash run_benchmark.sh", | ||
"params": { | ||
"config": "/tf_dataset2/models/onnx/whisper_large", | ||
"mode": "accuracy", | ||
"batch_size": "1", | ||
"iters": "100", | ||
"input_model": "whisper-large-with-past-dynamic", | ||
"dataset_location": "/tf_dataset2/datasets/datasets_cache", | ||
"int8": "false" | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
we have [optimization](optimization_README.md) examples. | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Huggingface Examples | ||
|
||
Welcome to ONNX Runtime Huggingface examples. The models are from [Huggingface](https://huggingface.co), and the model compression technology depends on [Intel® Neural Compressor](https://github.com/intel/neural-compressor). | ||
|
||
## Quantization approach | ||
|
||
| Task | PostTrainingDynamic | PostTrainingStatic | ||
|---|:---:|:---:| | ||
|**`speech-recognition`**| ✅ | ✅ | | ||
|
62 changes: 62 additions & 0 deletions
62
examples/huggingface/onnxruntime/speech-recognition/quantization/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
Step-by-Step | ||
============ | ||
The script `run_whisper.py` provides two quantization approaches (PostTrainingStatic and PostTrainingDynamic) based on [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with [LibriSpeech test-clean](https://huggingface.co/datasets/librispeech_asr) dataset. | ||
|
||
# Prerequisite | ||
## 1. Create Environment | ||
```shell | ||
pip install -r requirements.txt | ||
``` | ||
|
||
## 2. Prepare Model | ||
``` | ||
optimum-cli export onnx --model openai/whisper-large whisper-large-with-past/ --task automatic-speech-recognition-with-past --opset 13 | ||
``` | ||
|
||
# Run | ||
## 1. Quantization | ||
|
||
- To get int8 model | ||
|
||
``` | ||
# --dataset_location is optional; for dynamic quantization use --approach=dynamic and, e.g., --output_model=whisper-large-with-past-dynamic/ | ||
bash run_tuning.sh --config=openai/whisper-large \ | ||
--dataset_location=/path/to/dataset \ | ||
--input_model=whisper-large-with-past/ \ | ||
--output_model=whisper-large-with-past-static/ \ | ||
--approach=static | ||
``` | ||
|
||
- To get model accuracy | ||
|
||
``` | ||
# --dataset_location is optional | ||
bash run_benchmark.sh --config=whisper-large-with-past \ | ||
--dataset_location=/path/to/dataset \ | ||
--input_model=whisper-large-with-past-static/ \ | ||
--int8 \ | ||
--mode=accuracy | ||
``` | ||
|
||
- To get model performance | ||
|
||
``` | ||
# --dataset_location is optional | ||
numactl -m 0 -C 0-3 bash run_benchmark.sh --config=whisper-large-with-past \ | ||
--dataset_location=/path/to/dataset \ | ||
--input_model=whisper-large-with-past-static/ \ | ||
--mode=benchmark \ | ||
--iters=100 \ | ||
--cores_per_instance=4 \ | ||
--int8 \ | ||
--max_new_tokens=16 | ||
``` | ||
|
||
**Notes**: | ||
- If users don't set dataset_location, it will download the dataset or use the cached dataset automatically. | ||
- numactl command is used to bind specific cores. | ||
|
||
# Validated model list | ||
|
||
|Topology|Pretrained model|PostTrainingDynamic|PostTrainingStatic | ||
|---|------------------------------------|---|--- | ||
|whisper_large|openai/whisper-large| ✅| ✅| | ||
|
||
|
11 changes: 11 additions & 0 deletions
11
examples/huggingface/onnxruntime/speech-recognition/quantization/requirements.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
datasets | ||
torch | ||
transformers | ||
jiwer | ||
optimum | ||
onnx | ||
onnxruntime | ||
evaluate | ||
neural-compressor | ||
librosa | ||
soundfile |
76 changes: 76 additions & 0 deletions
76
examples/huggingface/onnxruntime/speech-recognition/quantization/run_benchmark.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
#!/bin/bash | ||
# Trace mode: print each command before it is executed.
set -x | ||
|
||
# Entry point of the benchmark script.
# Arguments: the script's command-line options, forwarded unchanged to
#            init_params.
# Flow:      parse the options into globals, then launch the benchmark run.
function main {
  init_params "$@"
  run_benchmark
}
|
||
# Parse --key=value command-line options into the globals read by
# run_benchmark.
# Globals written:
#   iters             default 100
#   dataset_location  default $HOME/.cache/huggingface
#   script            always run_whisper.py
#   config, input_model, mode, cores_per_instance, max_new_tokens, int8
#                     only assigned when the matching option is present
# Arguments: any number of --key=value options; unrecognized ones are ignored.
function init_params {
  iters=100
  dataset_location="${HOME}/.cache/huggingface"
  script="run_whisper.py"

  local var
  for var in "$@"; do
    # ${var#*=} keeps everything after the FIRST '=', so values that contain
    # '=' or whitespace survive intact.
    case "${var}" in
      --config=*)
        config="${var#*=}"
        ;;
      --dataset_location=*)
        dataset_location="${var#*=}"
        ;;
      --input_model=*)
        input_model="${var#*=}"
        ;;
      --mode=*)
        mode="${var#*=}"
        ;;
      --iters=*)
        iters="${var#*=}"
        ;;
      --cores_per_instance=*)
        cores_per_instance="${var#*=}"
        ;;
      --max_new_tokens=*)
        max_new_tokens="${var#*=}"
        ;;
      --int8=*)
        int8="${var#*=}"
        ;;
      --int8)
        # Bare flag form: any value other than "false" keeps input_model
        # in run_benchmark, so this matches the previous effective behavior.
        int8="true"
        ;;
    esac
  done
}
|
||
|
||
# Launch the Python benchmark script with the options collected by
# init_params.
# Globals read: int8, config, input_model, mode, script, dataset_location,
#               cores_per_instance (4 if unset), max_new_tokens (16 if unset),
#               iters
# Globals written: input_model (overwritten with config when int8 is "false")
# Exits with status 1 when mode is neither "accuracy" nor "benchmark".
function run_benchmark {
  # With int8 == "false" the run targets the path given via --config instead
  # of the one given via --input_model.
  if [[ "${int8}" == "false" ]]; then
    input_model="${config}"
  fi

  local mode_flag
  case "${mode}" in
    accuracy)
      mode_flag="--accuracy_only"
      ;;
    benchmark)
      mode_flag="--benchmark"
      ;;
    *)
      echo "Error: No such mode: ${mode}"
      exit 1
      ;;
  esac

  # Build argv as an array so every value stays a single argument even when
  # it contains spaces or glob characters.
  local -a cmd=(
    python -u "${script}"
    --model_name_or_path "${config}"
    --cache_dir "${dataset_location}"
    --cores_per_instance "${cores_per_instance-4}"
    --input_model "${input_model}"
    --max_new_tokens "${max_new_tokens-16}"
    --iters "${iters}"
    "${mode_flag}"
  )
  "${cmd[@]}"
}
|
||
# Script entry point: pass every command-line argument to main unchanged.
main "$@" |
52 changes: 52 additions & 0 deletions
52
examples/huggingface/onnxruntime/speech-recognition/quantization/run_tuning.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
#!/bin/bash | ||
# Trace mode: print each command before it is executed.
set -x | ||
|
||
# Entry point of the tuning script.
# Arguments: the script's command-line options, forwarded unchanged to
#            init_params.
# Flow:      parse the options into globals, then launch the tuning run.
function main {
  init_params "$@"
  run_tuning
}
|
||
# Parse --key=value command-line options into the globals read by run_tuning.
# Globals written:
#   approach          default "static"
#   script            always run_whisper.py
#   dataset_location  default $HOME/.cache/huggingface
#   config, input_model, output_model
#                     only assigned when the matching option is present
# Arguments: any number of --key=value options; unrecognized ones are ignored.
function init_params {
  approach="static"
  script="run_whisper.py"
  dataset_location="${HOME}/.cache/huggingface"

  local var
  for var in "$@"; do
    # ${var#*=} keeps everything after the FIRST '=', so values that contain
    # '=' or whitespace survive intact.
    case "${var}" in
      --config=*)
        config="${var#*=}"
        ;;
      --dataset_location=*)
        dataset_location="${var#*=}"
        ;;
      --input_model=*)
        input_model="${var#*=}"
        ;;
      --output_model=*)
        output_model="${var#*=}"
        ;;
      --approach=*)
        approach="${var#*=}"
        ;;
    esac
  done
}
|
||
# Launch the Python script in tuning mode with the options collected by
# init_params.
# Globals read: script, config, input_model, output_model, dataset_location,
#               approach
function run_tuning {
  # Build argv as an array so every value stays a single argument even when
  # it contains spaces or glob characters.
  local -a cmd=(
    python -u "${script}"
    --model_name_or_path "${config}"
    --input_model "${input_model}"
    --output_model "${output_model}"
    --cache_dir "${dataset_location}"
    --tune
    --approach "${approach}"
  )
  "${cmd[@]}"
}
|
||
# Script entry point: pass every command-line argument to main unchanged.
main "$@" |
Oops, something went wrong.