Skip to content

Commit

Permalink
Add onnx whisper-large quantization example (#920)
Browse files Browse the repository at this point in the history
  • Loading branch information
mengniwang95 committed Jul 3, 2023
1 parent 683cdc9 commit 038be06
Show file tree
Hide file tree
Showing 8 changed files with 467 additions and 0 deletions.
52 changes: 52 additions & 0 deletions examples/.config/onnx_optimize.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
{
"whisper_large_static": {
"working_dir": "huggingface/onnxruntime/speech-recognition/quantization",
"tune":{
"cmd": "bash run_tuning.sh",
"params": {
"config": "/tf_dataset2/models/onnx/whisper_large",
"approach": "static",
"output_model": "whisper-large-with-past-static",
"input_model": "/tf_dataset2/models/onnx/whisper_large",
"dataset_location": "/tf_dataset2/datasets/datasets_cache"
}
},
"benchmark": {
"cmd": "bash run_benchmark.sh",
"params": {
"config": "/tf_dataset2/models/onnx/whisper_large",
"mode": "accuracy",
"batch_size": "1",
"iters": "100",
"input_model": "whisper-large-with-past-static",
"dataset_location": "/tf_dataset2/datasets/datasets_cache",
"int8": "false"
}
}
},
"whisper_large_dynamic": {
"working_dir": "huggingface/onnxruntime/speech-recognition/quantization",
"tune":{
"cmd": "bash run_tuning.sh",
"params": {
"config": "/tf_dataset2/models/onnx/whisper_large",
"approach": "dynamic",
"output_model": "whisper-large-with-past-dynamic",
"input_model": "/tf_dataset2/models/onnx/whisper_large",
"dataset_location": "/tf_dataset2/datasets/datasets_cache"
}
},
"benchmark": {
"cmd": "bash run_benchmark.sh",
"params": {
"config": "/tf_dataset2/models/onnx/whisper_large",
"mode": "accuracy",
"batch_size": "1",
"iters": "100",
"input_model": "whisper-large-with-past-dynamic",
"dataset_location": "/tf_dataset2/datasets/datasets_cache",
"int8": "false"
}
}
    }
}
3 changes: 3 additions & 0 deletions examples/huggingface/onnxruntime/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
We have [optimization](optimization_README.md) examples.


10 changes: 10 additions & 0 deletions examples/huggingface/onnxruntime/optimization_README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Huggingface Examples

Welcome to ONNX Runtime Huggingface examples. The models are from [Huggingface](https://huggingface.co) and the model compression technology is dependent on [Intel® Neural Compressor](https://github.com/intel/neural-compressor).

## Quantization approach

| Task | PostTrainingDynamic | PostTrainingStatic |
|---|:---:|:---:|
|**`speech-recognition`**|||

Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
Step-by-Step
============
The script `run_whisper.py` provides two quantization approaches (PostTrainingStatic and PostTrainingDynamic) based on [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with [LibriSpeech test-clean](https://huggingface.co/datasets/librispeech_asr) dataset.

# Prerequisite
## 1. Create Environment
```shell
pip install -r requirements.txt
```

## 2. Prepare Model
```
optimum-cli export onnx --model openai/whisper-large whisper-large-with-past/ --task automatic-speech-recognition-with-past --opset 13
```

# Run
## 1. Quantization

- To get int8 model

```
bash run_tuning.sh --config=openai/whisper-large \
--dataset_location=/path/to/dataset \ # optional
--input_model=whisper-large-with-past/ \
--output_model=whisper-large-with-past-static/ \ # or whisper-large-with-past-dynamic
--approach=static # or dynamic
```

- To get model accuracy

```
bash run_benchmark.sh --config=whisper-large-with-past \
--dataset_location=/path/to/dataset \ # optional
--input_model=whisper-large-with-past-static/ \
--int8 \
--mode=accuracy
```

- To get model performance

```
numactl -m 0 -C 0-3 bash run_benchmark.sh --config=whisper-large-with-past \
--dataset_location=/path/to/dataset \ # optional
--input_model=whisper-large-with-past-static/ \
--mode=benchmark \
--iters=100 \
--cores_per_instance=4 \
--int8 \
--max_new_tokens=16
```

**Notes**:
- If users don't set dataset_location, it will download the dataset or use the cached dataset automatically.
- numactl command is used to bind specific cores.

# Validated model list

|Topology|Pretrained model|PostTrainingDynamic|PostTrainingStatic|
|---|------------------------------------|---|---|
|whisper_large|openai/whisper-large|||


Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
datasets
torch
transformers
jiwer
optimum
onnx
onnxruntime
evaluate
neural-compressor
librosa
soundfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/bin/bash
set -x  # trace executed commands (useful in CI logs)

# Top-level driver: parse the command line, then run the benchmark.
main() {
  init_params "$@"
  run_benchmark
}

# init params
# Parse "--key=value" command-line arguments into the global variables
# consumed by run_benchmark. Unrecognized arguments are silently ignored
# (kept for backward compatibility with existing CI invocations).
#
# Defaults: iters=100, dataset_location=$HOME/.cache/huggingface,
#           script=run_whisper.py.
# Globals written: config, dataset_location, input_model, mode, iters,
#                  cores_per_instance, max_new_tokens, int8, script.
function init_params {
  iters=100
  dataset_location="$HOME/.cache/huggingface"
  script="run_whisper.py"
  for var in "$@"
  do
    case "$var" in
      --config=*)
          # ${var#*=} strips everything through the first '='. Unlike the
          # previous $(echo $var | cut -f2 -d=) it forks no subprocess, does
          # not word-split/glob the value, and preserves values that
          # themselves contain '=' (e.g. some paths).
          config="${var#*=}"
      ;;
      --dataset_location=*)
          dataset_location="${var#*=}"
      ;;
      --input_model=*)
          input_model="${var#*=}"
      ;;
      --mode=*)
          mode="${var#*=}"
      ;;
      --iters=*)
          iters="${var#*=}"
      ;;
      --cores_per_instance=*)
          cores_per_instance="${var#*=}"
      ;;
      --max_new_tokens=*)
          max_new_tokens="${var#*=}"
      ;;
      --int8=*)
          int8="${var#*=}"
      ;;
    esac
  done

}


# run_benchmark
# Invoke run_whisper.py in accuracy or benchmark mode.
# Globals read: int8, config, mode, script, dataset_location, input_model,
#               iters; cores_per_instance (default 4) and max_new_tokens
#               (default 16) fall back when unset.
# Exits 1 when mode is neither "accuracy" nor "benchmark".
function run_benchmark {

    # FP32 baseline: benchmark the original exported model (config path)
    # instead of the quantized one.
    if [[ "${int8}" == "false" ]]; then
        input_model=${config}
    fi

    if [[ "${mode}" == "accuracy" ]]; then
        mode_cmd=" --accuracy_only"
    elif [[ "${mode}" == "benchmark" ]]; then
        mode_cmd=" --benchmark"
    else
        echo "Error: No such mode: ${mode}" >&2  # diagnostics go to stderr
        exit 1
    fi


    # All expansions quoted so paths with spaces survive; ${mode_cmd} is
    # intentionally left unquoted so the flag it holds is word-split into
    # its own argument.
    python -u "${script}" \
        --model_name_or_path "${config}" \
        --cache_dir "${dataset_location}" \
        --cores_per_instance "${cores_per_instance-4}" \
        --input_model "${input_model}" \
        --max_new_tokens "${max_new_tokens-16}" \
        --iters "${iters}" \
        ${mode_cmd}
}

# Forward all command-line arguments to the entry point.
main "$@"
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/bin/bash
set -x  # trace executed commands (useful in CI logs)

# Top-level driver: parse the command line, then run quantization tuning.
main() {
  init_params "$@"
  run_tuning
}

# init params
# Parse "--key=value" command-line arguments into the global variables
# consumed by run_tuning. Unrecognized arguments are silently ignored
# (kept for backward compatibility with existing CI invocations).
#
# Defaults: approach=static, script=run_whisper.py,
#           dataset_location=$HOME/.cache/huggingface.
# Globals written: config, dataset_location, input_model, output_model,
#                  approach, script.
function init_params {
  approach="static"
  script="run_whisper.py"
  dataset_location="$HOME/.cache/huggingface"
  for var in "$@"
  do
    case "$var" in
      --config=*)
          # ${var#*=} strips everything through the first '='. Unlike the
          # previous $(echo $var | cut -f2 -d=) it forks no subprocess, does
          # not word-split/glob the value, and preserves values that
          # themselves contain '='.
          config="${var#*=}"
      ;;
      --dataset_location=*)
          dataset_location="${var#*=}"
      ;;
      --input_model=*)
          input_model="${var#*=}"
      ;;
      --output_model=*)
          output_model="${var#*=}"
      ;;
      --approach=*)
          approach="${var#*=}"
      ;;
    esac
  done

}

# run_tuning
# Invoke run_whisper.py with --tune to produce a quantized model.
# Globals read: script, config, input_model, output_model,
#               dataset_location, approach.
function run_tuning {

    # All expansions quoted so paths with spaces survive word-splitting.
    python -u "${script}" \
        --model_name_or_path "${config}" \
        --input_model "${input_model}" \
        --output_model "${output_model}" \
        --cache_dir "${dataset_location}" \
        --tune \
        --approach "${approach}"

}

# Forward all command-line arguments to the entry point.
main "$@"

0 comments on commit 038be06

Please sign in to comment.