Add the ability to choose the ONNX runtime execution provider in `ORTModel` (#137)

* added option for onnxruntime execution provider

* formatting

* better description

* changed ort provider to model arguments

* added documentation

* changed ort provider name

* formatting

* remove wrong files

* trigger actions

* added error catch in case the given arguments for cpu-gpu optimization are contradictory

* remove unused files

* correct wrong catch

* styling

Co-authored-by: Felix Marty <felix@huggingface.co>
fxmarty committed Apr 20, 2022
1 parent d36d6ee commit 2742bf1
Showing 7 changed files with 153 additions and 13 deletions.
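The diff below threads one new model argument, `execution_provider`, through the six example scripts and into `ORTModel`. As a rough sketch of how the flag reaches a script, assuming the HfArgumentParser pattern these examples already follow (only the `execution_provider` field and its default are taken from this commit; the surrounding scaffolding is illustrative):

from dataclasses import dataclass, field
from transformers import HfArgumentParser

@dataclass
class ModelArguments:
    # Field added by this commit: which ONNX Runtime execution provider to run inference on.
    execution_provider: str = field(
        default="CPUExecutionProvider",
        metadata={"help": "ONNX Runtime execution provider to use for inference."},
    )

# Invoked e.g. as: python run_qa.py --execution_provider CUDAExecutionProvider ...
(model_args,) = HfArgumentParser(ModelArguments).parse_args_into_dataclasses()
print(model_args.execution_provider)

When the optimization level is above 1, the scripts additionally require the chosen provider and the --optimize_for_gpu flag to agree, raising a ValueError otherwise (the two checks added to main() in each optimization script below).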
37 changes: 35 additions & 2 deletions examples/onnxruntime/optimization/question-answering/run_qa.py
@@ -81,6 +81,10 @@ class ModelArguments:
"with private models)."
},
)
execution_provider: str = field(
default="CPUExecutionProvider",
metadata={"help": "ONNX Runtime execution provider to use for inference."},
)


@dataclass
@@ -257,6 +261,28 @@ def main():
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()

if (
optim_args.optimization_level > 1
and optim_args.optimize_for_gpu
and model_args.execution_provider == "CPUExecutionProvider"
):
raise ValueError(
f"Optimization level is set at {optim_args.optimization_level} and "
f"GPU optimization will be done, although the CPU execution provider "
f"was selected. Use --execution_provider CUDAExecutionProvider."
)

if (
optim_args.optimization_level > 1
and not optim_args.optimize_for_gpu
and model_args.execution_provider == "CUDAExecutionProvider"
):
raise ValueError(
f"Optimization level is set at {optim_args.optimization_level} and "
f"CPU optimization will be done, although the GPU execution provider "
f"was selected. Remove the argument --execution_provider CUDAExecutionProvider."
)

logger.info(f"Optimization with the following parameters {optim_args}")

if os.path.isdir(training_args.output_dir) and not training_args.overwrite_output_dir:
@@ -441,7 +467,12 @@ def compute_metrics(p: EvalPrediction):
# During Feature creation dataset samples might increase, we will select required samples again
eval_dataset = eval_dataset.select(range(data_args.max_eval_samples))

ort_model = ORTModel(optimized_model_path, optimizer._onnx_config, compute_metrics=compute_metrics)
ort_model = ORTModel(
optimized_model_path,
optimizer._onnx_config,
execution_provider=model_args.execution_provider,
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(eval_dataset)
predictions = post_processing_function(eval_examples, eval_dataset, outputs.predictions)
metrics = compute_metrics(predictions)
@@ -472,7 +503,9 @@ def compute_metrics(p: EvalPrediction):
# During Feature creation dataset samples might increase, we will select required samples again
predict_dataset = predict_dataset.select(range(data_args.max_predict_samples))

ort_model = ORTModel(optimized_model_path, optimizer._onnx_config)
ort_model = ORTModel(
optimized_model_path, optimizer._onnx_config, execution_provider=model_args.execution_provider
)
outputs = ort_model.evaluation_loop(predict_dataset)
predictions = post_processing_function(predict_examples, predict_dataset, outputs.predictions)
metrics = compute_metrics(predictions)
36 changes: 34 additions & 2 deletions examples/onnxruntime/optimization/text-classification/run_glue.py
@@ -130,6 +130,10 @@ class ModelArguments:
default=None,
metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
)
execution_provider: str = field(
default="CPUExecutionProvider",
metadata={"help": "ONNX Runtime execution provider to use for inference."},
)


@dataclass
@@ -195,6 +199,28 @@ def main():
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()

if (
optim_args.optimization_level > 1
and optim_args.optimize_for_gpu
and model_args.execution_provider == "CPUExecutionProvider"
):
raise ValueError(
f"Optimization level is set at {optim_args.optimization_level} and "
f"GPU optimization will be done, although the CPU execution provider "
f"was selected. Use --execution_provider CUDAExecutionProvider."
)

if (
optim_args.optimization_level > 1
and not optim_args.optimize_for_gpu
and model_args.execution_provider == "CUDAExecutionProvider"
):
raise ValueError(
f"Optimization level is set at {optim_args.optimization_level} and "
f"CPU optimization will be done, although the GPU execution provider "
f"was selected. Remove the argument --execution_provider CUDAExecutionProvider."
)

logger.info(f"Optimization with the following parameters {optim_args}")

if os.path.isdir(training_args.output_dir) and not training_args.overwrite_output_dir:
@@ -354,7 +380,11 @@ def compute_metrics(p: EvalPrediction):
)

ort_model = ORTModel(
optimized_model_path, optimizer._onnx_config, compute_metrics=compute_metrics, label_names=["label"]
optimized_model_path,
optimizer._onnx_config,
execution_provider=model_args.execution_provider,
compute_metrics=compute_metrics,
label_names=["label"],
)
outputs = ort_model.evaluation_loop(eval_dataset)
# Save metrics
@@ -378,7 +408,9 @@ def compute_metrics(p: EvalPrediction):
desc="Running tokenizer on the test dataset",
)

ort_model = ORTModel(optimized_model_path, optimizer._onnx_config)
ort_model = ORTModel(
optimized_model_path, optimizer._onnx_config, execution_provider=model_args.execution_provider
)
outputs = ort_model.evaluation_loop(predict_dataset)
predictions = np.squeeze(outputs.predictions) if is_regression else np.argmax(outputs.predictions, axis=1)

40 changes: 38 additions & 2 deletions examples/onnxruntime/optimization/token-classification/run_ner.py
@@ -81,6 +81,10 @@ class ModelArguments:
"with private models)."
},
)
execution_provider: str = field(
default="CPUExecutionProvider",
metadata={"help": "ONNX Runtime execution provider to use for inference."},
)


@dataclass
@@ -226,6 +230,28 @@ def main():
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()

if (
optim_args.optimization_level > 1
and optim_args.optimize_for_gpu
and model_args.execution_provider == "CPUExecutionProvider"
):
raise ValueError(
f"Optimization level is set at {optim_args.optimization_level} and "
f"GPU optimization will be done, although the CPU execution provider "
f"was selected. Use --execution_provider CUDAExecutionProvider."
)

if (
optim_args.optimization_level > 1
and not optim_args.optimize_for_gpu
and model_args.execution_provider == "CUDAExecutionProvider"
):
raise ValueError(
f"Optimization level is set at {optim_args.optimization_level} and "
f"CPU optimization will be done, although the GPU execution provider "
f"was selected. Remove the argument --execution_provider CUDAExecutionProvider."
)

logger.info(f"Optimization with the following parameters {optim_args}")

if os.path.isdir(training_args.output_dir) and not training_args.overwrite_output_dir:
@@ -425,7 +451,12 @@ def compute_metrics(p):
desc="Running tokenizer on the validation dataset",
)

ort_model = ORTModel(optimized_model_path, optimizer._onnx_config, compute_metrics=compute_metrics)
ort_model = ORTModel(
optimized_model_path,
optimizer._onnx_config,
execution_provider=model_args.execution_provider,
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(eval_dataset)

# Save evaluation metrics
@@ -450,7 +481,12 @@ def compute_metrics(p):
desc="Running tokenizer on the prediction dataset",
)

ort_model = ORTModel(optimized_model_path, optimizer._onnx_config, compute_metrics=compute_metrics)
ort_model = ORTModel(
optimized_model_path,
optimizer._onnx_config,
execution_provider=model_args.execution_provider,
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(predict_dataset)
predictions = np.argmax(outputs.predictions, axis=2)

15 changes: 13 additions & 2 deletions examples/onnxruntime/quantization/question-answering/run_qa.py
@@ -88,6 +88,10 @@ class ModelArguments:
"with private models)."
},
)
execution_provider: str = field(
default="CPUExecutionProvider",
metadata={"help": "ONNX Runtime execution provider to use for inference."},
)


@dataclass
@@ -651,7 +655,12 @@ def compute_metrics(p: EvalPrediction):
if training_args.do_eval:
logger.info("*** Evaluate ***")

ort_model = ORTModel(quantized_model_path, quantizer._onnx_config, compute_metrics=compute_metrics)
ort_model = ORTModel(
quantized_model_path,
quantizer._onnx_config,
execution_provider=model_args.execution_provider,
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(eval_dataset)
predictions = post_processing_function(eval_examples, eval_dataset, outputs.predictions)
metrics = compute_metrics(predictions)
@@ -664,7 +673,9 @@ def compute_metrics(p: EvalPrediction):
if training_args.do_predict:
logger.info("*** Predict ***")

ort_model = ORTModel(quantized_model_path, quantizer._onnx_config)
ort_model = ORTModel(
quantized_model_path, quantizer._onnx_config, execution_provider=model_args.execution_provider
)
outputs = ort_model.evaluation_loop(predict_dataset)
predictions = post_processing_function(predict_examples, predict_dataset, outputs.predictions)
metrics = compute_metrics(predictions)
14 changes: 12 additions & 2 deletions examples/onnxruntime/quantization/text-classification/run_glue.py
@@ -151,6 +151,10 @@ class ModelArguments:
default=None,
metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
)
execution_provider: str = field(
default="CPUExecutionProvider",
metadata={"help": "ONNX Runtime execution provider to use for inference."},
)


@dataclass
@@ -476,7 +480,11 @@ def compute_metrics(p: EvalPrediction):
eval_dataset = eval_dataset.select(range(data_args.max_eval_samples))

ort_model = ORTModel(
quantized_model_path, quantizer._onnx_config, compute_metrics=compute_metrics, label_names=["label"]
quantized_model_path,
quantizer._onnx_config,
execution_provider=model_args.execution_provider,
compute_metrics=compute_metrics,
label_names=["label"],
)
outputs = ort_model.evaluation_loop(eval_dataset)
# Save metrics
@@ -493,7 +501,9 @@ def compute_metrics(p: EvalPrediction):
if data_args.max_predict_samples is not None:
predict_dataset = predict_dataset.select(range(data_args.max_predict_samples))

ort_model = ORTModel(quantized_model_path, quantizer._onnx_config)
ort_model = ORTModel(
quantized_model_path, quantizer._onnx_config, execution_provider=model_args.execution_provider
)
outputs = ort_model.evaluation_loop(predict_dataset)
predictions = np.squeeze(outputs.predictions) if is_regression else np.argmax(outputs.predictions, axis=1)

18 changes: 16 additions & 2 deletions examples/onnxruntime/quantization/token-classification/run_ner.py
@@ -87,6 +87,10 @@ class ModelArguments:
"with private models)."
},
)
execution_provider: str = field(
default="CPUExecutionProvider",
metadata={"help": "ONNX Runtime execution provider to use for inference."},
)


@dataclass
@@ -542,7 +546,12 @@ def compute_metrics(p):
desc="Running tokenizer on the validation dataset",
)

ort_model = ORTModel(quantized_model_path, quantizer._onnx_config, compute_metrics=compute_metrics)
ort_model = ORTModel(
quantized_model_path,
quantizer._onnx_config,
execution_provider=model_args.execution_provider,
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(eval_dataset)

# Save evaluation metrics
@@ -568,7 +577,12 @@ def compute_metrics(p):
desc="Running tokenizer on the prediction dataset",
)

ort_model = ORTModel(quantized_model_path, quantizer._onnx_config, compute_metrics=compute_metrics)
ort_model = ORTModel(
quantized_model_path,
quantizer._onnx_config,
execution_provider=model_args.execution_provider,
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(predict_dataset)
predictions = np.argmax(outputs.predictions, axis=2)

6 changes: 5 additions & 1 deletion optimum/onnxruntime/model.py
@@ -37,6 +37,7 @@ def __init__(
self,
model_path: Union[str, os.PathLike],
onnx_config: OnnxConfig,
execution_provider: Optional[str] = "CPUExecutionProvider",
compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
label_names: Optional[List[str]] = None,
):
@@ -47,6 +48,8 @@ def __init__(
onnx_config (`OnnxConfig`):
An ONNX configuration associated to the ONNX model describing metadata on how to export the model
through the ONNX format.
execution_provider (:obj:`str`, `optional`):
ONNX Runtime execution provider to use.
compute_metrics (`Callable[[EvalPrediction], Dict]`, `optional`):
The function that will be used to compute metrics at evaluation. Must take an `EvalPrediction` and
return a dictionary string to metric values.
@@ -62,6 +65,7 @@ def __init__(
self.onnx_named_inputs = list(onnx_config.inputs.keys())
self.onnx_named_outputs = list(onnx_config.outputs.keys())
self.onnx_config = onnx_config
self.execution_provider = execution_provider
self.model_path = Path(model_path)
self.compute_metrics = compute_metrics
default_label_names = (
@@ -81,7 +85,7 @@ def evaluation_loop(self, dataset: Dataset):
all_preds = None
all_labels = None
options = SessionOptions()
session = InferenceSession(self.model_path.as_posix(), options)
session = InferenceSession(self.model_path.as_posix(), options, providers=[self.execution_provider])
for step, inputs in enumerate(dataset):
has_labels = all(inputs.get(k) is not None for k in self.label_names)
if has_labels:
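Downstream, `ORTModel` simply stores the provider and forwards it to `onnxruntime.InferenceSession` in `evaluation_loop` (last hunk above). A hedged end-to-end sketch of evaluating an optimized model on GPU; the model path, `onnx_config`, `compute_metrics`, and `eval_dataset` are placeholders for objects the example scripts build, and CUDAExecutionProvider assumes the onnxruntime-gpu package is installed:

from optimum.onnxruntime.model import ORTModel

# Placeholders: in the example scripts these come from the optimizer/quantizer and the dataset pipeline.
ort_model = ORTModel(
    "model-optimized.onnx",                       # path to the exported and optimized ONNX model
    onnx_config,                                  # OnnxConfig describing the model's inputs and outputs
    execution_provider="CUDAExecutionProvider",   # new argument introduced by this commit
    compute_metrics=compute_metrics,              # optional metrics callback, as in the examples
)
# Internally the loop now builds InferenceSession(model_path, options, providers=[execution_provider]).
outputs = ort_model.evaluation_loop(eval_dataset)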
