Add the ability to choose the ONNX runtime execution provider in ORTModel #137

Merged (14 commits) on Apr 20, 2022
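
In short, this PR threads a new execution_provider argument from the example scripts' ModelArguments down to ORTModel, which forwards it to the ONNX Runtime InferenceSession. A minimal usage sketch of the new argument (the model path, ONNX config, and dataset below are placeholders, not part of this PR):

# Hypothetical usage sketch; "model-optimized.onnx", onnx_config and eval_dataset are placeholders.
from optimum.onnxruntime.model import ORTModel

ort_model = ORTModel(
    "model-optimized.onnx",                      # an exported/optimized ONNX model on disk
    onnx_config,                                 # the OnnxConfig used when exporting the model
    execution_provider="CUDAExecutionProvider",  # new in this PR; defaults to "CPUExecutionProvider"
)
outputs = ort_model.evaluation_loop(eval_dataset)  # runs inference with the chosen provider
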
37 changes: 35 additions & 2 deletions examples/onnxruntime/optimization/question-answering/run_qa.py
@@ -81,6 +81,10 @@ class ModelArguments:
"with private models)."
},
)
execution_provider: str = field(
default="CPUExecutionProvider",
metadata={"help": "ONNX Runtime execution provider to use for inference."},
)


@dataclass
@@ -257,6 +261,28 @@ def main():
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()

if (
optim_args.optimization_level > 1
and optim_args.optimize_for_gpu
and model_args.execution_provider == "CPUExecutionProvider"
):
raise ValueError(
f"Optimization level is set at {optim_args.optimization_level} and "
f"GPU optimization will be done, although the CPU execution provider "
f"was selected. Use --execution_provider CUDAExecutionProvider."
)

if (
optim_args.optimization_level > 1
and optim_args.optimize_for_gpu is False
and model_args.execution_provider == "CUDAExecutionProvider"
):
raise ValueError(
f"Optimization level is set at {optim_args.optimization_level} and "
f"CPU optimization will be done, although the GPU execution provider "
f"was selected. Remove the argument --execution_provider CUDAExecutionProvider."
)

logger.info(f"Optimization with the following parameters {optim_args}")

if os.path.isdir(training_args.output_dir) and not training_args.overwrite_output_dir:
@@ -441,7 +467,12 @@ def compute_metrics(p: EvalPrediction):
# During Feature creation dataset samples might increase, we will select required samples again
eval_dataset = eval_dataset.select(range(data_args.max_eval_samples))

ort_model = ORTModel(optimized_model_path, optimizer._onnx_config, compute_metrics=compute_metrics)
ort_model = ORTModel(
optimized_model_path,
optimizer._onnx_config,
execution_provider=model_args.execution_provider,
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(eval_dataset)
predictions = post_processing_function(eval_examples, eval_dataset, outputs.predictions)
metrics = compute_metrics(predictions)
@@ -472,7 +503,9 @@ def compute_metrics(p: EvalPrediction):
# During Feature creation dataset samples might increase, we will select required samples again
predict_dataset = predict_dataset.select(range(data_args.max_predict_samples))

ort_model = ORTModel(optimized_model_path, optimizer._onnx_config)
ort_model = ORTModel(
optimized_model_path, optimizer._onnx_config, execution_provider=model_args.execution_provider
)
outputs = ort_model.evaluation_loop(predict_dataset)
predictions = post_processing_function(predict_examples, predict_dataset, outputs.predictions)
metrics = compute_metrics(predictions)
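Note (not part of the diff): CUDAExecutionProvider is only usable with a GPU-enabled ONNX Runtime build (onnxruntime-gpu). A quick, purely illustrative way to check which providers the installed build exposes before requesting one:

# Sanity-check sketch, not code from this PR.
import onnxruntime

requested = "CUDAExecutionProvider"
available = onnxruntime.get_available_providers()
if requested not in available:
    raise RuntimeError(f"{requested} is not available in this build; available providers: {available}")
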
36 changes: 34 additions & 2 deletions examples/onnxruntime/optimization/text-classification/run_glue.py
@@ -130,6 +130,10 @@ class ModelArguments:
default=None,
metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
)
execution_provider: str = field(
default="CPUExecutionProvider",
metadata={"help": "ONNX Runtime execution provider to use for inference."},
)


@dataclass
@@ -195,6 +199,28 @@ def main():
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()

if (
optim_args.optimization_level > 1
and optim_args.optimize_for_gpu
and model_args.execution_provider == "CPUExecutionProvider"
):
raise ValueError(
f"Optimization level is set at {optim_args.optimization_level} and "
f"GPU optimization will be done, although the CPU execution provider "
f"was selected. Use --execution_provider CUDAExecutionProvider."
)

if (
optim_args.optimization_level > 1
and optim_args.optimize_for_gpu is False
and model_args.execution_provider == "CUDAExecutionProvider"
):
raise ValueError(
f"Optimization level is set at {optim_args.optimization_level} and "
f"CPU optimization will be done, although the GPU execution provider "
f"was selected. Remove the argument --execution_provider CUDAExecutionProvider."
)

logger.info(f"Optimization with the following parameters {optim_args}")

if os.path.isdir(training_args.output_dir) and not training_args.overwrite_output_dir:
@@ -354,7 +380,11 @@ def compute_metrics(p: EvalPrediction):
)

ort_model = ORTModel(
optimized_model_path, optimizer._onnx_config, compute_metrics=compute_metrics, label_names=["label"]
optimized_model_path,
optimizer._onnx_config,
execution_provider=model_args.execution_provider,
compute_metrics=compute_metrics,
label_names=["label"],
)
outputs = ort_model.evaluation_loop(eval_dataset)
# Save metrics
@@ -378,7 +408,9 @@ def compute_metrics(p: EvalPrediction):
desc="Running tokenizer on the test dataset",
)

ort_model = ORTModel(optimized_model_path, optimizer._onnx_config)
ort_model = ORTModel(
optimized_model_path, optimizer._onnx_config, execution_provider=model_args.execution_provider
)
outputs = ort_model.evaluation_loop(predict_dataset)
predictions = np.squeeze(outputs.predictions) if is_regression else np.argmax(outputs.predictions, axis=1)

40 changes: 38 additions & 2 deletions examples/onnxruntime/optimization/token-classification/run_ner.py
@@ -81,6 +81,10 @@ class ModelArguments:
"with private models)."
},
)
execution_provider: str = field(
default="CPUExecutionProvider",
metadata={"help": "ONNX Runtime execution provider to use for inference."},
)


@dataclass
@@ -226,6 +230,28 @@ def main():
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()

if (
optim_args.optimization_level > 1
and optim_args.optimize_for_gpu
and model_args.execution_provider == "CPUExecutionProvider"
):
raise ValueError(
f"Optimization level is set at {optim_args.optimization_level} and "
f"GPU optimization will be done, although the CPU execution provider "
f"was selected. Use --execution_provider CUDAExecutionProvider."
)

if (
optim_args.optimization_level > 1
and optim_args.optimize_for_gpu is False
and model_args.execution_provider == "CUDAExecutionProvider"
):
raise ValueError(
f"Optimization level is set at {optim_args.optimization_level} and "
f"CPU optimization will be done, although the GPU execution provider "
f"was selected. Remove the argument --execution_provider CUDAExecutionProvider."
)

logger.info(f"Optimization with the following parameters {optim_args}")

if os.path.isdir(training_args.output_dir) and not training_args.overwrite_output_dir:
@@ -425,7 +451,12 @@ def compute_metrics(p):
desc="Running tokenizer on the validation dataset",
)

ort_model = ORTModel(optimized_model_path, optimizer._onnx_config, compute_metrics=compute_metrics)
ort_model = ORTModel(
optimized_model_path,
optimizer._onnx_config,
execution_provider=model_args.execution_provider,
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(eval_dataset)

# Save evaluation metrics
@@ -450,7 +481,12 @@ def compute_metrics(p):
desc="Running tokenizer on the prediction dataset",
)

ort_model = ORTModel(optimized_model_path, optimizer._onnx_config, compute_metrics=compute_metrics)
ort_model = ORTModel(
optimized_model_path,
optimizer._onnx_config,
execution_provider=model_args.execution_provider,
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(predict_dataset)
predictions = np.argmax(outputs.predictions, axis=2)

15 changes: 13 additions & 2 deletions examples/onnxruntime/quantization/question-answering/run_qa.py
@@ -88,6 +88,10 @@ class ModelArguments:
"with private models)."
},
)
execution_provider: str = field(
default="CPUExecutionProvider",
metadata={"help": "ONNX Runtime execution provider to use for inference."},
)


@dataclass
@@ -651,7 +655,12 @@ def compute_metrics(p: EvalPrediction):
if training_args.do_eval:
logger.info("*** Evaluate ***")

ort_model = ORTModel(quantized_model_path, quantizer._onnx_config, compute_metrics=compute_metrics)
ort_model = ORTModel(
quantized_model_path,
quantizer._onnx_config,
execution_provider=model_args.execution_provider,
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(eval_dataset)
predictions = post_processing_function(eval_examples, eval_dataset, outputs.predictions)
metrics = compute_metrics(predictions)
@@ -664,7 +673,9 @@ def compute_metrics(p: EvalPrediction):
if training_args.do_predict:
logger.info("*** Predict ***")

ort_model = ORTModel(quantized_model_path, quantizer._onnx_config)
ort_model = ORTModel(
quantized_model_path, quantizer._onnx_config, execution_provider=model_args.execution_provider
)
outputs = ort_model.evaluation_loop(predict_dataset)
predictions = post_processing_function(predict_examples, predict_dataset, outputs.predictions)
metrics = compute_metrics(predictions)
14 changes: 12 additions & 2 deletions examples/onnxruntime/quantization/text-classification/run_glue.py
@@ -151,6 +151,10 @@ class ModelArguments:
default=None,
metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
)
execution_provider: str = field(
default="CPUExecutionProvider",
metadata={"help": "ONNX Runtime execution provider to use for inference."},
)


@dataclass
@@ -476,7 +480,11 @@ def compute_metrics(p: EvalPrediction):
eval_dataset = eval_dataset.select(range(data_args.max_eval_samples))

ort_model = ORTModel(
quantized_model_path, quantizer._onnx_config, compute_metrics=compute_metrics, label_names=["label"]
quantized_model_path,
quantizer._onnx_config,
execution_provider=model_args.execution_provider,
compute_metrics=compute_metrics,
label_names=["label"],
)
outputs = ort_model.evaluation_loop(eval_dataset)
# Save metrics
@@ -493,7 +501,9 @@ def compute_metrics(p: EvalPrediction):
if data_args.max_predict_samples is not None:
predict_dataset = predict_dataset.select(range(data_args.max_predict_samples))

ort_model = ORTModel(quantized_model_path, quantizer._onnx_config)
ort_model = ORTModel(
quantized_model_path, quantizer._onnx_config, execution_provider=model_args.execution_provider
)
outputs = ort_model.evaluation_loop(predict_dataset)
predictions = np.squeeze(outputs.predictions) if is_regression else np.argmax(outputs.predictions, axis=1)

18 changes: 16 additions & 2 deletions examples/onnxruntime/quantization/token-classification/run_ner.py
@@ -87,6 +87,10 @@ class ModelArguments:
"with private models)."
},
)
execution_provider: str = field(
default="CPUExecutionProvider",
metadata={"help": "ONNX Runtime execution provider to use for inference."},
)


@dataclass
@@ -542,7 +546,12 @@ def compute_metrics(p):
desc="Running tokenizer on the validation dataset",
)

ort_model = ORTModel(quantized_model_path, quantizer._onnx_config, compute_metrics=compute_metrics)
ort_model = ORTModel(
quantized_model_path,
quantizer._onnx_config,
execution_provider=model_args.execution_provider,
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(eval_dataset)

# Save evaluation metrics
@@ -568,7 +577,12 @@ def compute_metrics(p):
desc="Running tokenizer on the prediction dataset",
)

ort_model = ORTModel(quantized_model_path, quantizer._onnx_config, compute_metrics=compute_metrics)
ort_model = ORTModel(
quantized_model_path,
quantizer._onnx_config,
execution_provider=model_args.execution_provider,
compute_metrics=compute_metrics,
)
outputs = ort_model.evaluation_loop(predict_dataset)
predictions = np.argmax(outputs.predictions, axis=2)

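All six example scripts gain the same ModelArguments field, which HfArgumentParser turns into a --execution_provider command-line flag. A simplified, self-contained sketch of that mechanism (the dataclass below is a stand-in for the ones in the scripts, not code from this PR):

# Illustrative stand-in showing how the field surfaces as a CLI flag.
from dataclasses import dataclass, field
from transformers import HfArgumentParser

@dataclass
class ModelArguments:
    execution_provider: str = field(
        default="CPUExecutionProvider",
        metadata={"help": "ONNX Runtime execution provider to use for inference."},
    )

parser = HfArgumentParser(ModelArguments)
(model_args,) = parser.parse_args_into_dataclasses(["--execution_provider", "CUDAExecutionProvider"])
print(model_args.execution_provider)  # CUDAExecutionProvider
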
6 changes: 5 additions & 1 deletion optimum/onnxruntime/model.py
@@ -37,6 +37,7 @@ def __init__(
self,
model_path: Union[str, os.PathLike],
onnx_config: OnnxConfig,
execution_provider: Optional[str] = "CPUExecutionProvider",
compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
label_names: Optional[List[str]] = None,
):
@@ -47,6 +48,8 @@ def __init__(
onnx_config (`OnnxConfig`):
An ONNX configuration associated to the ONNX model describing metadata on how to export the model
through the ONNX format.
execution_provider (`str`, `optional`, defaults to `"CPUExecutionProvider"`):
The ONNX Runtime execution provider to use for inference.
compute_metrics (`Callable[[EvalPrediction], Dict]`, `optional`):
The function that will be used to compute metrics at evaluation. Must take an `EvalPrediction` and
return a dictionary string to metric values.
@@ -62,6 +65,7 @@ def __init__(
self.onnx_named_inputs = list(onnx_config.inputs.keys())
self.onnx_named_outputs = list(onnx_config.outputs.keys())
self.onnx_config = onnx_config
self.execution_provider = execution_provider
self.model_path = Path(model_path)
self.compute_metrics = compute_metrics
default_label_names = (
@@ -81,7 +85,7 @@ def evaluation_loop(self, dataset: Dataset):
all_preds = None
all_labels = None
options = SessionOptions()
session = InferenceSession(self.model_path.as_posix(), options)
session = InferenceSession(self.model_path.as_posix(), options, providers=[self.execution_provider])
for step, inputs in enumerate(dataset):
has_labels = all(inputs.get(k) is not None for k in self.label_names)
if has_labels:
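
The provider finally reaches ONNX Runtime through the providers argument of InferenceSession. For reference, a sketch of the same call with an explicit two-entry provider list (the model path is a placeholder); ONNX Runtime assigns graph nodes to providers in the order given, so a CPU fallback can be listed after CUDA:

# Illustrative only; mirrors the InferenceSession call above.
from onnxruntime import InferenceSession, SessionOptions

options = SessionOptions()
session = InferenceSession(
    "model-optimized.onnx",  # placeholder path
    options,
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],  # preferred provider first
)
print(session.get_providers())  # providers actually attached to this session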