generated from amazon-archives/__template_Apache-2.0
-
Notifications
You must be signed in to change notification settings - Fork 61
Add optimum-neuron pipeline support for zero code deployment.
#92
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
54eea63
add optimum support
philschmid 9f78566
added logger
philschmid c80695c
corrected env var naming and add tasks from optimum
philschmid 6a368d4
fix quality
philschmid 94c484c
double check optimum
philschmid 77ad2b2
added suggestions and feedback
philschmid d00cf8e
fix quality
philschmid File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
99 changes: 99 additions & 0 deletions
99
src/sagemaker_huggingface_inference_toolkit/optimum_utils.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,99 @@ | ||
| # Copyright 2023 The HuggingFace Team, Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| import importlib.util | ||
| import logging | ||
| import os | ||
|
|
||
|
|
||
# Detect optimum-neuron without importing it. The parent package "optimum" is probed
# first and the submodule lookup is only attempted when the parent exists, because
# find_spec on a dotted name imports the parent package.
_optimum_neuron = (
    importlib.util.find_spec("optimum") is not None
    and importlib.util.find_spec("optimum.neuron") is not None
)

# Module-level logger shared by the helpers below.
logger = logging.getLogger(__name__)
|
|
||
|
|
||
def is_optimum_neuron_available() -> bool:
    """Return the module-level flag recording whether ``optimum.neuron`` was found at import time."""
    return _optimum_neuron
|
|
||
|
|
||
def _positive_int_from_env(name, default=None):
    """Read environment variable ``name`` and return its value as a positive integer.

    ``default`` is used when the variable is not set.

    Raises:
        ValueError: if the value is missing, is not an integer, or is not greater than zero.
    """
    raw_value = os.environ.get(name, default)
    try:
        value = int(raw_value)
    except (TypeError, ValueError):
        # TypeError: variable unset and no default (int(None)).
        # ValueError: the variable holds non-numeric text.
        value = None
    if value is None or value <= 0:
        raise ValueError(f"{name} must be set to a positive integer. Current value is {raw_value}")
    return value


def get_input_shapes(model_dir):
    """Return the static input shapes (batch size and sequence length) for a model.

    The shapes are taken from the model config in ``model_dir`` when it has both
    ``neuron_batch_size`` and ``neuron_sequence_length``. Otherwise they are read from the
    ``HF_OPTIMUM_SEQUENCE_LENGTH`` (required) and ``HF_OPTIMUM_BATCH_SIZE`` (defaults to 1)
    environment variables.

    Args:
        model_dir: directory passed to ``AutoConfig.from_pretrained``.

    Returns:
        dict with the keys ``"batch_size"`` and ``"sequence_length"``.

    Raises:
        ValueError: if the config provides no shapes and an environment variable is missing
            (sequence length only), not an integer, or not positive.
    """
    from transformers import AutoConfig

    input_shapes = {}
    input_shapes_available = False
    # try to get input shapes from config file
    try:
        config = AutoConfig.from_pretrained(model_dir)
        if hasattr(config, "neuron_batch_size") and hasattr(config, "neuron_sequence_length"):
            input_shapes["batch_size"] = config.neuron_batch_size
            input_shapes["sequence_length"] = config.neuron_sequence_length
            input_shapes_available = True
            logger.info(
                f"Input shapes found in config file. Using input shapes from config with batch size {input_shapes['batch_size']} and sequence length {input_shapes['sequence_length']}"
            )
            if os.environ.get("HF_OPTIMUM_BATCH_SIZE", None) is not None:
                logger.warning(
                    "HF_OPTIMUM_BATCH_SIZE environment variable is set. Environment variable will be ignored and input shapes from config file will be used."
                )
            if os.environ.get("HF_OPTIMUM_SEQUENCE_LENGTH", None) is not None:
                logger.warning(
                    "HF_OPTIMUM_SEQUENCE_LENGTH environment variable is set. Environment variable will be ignored and input shapes from config file will be used."
                )
    except Exception:
        # Reading the config is best-effort: on any failure fall back to the environment
        # variables, but keep the reason visible for debugging instead of dropping it.
        logger.debug("Could not read input shapes from config file.", exc_info=True)
        input_shapes_available = False

    # return input shapes if available
    if input_shapes_available:
        return input_shapes

    # extract input shapes from environment variables; sequence length is validated first
    sequence_length = _positive_int_from_env("HF_OPTIMUM_SEQUENCE_LENGTH")
    batch_size = _positive_int_from_env("HF_OPTIMUM_BATCH_SIZE", default=1)
    logger.info(
        f"Using input shapes from environment variables with batch size {batch_size} and sequence length {sequence_length}"
    )
    return {"batch_size": batch_size, "sequence_length": sequence_length}
|
|
||
|
|
||
def get_optimum_neuron_pipeline(task, model_dir):
    """Build an optimum neuron pipeline for ``task`` from the model stored in ``model_dir``.

    The task is first checked against ``NEURONX_SUPPORTED_TASKS``. The model is exported
    when ``model_dir`` does not contain ``NEURON_FILE_NAME``. The static input shapes come
    from ``get_input_shapes``.

    Raises:
        ValueError: if ``task`` is not in ``NEURONX_SUPPORTED_TASKS``.
    """
    from optimum.neuron.pipelines import NEURONX_SUPPORTED_TASKS, pipeline
    from optimum.neuron.utils import NEURON_FILE_NAME

    # reject unsupported tasks before touching the model directory
    if task not in NEURONX_SUPPORTED_TASKS:
        raise ValueError(
            f"Task {task} is not supported by optimum neuron and inf2. Supported tasks are: {list(NEURONX_SUPPORTED_TASKS.keys())}"
        )

    # a model directory without the neuron file has not been converted yet
    needs_export = NEURON_FILE_NAME not in os.listdir(model_dir)
    if needs_export:
        logger.info("Model is not converted. Checking if required environment variables are set and converting model.")

    # static input shapes are needed to run inference
    static_shapes = get_input_shapes(model_dir)
    return pipeline(task, model=model_dir, export=needs_export, input_shapes=static_shapes)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,102 @@ | ||
| # Copyright 2021 The HuggingFace Team, Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| import os | ||
| import tempfile | ||
|
|
||
| import pytest | ||
| from transformers.testing_utils import require_torch | ||
|
|
||
| from sagemaker_huggingface_inference_toolkit.optimum_utils import ( | ||
| get_input_shapes, | ||
| get_optimum_neuron_pipeline, | ||
| is_optimum_neuron_available, | ||
| ) | ||
| from sagemaker_huggingface_inference_toolkit.transformers_utils import _load_model_from_hub | ||
|
|
||
|
|
||
# Skip marker for tests that need optimum neuron: they are skipped whenever
# is_optimum_neuron_available() reports that the package was not found.
_OPTIMUM_NEURON_MISSING = not is_optimum_neuron_available()
require_inferentia = pytest.mark.skipif(
    _OPTIMUM_NEURON_MISSING,
    reason="Skipping tests, since optimum neuron is not available or not running on inf2 instances.",
)


# Hub model ids and the task shared by the tests below.
REMOTE_NOT_CONVERTED_MODEL = "hf-internal-testing/tiny-random-BertModel"
REMOTE_CONVERTED_MODEL = "optimum/tiny_random_bert_neuron"
TASK = "text-classification"
|
|
||
|
|
||
@require_torch
@require_inferentia
def test_not_supported_task():
    """An unsupported task name must be rejected with the task-support ValueError."""
    # Pass the unsupported task directly: get_optimum_neuron_pipeline takes the task as an
    # argument, so the rejection must come from the task check rather than a later failure.
    with pytest.raises(ValueError, match="is not supported by optimum neuron"):
        get_optimum_neuron_pipeline(task="not-supported-task", model_dir=os.getcwd())
|
|
||
|
|
||
@require_torch
@require_inferentia
def test_get_input_shapes_from_file():
    """Input shapes are read from the config of an already converted model."""
    with tempfile.TemporaryDirectory() as download_dir:
        converted_model_path = _load_model_from_hub(model_id=REMOTE_CONVERTED_MODEL, model_dir=download_dir)
        shapes = get_input_shapes(model_dir=converted_model_path)
        assert shapes["batch_size"] == 1
        assert shapes["sequence_length"] == 16
|
|
||
|
|
||
@require_torch
@require_inferentia
def test_get_input_shapes_from_env():
    """Input shapes come from the environment variables for a model that is not converted."""
    # NOTE(review): these variables are set process-wide and are not restored afterwards,
    # so they stay visible to tests that run later in the same process.
    os.environ.update({"HF_OPTIMUM_BATCH_SIZE": "4", "HF_OPTIMUM_SEQUENCE_LENGTH": "32"})
    with tempfile.TemporaryDirectory() as download_dir:
        plain_model_path = _load_model_from_hub(model_id=REMOTE_NOT_CONVERTED_MODEL, model_dir=download_dir)
        shapes = get_input_shapes(model_dir=plain_model_path)
        assert shapes["batch_size"] == 4
        assert shapes["sequence_length"] == 32
|
|
||
|
|
||
@require_torch
@require_inferentia
def test_get_optimum_neuron_pipeline_from_converted_model():
    """A model exported with optimum-cli can be loaded and produces a classification result."""
    import subprocess

    with tempfile.TemporaryDirectory() as tmpdirname:
        # check=True makes a failed export fail the test right here, instead of surfacing
        # later as a confusing error while loading the pipeline from an empty directory.
        subprocess.run(
            [
                "optimum-cli",
                "export",
                "neuron",
                "--model",
                "philschmid/tiny-distilbert-classification",
                "--sequence_length",
                "32",
                "--batch_size",
                "1",
                tmpdirname,
            ],
            check=True,
        )
        pipe = get_optimum_neuron_pipeline(task=TASK, model_dir=tmpdirname)
        r = pipe("This is a test")

        assert r[0]["score"] > 0.0
        assert isinstance(r[0]["label"], str)
|
|
||
|
|
||
@require_torch
@require_inferentia
def test_get_optimum_neuron_pipeline_from_non_converted_model():
    """A model that is not converted is exported using shapes from the environment variables."""
    from unittest import mock

    # get_input_shapes reads HF_OPTIMUM_SEQUENCE_LENGTH / HF_OPTIMUM_BATCH_SIZE, so those are
    # the variables to set. patch.dict restores the previous environment when the block exits.
    shape_env = {"HF_OPTIMUM_SEQUENCE_LENGTH": "32", "HF_OPTIMUM_BATCH_SIZE": "1"}
    with mock.patch.dict(os.environ, shape_env), tempfile.TemporaryDirectory() as tmpdirname:
        storage_folder = _load_model_from_hub(
            model_id=REMOTE_NOT_CONVERTED_MODEL,
            model_dir=tmpdirname,
        )
        pipe = get_optimum_neuron_pipeline(task=TASK, model_dir=storage_folder)
        r = pipe("This is a test")

        assert r[0]["score"] > 0.0
        assert isinstance(r[0]["label"], str)
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.