Skip to content

Commit

Permalink
move accelerate to utils
Browse files Browse the repository at this point in the history
  • Loading branch information
samruds committed Mar 20, 2024
1 parent 5783194 commit 3a62911
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 105 deletions.
49 changes: 12 additions & 37 deletions src/sagemaker/serve/builder/model_builder.py
Expand Up @@ -20,7 +20,6 @@

from pathlib import Path

from accelerate.commands.estimate import estimate_command_parser, gather_data
from sagemaker import Session
from sagemaker.model import Model
from sagemaker.base_predictor import PredictorBase
Expand All @@ -43,7 +42,11 @@
from sagemaker.serve.utils import task
from sagemaker.serve.utils.exceptions import TaskNotFoundException
from sagemaker.serve.utils.predictors import _get_local_mode_predictor
from sagemaker.serve.utils.hardware_detector import _get_gpu_info, _get_gpu_info_fallback
from sagemaker.serve.utils.hardware_detector import (
_get_gpu_info,
_get_gpu_info_fallback,
_total_inference_model_size_mib,
)
from sagemaker.serve.detector.image_detector import (
auto_detect_container,
_detect_framework_and_version,
Expand All @@ -70,13 +73,6 @@
ModelServer.DJL_SERVING,
}

MIB_CONVERSION_FACTOR = 0.00000095367431640625
MEMORY_BUFFER_MULTIPLIER = 1.2 # 20% buffer
VERSION_DETECTION_ERROR = (
"Please install accelerate and transformers for HuggingFace (HF) model "
"size calculations e.g. pip install 'sagemaker[huggingface]'"
)


# pylint: disable=attribute-defined-outside-init, disable=E1101
@dataclass
Expand Down Expand Up @@ -723,43 +719,22 @@ def _schema_builder_init(self, model_task: str):
except ValueError:
raise TaskNotFoundException(f"Schema builder for {model_task} could not be found.")

    def _total_inference_model_size_mib(self) -> float:
        """Calculates the model size from HF accelerate

        This function gets the model size from accelerate. It also adds a
        padding and converts to size MiB. When performing inference, expect
        to add up to an additional 20% to the given model size as found by EleutherAI.

        Returns:
            float: buffered model memory footprint in MiB.

        Raises:
            ImportError: if the ``accelerate``/``transformers`` extras are not installed.
            ValueError: if accelerate could not determine a size for ``self.model``.
        """
        try:
            # dtype(s) forwarded to accelerate's estimator; falls back to
            # float32 when not provided via env_vars.
            dtypes = self.env_vars.get("dtypes", "float32")
            parser = estimate_command_parser()
            args = parser.parse_args([self.model, "--dtypes", dtypes])

            output = gather_data(
                args
            )  # "dtype", "Largest Layer", "Total Size Bytes", "Training using Adam"
        except ImportError as e:
            # accelerate/transformers are optional deps — point the user at
            # the huggingface extra before re-raising.
            logger.warning(VERSION_DETECTION_ERROR)
            raise e

        if output is None:
            raise ValueError(f"Could not get Model size for {self.model}")

        # output[0][2] is "Total Size Bytes"; apply the 20% buffer and
        # convert bytes -> MiB.
        total_memory_size_mib = MEMORY_BUFFER_MULTIPLIER * output[0][2] * MIB_CONVERSION_FACTOR
        logger.info("Total memory size MIB: %s", total_memory_size_mib)
        return total_memory_size_mib

def _can_fit_on_single_gpu(self) -> Type[bool]:
"""Check if model can fit on a single GPU
If the size of the model is <= single gpu memory size, returns True else False
"""
try:
single_gpu_size_mib = self._try_fetch_gpu_info()
if self._total_inference_model_size_mib() <= single_gpu_size_mib:
if (
_total_inference_model_size_mib(self.model, self.env_vars.get("dtypes", "float32"))
<= single_gpu_size_mib
):
logger.info(
"Total inference model size MIB %s, single GPU size for instance MIB %s",
self._total_inference_model_size_mib(),
_total_inference_model_size_mib(
self.model, self.env_vars.get("dtypes", "float32")
),
single_gpu_size_mib,
)
return True
Expand Down
11 changes: 5 additions & 6 deletions src/sagemaker/serve/builder/schema_builder.py
Expand Up @@ -216,12 +216,11 @@ def __repr__(self):
f"input_deserializer={self.input_deserializer._deserializer}\n"
f"output_deserializer={self.output_deserializer._deserializer})"
)
elif hasattr(self, "custom_input_translator") and hasattr(self, "custom_output_translator"):
return (
f"SchemaBuilder(\n"
f"custom_input_translator={self.custom_input_translator}\n"
f"custom_output_translator={self.custom_output_translator}\n"
)
return (
f"SchemaBuilder(\n"
f"custom_input_translator={self.custom_input_translator}\n"
f"custom_output_translator={self.custom_output_translator}\n"
)

def generate_marshalling_map(self) -> dict:
"""Generate marshalling map for the schema builder"""
Expand Down
37 changes: 37 additions & 0 deletions src/sagemaker/serve/utils/hardware_detector.py
Expand Up @@ -18,12 +18,22 @@

from botocore.exceptions import ClientError

from accelerate.commands.estimate import estimate_command_parser, gather_data
from sagemaker import Session
from sagemaker.model import Model
from sagemaker import instance_types_gpu_info

logger = logging.getLogger(__name__)


MIB_CONVERSION_FACTOR = 0.00000095367431640625
MEMORY_BUFFER_MULTIPLIER = 1.2 # 20% buffer
VERSION_DETECTION_ERROR = (
"Please install accelerate and transformers for HuggingFace (HF) model "
"size calculations e.g. pip install 'sagemaker[huggingface]'"
)


def _get_gpu_info(instance_type: str, session: Session) -> Tuple[int, int]:
"""Get GPU info for the provided instance
Expand Down Expand Up @@ -108,3 +118,30 @@ def _format_instance_type(instance_type: str) -> str:

ec2_instance = ".".join(split_instance)
return ec2_instance


def _total_inference_model_size_mib(model: str, dtype: str) -> float:
    """Calculates the model size from HF accelerate

    This function gets the model size from accelerate. It also adds a
    padding and converts to size MiB. When performing inference, expect
    to add up to an additional 20% to the given model size as found by EleutherAI.

    Args:
        model (str): HuggingFace model id, forwarded as a positional CLI
            argument to accelerate's estimate parser (``parse_args`` requires
            strings, so a ``Model`` object is not valid here).
        dtype (str): dtype string (e.g. ``"float32"``) forwarded as ``--dtypes``.

    Returns:
        float: buffered model memory footprint in MiB (float constants make
        the product a float, so the return type is not ``int``).

    Raises:
        ImportError: if the ``accelerate``/``transformers`` extras are not installed.
        ValueError: if accelerate could not determine a size for ``model``.
    """
    try:
        parser = estimate_command_parser()
        args = parser.parse_args([model, "--dtypes", dtype])

        output = gather_data(
            args
        )  # "dtype", "Largest Layer", "Total Size Bytes", "Training using Adam"
    except ImportError as e:
        # accelerate/transformers are optional deps — point the user at
        # the huggingface extra before re-raising.
        logger.warning(VERSION_DETECTION_ERROR)
        raise e

    if output is None:
        raise ValueError(f"Could not get Model size for {model}")

    # output[0][2] is "Total Size Bytes"; apply the 20% buffer and
    # convert bytes -> MiB.
    total_memory_size_mib = MEMORY_BUFFER_MULTIPLIER * output[0][2] * MIB_CONVERSION_FACTOR
    logger.info("Total memory size MIB: %s", total_memory_size_mib)
    return total_memory_size_mib
66 changes: 4 additions & 62 deletions tests/unit/sagemaker/serve/builder/test_model_builder.py
Expand Up @@ -53,9 +53,6 @@
ModelServer.DJL_SERVING,
}

MIB_CONVERSION_FACTOR = 0.00000095367431640625
MEMORY_BUFFER_MULTIPLIER = 1.2 # 20% buffer

mock_session = MagicMock()


Expand Down Expand Up @@ -1205,7 +1202,7 @@ def test_build_for_transformers_happy_case(

@patch("sagemaker.serve.builder.model_builder.ModelBuilder._build_for_transformers")
@patch("sagemaker.serve.builder.model_builder.ModelBuilder._try_fetch_gpu_info")
@patch("sagemaker.serve.builder.model_builder.ModelBuilder._total_inference_model_size_mib")
@patch("sagemaker.serve.builder.model_builder._total_inference_model_size_mib")
@patch("sagemaker.image_uris.retrieve")
@patch("sagemaker.djl_inference.model.urllib")
@patch("sagemaker.djl_inference.model.json")
Expand Down Expand Up @@ -1248,7 +1245,7 @@ def test_build_for_transformers_happy_case_with_values(

@patch("sagemaker.serve.builder.model_builder.ModelBuilder._build_for_djl", Mock())
@patch("sagemaker.serve.builder.model_builder._get_gpu_info")
@patch("sagemaker.serve.builder.model_builder.ModelBuilder._total_inference_model_size_mib")
@patch("sagemaker.serve.builder.model_builder._total_inference_model_size_mib")
@patch("sagemaker.image_uris.retrieve")
@patch("sagemaker.djl_inference.model.urllib")
@patch("sagemaker.djl_inference.model.json")
Expand Down Expand Up @@ -1293,7 +1290,7 @@ def test_build_for_transformers_happy_case_with_valid_gpu_info(
@patch("sagemaker.serve.builder.model_builder.ModelBuilder._build_for_transformers", Mock())
@patch("sagemaker.serve.builder.model_builder._get_gpu_info")
@patch("sagemaker.serve.builder.model_builder._get_gpu_info_fallback")
@patch("sagemaker.serve.builder.model_builder.ModelBuilder._total_inference_model_size_mib")
@patch("sagemaker.serve.builder.model_builder._total_inference_model_size_mib")
@patch("sagemaker.image_uris.retrieve")
@patch("sagemaker.djl_inference.model.urllib")
@patch("sagemaker.djl_inference.model.json")
Expand Down Expand Up @@ -1342,61 +1339,6 @@ def test_build_for_transformers_happy_case_with_valid_gpu_fallback(
)
self.assertEqual(model_builder._can_fit_on_single_gpu(), True)

    @patch("sagemaker.serve.builder.model_builder.ModelBuilder._build_for_transformers", Mock())
    @patch("sagemaker.serve.builder.model_builder.estimate_command_parser")
    @patch("sagemaker.serve.builder.model_builder.gather_data")
    @patch("sagemaker.image_uris.retrieve")
    @patch("sagemaker.djl_inference.model.urllib")
    @patch("sagemaker.djl_inference.model.json")
    @patch("sagemaker.huggingface.llm_utils.urllib")
    @patch("sagemaker.huggingface.llm_utils.json")
    @patch("sagemaker.model_uris.retrieve")
    @patch("sagemaker.serve.builder.model_builder._ServeSettings")
    def test_build_for_transformers_happy_case_hugging_face_responses(
        self,
        # Mock parameters are in reverse decorator order (innermost patch first).
        mock_serveSettings,
        mock_model_uris_retrieve,
        mock_llm_utils_json,
        mock_llm_utils_urllib,
        mock_model_json,
        mock_model_urllib,
        mock_image_uris_retrieve,
        mock_gather_data,
        mock_parser,
    ):
        """Verify _total_inference_model_size_mib against mocked accelerate output.

        Covers both the happy path (buffered size computed from gather_data's
        "Total Size Bytes" column) and the failure path (gather_data returning
        None raises ValueError).
        """
        mock_setting_object = mock_serveSettings.return_value
        mock_setting_object.role_arn = mock_role_arn
        mock_setting_object.s3_model_data_url = mock_s3_model_data_url

        # Force JumpStart lookup to miss so the HF code path is exercised.
        mock_model_uris_retrieve.side_effect = KeyError
        mock_llm_utils_json.load.return_value = {"pipeline_tag": "text-classification"}
        mock_llm_utils_urllib.request.Request.side_effect = Mock()

        mock_model_json.load.return_value = {"some": "config"}
        mock_model_urllib.request.Request.side_effect = Mock()
        mock_image_uris_retrieve.return_value = "https://some-image-uri"

        # gather_data rows are ("dtype", "Largest Layer", "Total Size Bytes",
        # "Training using Adam"); size-bytes column is index 2 (here: 1 byte).
        mock_parser.return_value = Mock()
        mock_gather_data.return_value = [[1, 1, 1, 1]]
        product = MIB_CONVERSION_FACTOR * 1 * MEMORY_BUFFER_MULTIPLIER

        model_builder = ModelBuilder(
            model="stable-diffusion",
            sagemaker_session=mock_session,
            instance_type=mock_instance_type,
        )
        self.assertEqual(model_builder._total_inference_model_size_mib(), product)

        # A None result from gather_data means the size is unknown — expect ValueError.
        mock_parser.return_value = Mock()
        mock_gather_data.return_value = None
        model_builder = ModelBuilder(
            model="stable-diffusion",
            sagemaker_session=mock_session,
            instance_type=mock_instance_type,
        )
        with self.assertRaises(ValueError) as _:
            model_builder._total_inference_model_size_mib()

@patch("sagemaker.serve.builder.model_builder.ModelBuilder._build_for_djl")
@patch("sagemaker.serve.builder.model_builder.ModelBuilder._can_fit_on_single_gpu")
@patch("sagemaker.image_uris.retrieve")
Expand Down Expand Up @@ -1556,7 +1498,7 @@ def test_try_fetch_gpu_info_throws(
self.assertEqual(model_builder._can_fit_on_single_gpu(), False)

@patch("sagemaker.serve.builder.model_builder.ModelBuilder._build_for_transformers", Mock())
@patch("sagemaker.serve.builder.model_builder.ModelBuilder._total_inference_model_size_mib")
@patch("sagemaker.serve.builder.model_builder._total_inference_model_size_mib")
@patch("sagemaker.image_uris.retrieve")
@patch("sagemaker.djl_inference.model.urllib")
@patch("sagemaker.djl_inference.model.json")
Expand Down
24 changes: 24 additions & 0 deletions tests/unit/sagemaker/serve/utils/test_hardware_detector.py
Expand Up @@ -13,6 +13,7 @@
from __future__ import absolute_import

from botocore.exceptions import ClientError
from unittest.mock import patch, Mock
import pytest

from sagemaker.serve.utils import hardware_detector
Expand All @@ -21,6 +22,8 @@
VALID_INSTANCE_TYPE = "ml.g5.48xlarge"
INVALID_INSTANCE_TYPE = "fl.c5.57xxlarge"
EXPECTED_INSTANCE_GPU_INFO = (8, 196608)
MIB_CONVERSION_FACTOR = 0.00000095367431640625
MEMORY_BUFFER_MULTIPLIER = 1.2 # 20% buffer


def test_get_gpu_info_success(sagemaker_session, boto_session):
Expand Down Expand Up @@ -96,3 +99,24 @@ def test_format_instance_type_without_ml_success():
formatted_instance_type = hardware_detector._format_instance_type("g5.48xlarge")

assert formatted_instance_type == "g5.48xlarge"


@patch("sagemaker.serve.utils.hardware_detector.estimate_command_parser")
@patch("sagemaker.serve.utils.hardware_detector.gather_data")
def test_total_inference_model_size_mib(
    # Mock parameters are in reverse decorator order (innermost patch first).
    mock_gather_data,
    mock_parser,
):
    """Verify _total_inference_model_size_mib against mocked accelerate output.

    Happy path: buffered size computed from gather_data's "Total Size Bytes"
    column. Failure path: gather_data returning None raises ValueError.
    """
    mock_parser.return_value = Mock()
    # gather_data rows are ("dtype", "Largest Layer", "Total Size Bytes",
    # "Training using Adam"); size-bytes column is index 2 (here: 1 byte).
    mock_gather_data.return_value = [[1, 1, 1, 1]]
    product = MIB_CONVERSION_FACTOR * 1 * MEMORY_BUFFER_MULTIPLIER

    assert (
        hardware_detector._total_inference_model_size_mib("stable-diffusion", "float32") == product
    )

    # A None result from gather_data means the size is unknown — expect ValueError.
    mock_parser.return_value = Mock()
    mock_gather_data.return_value = None

    with pytest.raises(ValueError):
        hardware_detector._total_inference_model_size_mib("stable-diffusion", "float32")

0 comments on commit 3a62911

Please sign in to comment.