Fix benchmark feature read-only APIs #4675

5 changes: 1 addition & 4 deletions src/sagemaker/jumpstart/model.py
@@ -464,10 +464,7 @@ def benchmark_metrics(self) -> pd.DataFrame:
Returns:
Benchmark Metrics: Pandas DataFrame object.
"""
df = pd.DataFrame(self._get_deployment_configs_benchmarks_data())
default_mask = df.apply(lambda row: any("Default" in str(val) for val in row), axis=1)
sorted_df = pd.concat([df[default_mask], df[~default_mask]])
return sorted_df
return pd.DataFrame(self._get_deployment_configs_benchmarks_data())

def display_benchmark_metrics(self, *args, **kwargs) -> None:
"""Display deployment configs benchmark metrics."""
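For context, a minimal usage sketch of the read-only benchmark APIs this hunk touches, assuming benchmark_metrics is exposed as a property and display_benchmark_metrics prints the same table; the model id is hypothetical:

from sagemaker.jumpstart.model import JumpStartModel

# Hypothetical model id; any JumpStart model with benchmark metadata works.
model = JumpStartModel(model_id="meta-textgeneration-llama-2-7b")

# After this change, benchmark_metrics returns the raw DataFrame built from
# _get_deployment_configs_benchmarks_data(), without the "Default"-row sorting.
df = model.benchmark_metrics
print(df.head())

# Pretty-prints the same benchmark table.
model.display_benchmark_metrics()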
3 changes: 2 additions & 1 deletion src/sagemaker/jumpstart/types.py
@@ -746,7 +746,7 @@ def _get_regional_property(
class JumpStartBenchmarkStat(JumpStartDataHolderType):
"""Data class JumpStart benchmark stat."""

__slots__ = ["name", "value", "unit"]
__slots__ = ["name", "value", "unit", "concurrency"]

def __init__(self, spec: Dict[str, Any]):
"""Initializes a JumpStartBenchmarkStat object.
@@ -765,6 +765,7 @@ def from_json(self, json_obj: Dict[str, Any]) -> None:
self.name: str = json_obj["name"]
self.value: str = json_obj["value"]
self.unit: Union[int, str] = json_obj["unit"]
self.concurrency: Union[int, str] = json_obj["concurrency"]

def to_json(self) -> Dict[str, Any]:
"""Returns json representation of JumpStartBenchmarkStat object."""
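A short sketch of how the new required concurrency key flows through JumpStartBenchmarkStat; the payload is hypothetical and to_json is assumed to serialize the declared slots:

from sagemaker.jumpstart.types import JumpStartBenchmarkStat

# "concurrency" is now read in from_json alongside name, value, and unit.
stat = JumpStartBenchmarkStat(
    {"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": 1}
)
print(stat.name, stat.value, stat.unit, stat.concurrency)  # Latency 100 Tokens/S 1
print(stat.to_json())  # assumed to include the new concurrency slot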
100 changes: 75 additions & 25 deletions src/sagemaker/jumpstart/utils.py
@@ -1082,7 +1082,9 @@ def add_instance_rate_stats_to_benchmark_metrics(

if not benchmark_metric_stats:
benchmark_metric_stats = []
benchmark_metric_stats.append(JumpStartBenchmarkStat(instance_type_rate))
benchmark_metric_stats.append(
JumpStartBenchmarkStat({"concurrency": None, **instance_type_rate})
)

final_benchmark_metrics[instance_type] = benchmark_metric_stats
except ClientError as e:
@@ -1127,43 +1129,91 @@ def get_metrics_from_deployment_configs(
if not deployment_configs:
return {}

data = {"Instance Type": [], "Config Name": []}
data = {"Instance Type": [], "Concurrent Users": [], "Config Name": []}
instance_rate_data = {}
for index, deployment_config in enumerate(deployment_configs):
benchmark_metrics = deployment_config.benchmark_metrics
if not deployment_config.deployment_args or not benchmark_metrics:
continue

for inner_index, current_instance_type in enumerate(benchmark_metrics):
current_instance_type_metrics = benchmark_metrics[current_instance_type]

data["Config Name"].append(deployment_config.deployment_config_name)
instance_type_to_display = (
f"{current_instance_type} (Default)"
if index == 0
and current_instance_type == deployment_config.deployment_args.default_instance_type
else current_instance_type
for current_instance_type, current_instance_type_metrics in benchmark_metrics.items():
instance_type_rate, concurrent_users = _normalize_benchmark_metrics(
current_instance_type_metrics
)
data["Instance Type"].append(instance_type_to_display)

for metric in current_instance_type_metrics:
column_name = f"{metric.name} ({metric.unit})"

if metric.name.lower() == "instance rate":
if column_name not in instance_rate_data:
instance_rate_data[column_name] = []
instance_rate_data[column_name].append(metric.value)
else:
if column_name not in data:
data[column_name] = []
for _ in range(len(data[column_name]), inner_index):
data[column_name].append(" - ")

for concurrent_user, metrics in concurrent_users.items():
instance_type_to_display = (
f"{current_instance_type} (Default)"
if index == 0
and int(concurrent_user) == 1
and current_instance_type
== deployment_config.deployment_args.default_instance_type
else current_instance_type
)

instance_rate_column_name = f"{instance_type_rate.name} ({instance_type_rate.unit})"
instance_rate_data[instance_rate_column_name] = instance_rate_data.get(
instance_rate_column_name, []
)

data["Config Name"].append(deployment_config.deployment_config_name)
data["Instance Type"].append(instance_type_to_display)
data["Concurrent Users"].append(concurrent_user)
instance_rate_data[instance_rate_column_name].append(instance_type_rate.value)

for metric in metrics:
column_name = _normalize_benchmark_metric_column_name(metric.name)
data[column_name] = data.get(column_name, [])
data[column_name].append(metric.value)

data = {**data, **instance_rate_data}
return data


def _normalize_benchmark_metric_column_name(name: str) -> str:
"""Normalizes benchmark metric column name.

Args:
name (str): Name of the metric.
Returns:
str: Normalized metric column name.
"""
if "latency" in name.lower():
name = "Latency for each user (TTFT in ms)"

Reviewer: could we use the metric unit from metadata directly?

Author (Collaborator): We can, but only if the metadata is updated to the desired unit. Here is what it looks like today:

{
    "name": "latency",
    "value": "36",
    "unit": "ms/token",
    "concurrency": "2"
},

elif "throughput" in name.lower():
name = "Throughput per user (token/seconds)"
return name


def _normalize_benchmark_metrics(
benchmark_metric_stats: List[JumpStartBenchmarkStat],
) -> Tuple[JumpStartBenchmarkStat, Dict[str, List[JumpStartBenchmarkStat]]]:
"""Normalizes benchmark metrics dict.

Args:
benchmark_metric_stats (List[JumpStartBenchmarkStat]):
List of benchmark metrics stats.
Returns:
Tuple[JumpStartBenchmarkStat, Dict[str, List[JumpStartBenchmarkStat]]]:
Normalized benchmark metrics dict.
"""
instance_type_rate = None
concurrent_users = {}
for current_instance_type_metric in benchmark_metric_stats:
if current_instance_type_metric.name.lower() == "instance rate":
instance_type_rate = current_instance_type_metric
elif current_instance_type_metric.concurrency not in concurrent_users:
concurrent_users[current_instance_type_metric.concurrency] = [
current_instance_type_metric
]
else:
concurrent_users[current_instance_type_metric.concurrency].append(
current_instance_type_metric
)

return instance_type_rate, concurrent_users


def deployment_config_response_data(
deployment_configs: Optional[List[DeploymentConfigMetadata]],
) -> List[Dict[str, Any]]:
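To make the new table layout concrete, a hedged sketch of the dict get_metrics_from_deployment_configs now builds, with hypothetical values; the real columns depend on the metric names and units present in the model metadata:

import pandas as pd

# One row per (instance type, concurrent users) pair; instance-rate columns are
# appended after the per-concurrency metrics via {**data, **instance_rate_data}.
data = {
    "Instance Type": ["ml.p3.2xlarge (Default)", "ml.p3.2xlarge"],
    "Concurrent Users": ["1", "2"],
    "Config Name": ["gpu-inference", "gpu-inference"],
    "Latency for each user (TTFT in ms)": ["33", "36"],
    "Throughput per user (token/seconds)": ["12", "10"],
    "Instance Rate (USD/Hrs)": ["0.0083000000", "0.0083000000"],
}
print(pd.DataFrame(data))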
52 changes: 39 additions & 13 deletions tests/unit/sagemaker/jumpstart/constants.py
@@ -7662,25 +7662,33 @@
"inference_configs": {
"neuron-inference": {
"benchmark_metrics": {
"ml.inf2.2xlarge": [{"name": "Latency", "value": "100", "unit": "Tokens/S"}]
"ml.inf2.2xlarge": [
{"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": 1}
]
},
"component_names": ["neuron-inference"],
},
"neuron-inference-budget": {
"benchmark_metrics": {
"ml.inf2.2xlarge": [{"name": "Latency", "value": "100", "unit": "Tokens/S"}]
"ml.inf2.2xlarge": [
{"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": 1}
]
},
"component_names": ["neuron-base"],
},
"gpu-inference-budget": {
"benchmark_metrics": {
"ml.p3.2xlarge": [{"name": "Latency", "value": "100", "unit": "Tokens/S"}]
"ml.p3.2xlarge": [
{"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": 1}
]
},
"component_names": ["gpu-inference-budget"],
},
"gpu-inference": {
"benchmark_metrics": {
"ml.p3.2xlarge": [{"name": "Latency", "value": "100", "unit": "Tokens/S"}]
"ml.p3.2xlarge": [
{"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": 1}
]
},
"component_names": ["gpu-inference"],
},
@@ -7748,8 +7756,12 @@
"training_configs": {
"neuron-training": {
"benchmark_metrics": {
"ml.tr1n1.2xlarge": [{"name": "Latency", "value": "100", "unit": "Tokens/S"}],
"ml.tr1n1.4xlarge": [{"name": "Latency", "value": "50", "unit": "Tokens/S"}],
"ml.tr1n1.2xlarge": [
{"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": 1}
],
"ml.tr1n1.4xlarge": [
{"name": "Latency", "value": "50", "unit": "Tokens/S", "concurrency": 1}
],
},
"component_names": ["neuron-training"],
"default_inference_config": "neuron-inference",
@@ -7759,8 +7771,12 @@
},
"neuron-training-budget": {
"benchmark_metrics": {
"ml.tr1n1.2xlarge": [{"name": "Latency", "value": "100", "unit": "Tokens/S"}],
"ml.tr1n1.4xlarge": [{"name": "Latency", "value": "50", "unit": "Tokens/S"}],
"ml.tr1n1.2xlarge": [
{"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": 1}
],
"ml.tr1n1.4xlarge": [
{"name": "Latency", "value": "50", "unit": "Tokens/S", "concurrency": 1}
],
},
"component_names": ["neuron-training-budget"],
"default_inference_config": "neuron-inference-budget",
@@ -7770,7 +7786,9 @@
},
"gpu-training": {
"benchmark_metrics": {
"ml.p3.2xlarge": [{"name": "Latency", "value": "200", "unit": "Tokens/S"}],
"ml.p3.2xlarge": [
{"name": "Latency", "value": "200", "unit": "Tokens/S", "concurrency": "1"}
],
},
"component_names": ["gpu-training"],
"default_inference_config": "gpu-inference",
@@ -7780,7 +7798,9 @@
},
"gpu-training-budget": {
"benchmark_metrics": {
"ml.p3.2xlarge": [{"name": "Latency", "value": "100", "unit": "Tokens/S"}]
"ml.p3.2xlarge": [
{"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": "1"}
]
},
"component_names": ["gpu-training-budget"],
"default_inference_config": "gpu-inference-budget",
@@ -7966,7 +7986,9 @@
"ContainerStartupHealthCheckTimeout": None,
},
"AccelerationConfigs": None,
"BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
"BenchmarkMetrics": [
{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs", "concurrency": 1}
],
},
{
"DeploymentConfigName": "neuron-inference-budget",
@@ -7998,7 +8020,9 @@
"ContainerStartupHealthCheckTimeout": None,
},
"AccelerationConfigs": None,
"BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
"BenchmarkMetrics": [
{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs", "concurrency": 1}
],
},
{
"DeploymentConfigName": "gpu-inference-budget",
@@ -8030,7 +8054,9 @@
"ContainerStartupHealthCheckTimeout": None,
},
"AccelerationConfigs": None,
"BenchmarkMetrics": [{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}],
"BenchmarkMetrics": [
{"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs", "concurrency": 1}
],
},
{
"DeploymentConfigName": "gpu-inference",
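The fixtures above now carry a concurrency field on every benchmark entry. A hedged sketch of why the utils.py change spreads in a None default when wrapping instance rates, assuming the runtime rate payload need not include that key:

from sagemaker.jumpstart.types import JumpStartBenchmarkStat

# Hypothetical instance-rate payload without a concurrency key.
instance_type_rate = {"name": "Instance Rate", "value": "0.0083000000", "unit": "USD/Hrs"}

# The leading "concurrency": None keeps from_json's json_obj["concurrency"] lookup
# from raising KeyError, while an explicit value in the payload would still win.
stat = JumpStartBenchmarkStat({"concurrency": None, **instance_type_rate})
print(stat.concurrency)  # None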
12 changes: 9 additions & 3 deletions tests/unit/sagemaker/jumpstart/test_types.py
@@ -1027,7 +1027,9 @@ def test_inference_configs_parsing():

assert config.benchmark_metrics == {
"ml.inf2.2xlarge": [
JumpStartBenchmarkStat({"name": "Latency", "value": "100", "unit": "Tokens/S"})
JumpStartBenchmarkStat(
{"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": 1}
),
]
}
assert len(config.config_components) == 1
@@ -1191,10 +1193,14 @@ def test_training_configs_parsing():

assert config.benchmark_metrics == {
"ml.tr1n1.2xlarge": [
JumpStartBenchmarkStat({"name": "Latency", "value": "100", "unit": "Tokens/S"})
JumpStartBenchmarkStat(
{"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": 1}
),
],
"ml.tr1n1.4xlarge": [
JumpStartBenchmarkStat({"name": "Latency", "value": "50", "unit": "Tokens/S"})
JumpStartBenchmarkStat(
{"name": "Latency", "value": "50", "unit": "Tokens/S", "concurrency": 1}
),
],
}
assert len(config.config_components) == 1