Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions tests/integ/sagemaker/jumpstart/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ def _to_s3_path(filename: str, s3_prefix: Optional[str]) -> str:
("huggingface-spc-bert-base-cased", "*"): ("training-datasets/QNLI-tiny/"),
("js-trainable-model", "*"): ("training-datasets/QNLI-tiny/"),
("meta-textgeneration-llama-2-7b", "*"): ("training-datasets/sec_amazon/"),
("meta-textgeneration-llama-2-7b", "2.*"): ("training-datasets/sec_amazon/"),
("meta-textgeneration-llama-2-7b", "3.*"): ("training-datasets/sec_amazon/"),
("meta-textgenerationneuron-llama-2-7b", "*"): ("training-datasets/sec_amazon/"),
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,19 @@

# NOTE(review): presumably an upper bound (seconds) on estimator construction time,
# enforced by an instantiation-timing test below — confirm against that test.
MAX_INIT_TIME_SECONDS = 5

GATED_TRAINING_MODEL_SUPPORTED_REGIONS = {
GATED_TRAINING_MODEL_V1_SUPPORTED_REGIONS = {
"us-west-2",
"us-east-1",
"eu-west-1",
"ap-southeast-1",
"us-east-2",
"ap-southeast-2",
}
# Regions where Trainium (trn) instances are available for the neuron training test.
TRN2_SUPPORTED_REGIONS = set(
    [
        "us-east-1",
        "us-east-2",
        "us-west-2",
    ]
)


def test_jumpstart_estimator(setup):
Expand Down Expand Up @@ -87,15 +92,58 @@ def test_jumpstart_estimator(setup):

@x_fail_if_ice
@pytest.mark.skipif(
    tests.integ.test_region() not in GATED_TRAINING_MODEL_V1_SUPPORTED_REGIONS,
    reason=f"JumpStart gated training models unavailable in {tests.integ.test_region()}.",
)
def test_gated_model_training_v1(setup):
    """Train, deploy, and invoke the gated Llama-2 7B model via the legacy (v1) workflow."""
    model_id = "meta-textgeneration-llama-2-7b"
    # Artifacts for the 2.* line were retrieved using the legacy workflow.
    model_version = "2.*"

    gated_estimator = JumpStartEstimator(
        model_id=model_id,
        model_version=model_version,
        role=get_sm_session().get_caller_identity_arn(),
        sagemaker_session=get_sm_session(),
        tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
        environment={"accept_eula": "true"},
        max_run=259200,  # avoid exceeding resource limits
    )

    dataset_prefix = get_training_dataset_for_model_and_version(model_id, model_version)
    content_bucket = get_jumpstart_content_bucket(JUMPSTART_DEFAULT_REGION_NAME)

    # Training runs on an ml.g5.12xlarge instance.
    gated_estimator.fit({"training": f"s3://{content_bucket}/{dataset_prefix}"})

    # Hosting runs on an ml.g5.2xlarge instance.
    gated_predictor = gated_estimator.deploy(
        tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
        role=get_sm_session().get_caller_identity_arn(),
        sagemaker_session=get_sm_session(),
    )

    response = gated_predictor.predict(
        {
            "inputs": "some-payload",
            "parameters": {"max_new_tokens": 256, "top_p": 0.9, "temperature": 0.6},
        },
        custom_attributes="accept_eula=true",
    )

    assert response is not None

model_id, model_version = "meta-textgeneration-llama-2-7b", "*"

@x_fail_if_ice
def test_gated_model_training_v2(setup):

model_id = "meta-textgeneration-llama-2-7b"
model_version = "3.*" # model artifacts retrieved from jumpstart-private-cache-* buckets

estimator = JumpStartEstimator(
model_id=model_id,
model_version=model_version,
role=get_sm_session().get_caller_identity_arn(),
sagemaker_session=get_sm_session(),
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
Expand Down Expand Up @@ -128,6 +176,48 @@ def test_gated_model_training(setup):
assert response is not None


@x_fail_if_ice
@pytest.mark.skipif(
    tests.integ.test_region() not in TRN2_SUPPORTED_REGIONS,
    reason=f"TRN2 instances unavailable in {tests.integ.test_region()}.",
)
def test_gated_model_training_v2_neuron(setup):
    """Train, deploy, and invoke the gated Neuron Llama-2 7B model (v2 artifacts)."""
    model_id = "meta-textgenerationneuron-llama-2-7b"

    neuron_estimator = JumpStartEstimator(
        model_id=model_id,
        role=get_sm_session().get_caller_identity_arn(),
        sagemaker_session=get_sm_session(),
        tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
        environment={"accept_eula": "true"},
        max_run=259200,  # avoid exceeding resource limits
    )

    dataset_prefix = get_training_dataset_for_model_and_version(model_id, "*")
    content_bucket = get_jumpstart_content_bucket(JUMPSTART_DEFAULT_REGION_NAME)

    # Training runs on an ml.trn1.32xlarge instance.
    neuron_estimator.fit({"training": f"s3://{content_bucket}/{dataset_prefix}"})

    # Hosting runs on an ml.inf2.xlarge instance.
    neuron_predictor = neuron_estimator.deploy(
        tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
        role=get_sm_session().get_caller_identity_arn(),
        sagemaker_session=get_sm_session(),
    )

    response = neuron_predictor.predict(
        {"inputs": "some-payload"},
        custom_attributes="accept_eula=true",
    )

    assert response is not None


@mock.patch("sagemaker.jumpstart.cache.JUMPSTART_LOGGER.warning")
def test_instatiating_estimator(mock_warning_logger, setup):

Expand Down
69 changes: 66 additions & 3 deletions tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,15 @@
get_tabular_data,
)

# Regions where Inferentia2 (ml.inf2.*) instances are available for these tests.
INF2_SUPPORTED_REGIONS = set(
    [
        "us-east-1",
        "us-east-2",
        "us-west-2",
    ]
)

# NOTE(review): presumably an upper bound (seconds) on model construction time,
# enforced by an instantiation-timing test below — confirm against that test.
MAX_INIT_TIME_SECONDS = 5

GATED_INFERENCE_MODEL_SUPPORTED_REGIONS = {
GATED_INFERENCE_MODEL_PACKAGE_SUPPORTED_REGIONS = {
"us-west-2",
"us-east-1",
"eu-west-1",
Expand Down Expand Up @@ -87,15 +93,16 @@ def test_prepacked_jumpstart_model(setup):


@pytest.mark.skipif(
tests.integ.test_region() not in GATED_INFERENCE_MODEL_SUPPORTED_REGIONS,
reason=f"JumpStart gated inference models unavailable in {tests.integ.test_region()}.",
tests.integ.test_region() not in GATED_INFERENCE_MODEL_PACKAGE_SUPPORTED_REGIONS,
reason=f"JumpStart model package inference models unavailable in {tests.integ.test_region()}.",
)
def test_model_package_arn_jumpstart_model(setup):

model_id = "meta-textgeneration-llama-2-7b"

model = JumpStartModel(
model_id=model_id,
model_version="2.*", # version <3.0.0 uses model packages
role=get_sm_session().get_caller_identity_arn(),
sagemaker_session=get_sm_session(),
)
Expand All @@ -115,6 +122,62 @@ def test_model_package_arn_jumpstart_model(setup):
assert response is not None


@pytest.mark.skipif(
    tests.integ.test_region() not in INF2_SUPPORTED_REGIONS,
    reason=f"INF2 instances unavailable in {tests.integ.test_region()}.",
)
def test_jumpstart_gated_model_neuron(setup):
    """Deploy and invoke the gated Neuron Llama-2 7B JumpStart model."""
    neuron_model = JumpStartModel(
        model_id="meta-textgenerationneuron-llama-2-7b",
        role=get_sm_session().get_caller_identity_arn(),
        sagemaker_session=get_sm_session(),
    )

    # Hosting runs on an ml.inf2.xlarge instance.
    neuron_predictor = neuron_model.deploy(
        tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
        accept_eula=True,
    )

    response = neuron_predictor.predict({"inputs": "some-payload"})

    assert response is not None


def test_jumpstart_gated_model(setup):
    """Deploy and invoke the gated Llama-2 7B JumpStart model (3.* artifacts)."""
    gated_model = JumpStartModel(
        model_id="meta-textgeneration-llama-2-7b",
        # Versions >= 3.0.0 store artifacts in jumpstart-private-cache-* buckets.
        model_version="3.*",
        role=get_sm_session().get_caller_identity_arn(),
        sagemaker_session=get_sm_session(),
    )

    # Hosting runs on an ml.g5.2xlarge instance.
    gated_predictor = gated_model.deploy(
        tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
        accept_eula=True,
    )

    response = gated_predictor.predict(
        {
            "inputs": "some-payload",
            "parameters": {"max_new_tokens": 256, "top_p": 0.9, "temperature": 0.6},
        }
    )

    assert response is not None


@mock.patch("sagemaker.jumpstart.cache.JUMPSTART_LOGGER.warning")
def test_instatiating_model(mock_warning_logger, setup):

Expand Down