diff --git a/tests/integ/sagemaker/jumpstart/constants.py b/tests/integ/sagemaker/jumpstart/constants.py index d46b758826..f5ffbf7a3a 100644 --- a/tests/integ/sagemaker/jumpstart/constants.py +++ b/tests/integ/sagemaker/jumpstart/constants.py @@ -46,6 +46,9 @@ def _to_s3_path(filename: str, s3_prefix: Optional[str]) -> str: ("huggingface-spc-bert-base-cased", "*"): ("training-datasets/QNLI-tiny/"), ("js-trainable-model", "*"): ("training-datasets/QNLI-tiny/"), ("meta-textgeneration-llama-2-7b", "*"): ("training-datasets/sec_amazon/"), + ("meta-textgeneration-llama-2-7b", "2.*"): ("training-datasets/sec_amazon/"), + ("meta-textgeneration-llama-2-7b", "3.*"): ("training-datasets/sec_amazon/"), + ("meta-textgenerationneuron-llama-2-7b", "*"): ("training-datasets/sec_amazon/"), } diff --git a/tests/integ/sagemaker/jumpstart/estimator/test_jumpstart_estimator.py b/tests/integ/sagemaker/jumpstart/estimator/test_jumpstart_estimator.py index 928013150e..80b321e0a6 100644 --- a/tests/integ/sagemaker/jumpstart/estimator/test_jumpstart_estimator.py +++ b/tests/integ/sagemaker/jumpstart/estimator/test_jumpstart_estimator.py @@ -35,7 +35,7 @@ MAX_INIT_TIME_SECONDS = 5 -GATED_TRAINING_MODEL_SUPPORTED_REGIONS = { +GATED_TRAINING_MODEL_V1_SUPPORTED_REGIONS = { "us-west-2", "us-east-1", "eu-west-1", @@ -43,6 +43,11 @@ "us-east-2", "ap-southeast-2", } +TRN2_SUPPORTED_REGIONS = { + "us-west-2", + "us-east-1", + "us-east-2", +} def test_jumpstart_estimator(setup): @@ -87,15 +92,58 @@ def test_jumpstart_estimator(setup): @x_fail_if_ice @pytest.mark.skipif( - tests.integ.test_region() not in GATED_TRAINING_MODEL_SUPPORTED_REGIONS, + tests.integ.test_region() not in GATED_TRAINING_MODEL_V1_SUPPORTED_REGIONS, reason=f"JumpStart gated training models unavailable in {tests.integ.test_region()}.", ) -def test_gated_model_training(setup): +def test_gated_model_training_v1(setup): + + model_id = "meta-textgeneration-llama-2-7b" + model_version = "2.*" # model artifacts were retrieved using legacy workflow + + estimator = JumpStartEstimator( + model_id=model_id, + model_version=model_version, + role=get_sm_session().get_caller_identity_arn(), + sagemaker_session=get_sm_session(), + tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], + environment={"accept_eula": "true"}, + max_run=259200, # avoid exceeding resource limits + ) + + # uses ml.g5.12xlarge instance + estimator.fit( + { + "training": f"s3://{get_jumpstart_content_bucket(JUMPSTART_DEFAULT_REGION_NAME)}/" + f"{get_training_dataset_for_model_and_version(model_id, model_version)}", + } + ) + + # uses ml.g5.2xlarge instance + predictor = estimator.deploy( + tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], + role=get_sm_session().get_caller_identity_arn(), + sagemaker_session=get_sm_session(), + ) + + payload = { + "inputs": "some-payload", + "parameters": {"max_new_tokens": 256, "top_p": 0.9, "temperature": 0.6}, + } + + response = predictor.predict(payload, custom_attributes="accept_eula=true") + + assert response is not None - model_id, model_version = "meta-textgeneration-llama-2-7b", "*" + +@x_fail_if_ice +def test_gated_model_training_v2(setup): + + model_id = "meta-textgeneration-llama-2-7b" + model_version = "3.*" # model artifacts retrieved from jumpstart-private-cache-* buckets estimator = JumpStartEstimator( model_id=model_id, + model_version=model_version, role=get_sm_session().get_caller_identity_arn(), sagemaker_session=get_sm_session(), tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], @@ -128,6 +176,48 @@ def test_gated_model_training(setup): assert response is not None +@x_fail_if_ice +@pytest.mark.skipif( + tests.integ.test_region() not in TRN2_SUPPORTED_REGIONS, + reason=f"TRN2 instances unavailable in {tests.integ.test_region()}.", +) +def test_gated_model_training_v2_neuron(setup): + + model_id = "meta-textgenerationneuron-llama-2-7b" + + estimator = JumpStartEstimator( + model_id=model_id, + role=get_sm_session().get_caller_identity_arn(), + sagemaker_session=get_sm_session(), + tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], + environment={"accept_eula": "true"}, + max_run=259200, # avoid exceeding resource limits + ) + + # uses ml.trn1.32xlarge instance + estimator.fit( + { + "training": f"s3://{get_jumpstart_content_bucket(JUMPSTART_DEFAULT_REGION_NAME)}/" + f"{get_training_dataset_for_model_and_version(model_id, '*')}", + } + ) + + # uses ml.inf2.xlarge instance + predictor = estimator.deploy( + tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], + role=get_sm_session().get_caller_identity_arn(), + sagemaker_session=get_sm_session(), + ) + + payload = { + "inputs": "some-payload", + } + + response = predictor.predict(payload, custom_attributes="accept_eula=true") + + assert response is not None + + @mock.patch("sagemaker.jumpstart.cache.JUMPSTART_LOGGER.warning") def test_instatiating_estimator(mock_warning_logger, setup): diff --git a/tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py b/tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py index 0dd48082b9..d55c4c976b 100644 --- a/tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py +++ b/tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py @@ -31,9 +31,15 @@ get_tabular_data, ) +INF2_SUPPORTED_REGIONS = { + "us-west-2", + "us-east-1", + "us-east-2", +} + MAX_INIT_TIME_SECONDS = 5 -GATED_INFERENCE_MODEL_SUPPORTED_REGIONS = { +GATED_INFERENCE_MODEL_PACKAGE_SUPPORTED_REGIONS = { "us-west-2", "us-east-1", "eu-west-1", @@ -87,8 +93,8 @@ def test_prepacked_jumpstart_model(setup): @pytest.mark.skipif( - tests.integ.test_region() not in GATED_INFERENCE_MODEL_SUPPORTED_REGIONS, - reason=f"JumpStart gated inference models unavailable in {tests.integ.test_region()}.", + tests.integ.test_region() not in GATED_INFERENCE_MODEL_PACKAGE_SUPPORTED_REGIONS, + reason=f"JumpStart model package inference models unavailable in {tests.integ.test_region()}.", ) def test_model_package_arn_jumpstart_model(setup): @@ -96,6 +102,7 @@ def test_model_package_arn_jumpstart_model(setup): model = JumpStartModel( model_id=model_id, + model_version="2.*", # version <3.0.0 uses model packages role=get_sm_session().get_caller_identity_arn(), sagemaker_session=get_sm_session(), ) @@ -115,6 +122,62 @@ def test_model_package_arn_jumpstart_model(setup): assert response is not None +@pytest.mark.skipif( + tests.integ.test_region() not in INF2_SUPPORTED_REGIONS, + reason=f"INF2 instances unavailable in {tests.integ.test_region()}.", +) +def test_jumpstart_gated_model_neuron(setup): + + model_id = "meta-textgenerationneuron-llama-2-7b" + + model = JumpStartModel( + model_id=model_id, + role=get_sm_session().get_caller_identity_arn(), + sagemaker_session=get_sm_session(), + ) + + # uses ml.inf2.xlarge instance + predictor = model.deploy( + tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], + accept_eula=True, + ) + + payload = { + "inputs": "some-payload", + } + + response = predictor.predict(payload) + + assert response is not None + + +def test_jumpstart_gated_model(setup): + + model_id = "meta-textgeneration-llama-2-7b" + + model = JumpStartModel( + model_id=model_id, + model_version="3.*", # version >=3.0.0 stores artifacts in jumpstart-private-cache-* buckets + role=get_sm_session().get_caller_identity_arn(), + sagemaker_session=get_sm_session(), + ) + + # uses ml.g5.2xlarge instance + predictor = model.deploy( + tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], + accept_eula=True, + ) + + payload = { + "inputs": "some-payload", + "parameters": {"max_new_tokens": 256, "top_p": 0.9, "temperature": 0.6}, + } + + response = predictor.predict(payload) + + assert response is not None + + @mock.patch("sagemaker.jumpstart.cache.JUMPSTART_LOGGER.warning") def test_instatiating_model(mock_warning_logger, setup):