diff --git a/src/sagemaker/xgboost/model.py b/src/sagemaker/xgboost/model.py index ae50d76329..49acc11074 100644 --- a/src/sagemaker/xgboost/model.py +++ b/src/sagemaker/xgboost/model.py @@ -145,13 +145,16 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None): ) deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image) - self._upload_code(deploy_key_prefix) + self._upload_code(key_prefix=deploy_key_prefix, repack=self.enable_network_isolation()) deploy_env = dict(self.env) deploy_env.update(self._framework_env_vars()) if self.model_server_workers: deploy_env[MODEL_SERVER_WORKERS_PARAM_NAME.upper()] = str(self.model_server_workers) - return sagemaker.container_def(deploy_image, self.model_data, deploy_env) + model_data = ( + self.repacked_model_data if self.enable_network_isolation() else self.model_data + ) + return sagemaker.container_def(deploy_image, model_data, deploy_env) def serving_image_uri(self, region_name, instance_type): """Create a URI for the serving image. diff --git a/tests/data/xgboost_abalone/abalone.py b/tests/data/xgboost_abalone/abalone.py new file mode 100644 index 0000000000..0ba5213c1d --- /dev/null +++ b/tests/data/xgboost_abalone/abalone.py @@ -0,0 +1,50 @@ +import argparse +import os + +from sagemaker_xgboost_container.data_utils import get_dmatrix + +import xgboost as xgb + +model_filename = "xgboost-model" + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + # SageMaker-specific arguments. Defaults are read from the SM_* environment variables. 
+ parser.add_argument( + "--model_dir", type=str, default=os.environ.get("SM_MODEL_DIR", "/opt/ml/model") + ) + parser.add_argument( + "--train", + type=str, + default=os.environ.get("SM_CHANNEL_TRAIN", "/opt/ml/input/data/train"), + ) + + args, _ = parser.parse_known_args() + + dtrain = get_dmatrix(args.train, "libsvm") + + params = { + "max_depth": 5, + "eta": 0.2, + "gamma": 4, + "min_child_weight": 6, + "subsample": 0.7, + "verbosity": 2, + "objective": "reg:squarederror", + "tree_method": "auto", + "predictor": "auto", + } + + booster = xgb.train(params=params, dtrain=dtrain, num_boost_round=50) + booster.save_model(os.path.join(args.model_dir, model_filename)) + + +def model_fn(model_dir): + """Deserialize and return the fitted model. + + The file must be named ``model_filename``, matching what the training code above saves. + """ + booster = xgb.Booster() + booster.load_model(os.path.join(model_dir, model_filename)) + return booster diff --git a/tests/integ/test_xgboost.py b/tests/integ/test_xgboost.py index 56fdffab1b..088a55d7f3 100644 --- a/tests/integ/test_xgboost.py +++ b/tests/integ/test_xgboost.py @@ -14,6 +14,8 @@ import os import pytest +from sagemaker.utils import unique_name_from_base +from sagemaker.xgboost import XGBoost from sagemaker.xgboost.processing import XGBoostProcessor from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES from tests.integ.timeout import timeout @@ -48,3 +50,35 @@ def test_framework_processing_job_with_deps( inputs=[], wait=True, ) + + +def test_training_with_network_isolation( + sagemaker_session, + xgboost_latest_version, + xgboost_latest_py_version, + cpu_instance_type, +): + with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES): + base_job_name = "test-network-isolation-xgboost" + + xgboost = XGBoost( + entry_point=os.path.join(DATA_DIR, "xgboost_abalone", "abalone.py"), + role=ROLE, + instance_type=cpu_instance_type, + instance_count=1, + framework_version=xgboost_latest_version, 
+ py_version=xgboost_latest_py_version, + base_job_name=base_job_name, + sagemaker_session=sagemaker_session, + enable_network_isolation=True, + ) + + train_input = xgboost.sagemaker_session.upload_data( + path=os.path.join(DATA_DIR, "xgboost_abalone", "abalone"), + key_prefix="integ-test-data/xgboost_abalone/abalone", + ) + job_name = unique_name_from_base(base_job_name) + xgboost.fit(inputs={"train": train_input}, job_name=job_name) + assert sagemaker_session.sagemaker_client.describe_training_job(TrainingJobName=job_name)[ + "EnableNetworkIsolation" + ] diff --git a/tests/unit/test_xgboost.py b/tests/unit/test_xgboost.py index 28937ccba1..82f27c19ae 100644 --- a/tests/unit/test_xgboost.py +++ b/tests/unit/test_xgboost.py @@ -22,6 +22,7 @@ from packaging.version import Version +from sagemaker.fw_utils import UploadedCode from sagemaker.xgboost import XGBoost, XGBoostModel, XGBoostPredictor @@ -180,6 +181,26 @@ def test_create_model(sagemaker_session, xgboost_framework_version): assert model_values["Image"] == default_image_uri +@patch("sagemaker.model.FrameworkModel._upload_code") +def test_create_model_with_network_isolation(upload, sagemaker_session, xgboost_framework_version): + model_data = "s3://mybucket/source" + repacked_model_data = "s3://mybucket/prefix/model.tar.gz" + + xgboost_model = XGBoostModel( + model_data=model_data, + role=ROLE, + sagemaker_session=sagemaker_session, + entry_point=SCRIPT_PATH, + framework_version=xgboost_framework_version, + enable_network_isolation=True, + ) + xgboost_model.uploaded_code = UploadedCode(s3_prefix=repacked_model_data, script_name="script") + xgboost_model.repacked_model_data = repacked_model_data + model_values = xgboost_model.prepare_container_def(CPU) + assert model_values["Environment"]["SAGEMAKER_SUBMIT_DIRECTORY"] == "/opt/ml/model/code" + assert model_values["ModelDataUrl"] == repacked_model_data + + @patch("sagemaker.estimator.name_from_base") def test_create_model_from_estimator(name_from_base, 
sagemaker_session, xgboost_framework_version): container_log_level = '"logging.INFO"'