diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 405d15f..5b8299d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,10 +2,11 @@ CHANGELOG ========= -1.2.1dev -======== +1.2.1 +===== -* Encode DenseMatrix and SparseMatrix in probobuf Record format +* spark/pyspark: encode DenseMatrix and SparseMatrix in protobuf Record format +* spark/pyspark: add new region support for BOM/SIN/LHR/YUL/SFO 1.2.0 ===== diff --git a/sagemaker-pyspark-sdk/requirements.txt b/sagemaker-pyspark-sdk/requirements.txt index 0659dda..b19acbb 100644 --- a/sagemaker-pyspark-sdk/requirements.txt +++ b/sagemaker-pyspark-sdk/requirements.txt @@ -1,3 +1,3 @@ -pyspark +pyspark==2.3.2 numpy pytest \ No newline at end of file diff --git a/sagemaker-pyspark-sdk/setup.py b/sagemaker-pyspark-sdk/setup.py index 88dd12d..eaa5952 100644 --- a/sagemaker-pyspark-sdk/setup.py +++ b/sagemaker-pyspark-sdk/setup.py @@ -7,7 +7,7 @@ from setuptools import setup -VERSION = "1.2.0" +VERSION = "1.2.1" TEMP_PATH = "deps" JARS_TARGET = os.path.join(TEMP_PATH, "jars") diff --git a/sagemaker-spark-sdk/build.sbt b/sagemaker-spark-sdk/build.sbt index eeb2269..1b65b1c 100644 --- a/sagemaker-spark-sdk/build.sbt +++ b/sagemaker-spark-sdk/build.sbt @@ -19,7 +19,7 @@ scalaVersion := "2.11.7" // to change the version of spark add -DSPARK_VERSION=2.x.x when running sbt // for example: "sbt -DSPARK_VERSION=2.1.1 clean compile test doc package" val sparkVersion = System.getProperty("SPARK_VERSION", "2.2.0") -version := "spark_" + sparkVersion + "-1.2.0" +version := "spark_" + sparkVersion + "-1.2.1" lazy val SageMakerSpark = (project in file(".")) diff --git a/sagemaker-spark-sdk/src/main/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/ImageURIProvider.scala b/sagemaker-spark-sdk/src/main/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/ImageURIProvider.scala index 454ce51..d1e7a8d 100644 --- a/sagemaker-spark-sdk/src/main/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/ImageURIProvider.scala +++ 
b/sagemaker-spark-sdk/src/main/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/ImageURIProvider.scala @@ -41,7 +41,12 @@ private[algorithms] object SagerMakerRegionAccountMaps { Regions.AP_NORTHEAST_2.getName -> "835164637446", Regions.EU_CENTRAL_1.getName -> "664544806723", Regions.AP_SOUTHEAST_2.getName -> "712309505854", - Regions.GovCloud.getName -> "226302683700" + Regions.GovCloud.getName -> "226302683700", + Regions.AP_SOUTH_1.getName -> "991648021394", + Regions.AP_SOUTHEAST_1.getName -> "475088953585", + Regions.CA_CENTRAL_1.getName -> "469771592824", + Regions.EU_WEST_2.getName -> "644912444149", + Regions.US_WEST_1.getName -> "632365934929" ) // For LDA @@ -53,7 +58,13 @@ private[algorithms] object SagerMakerRegionAccountMaps { Regions.AP_NORTHEAST_1.getName -> "258307448986", Regions.AP_NORTHEAST_2.getName -> "293181348795", Regions.EU_CENTRAL_1.getName -> "353608530281", - Regions.AP_SOUTHEAST_2.getName -> "297031611018" + Regions.AP_SOUTHEAST_2.getName -> "297031611018", + Regions.GovCloud.getName -> "226302683700", + Regions.AP_SOUTH_1.getName -> "991648021394", + Regions.AP_SOUTHEAST_1.getName -> "475088953585", + Regions.CA_CENTRAL_1.getName -> "469771592824", + Regions.EU_WEST_2.getName -> "644912444149", + Regions.US_WEST_1.getName -> "632365934929" ) // For XGBoost @@ -66,7 +77,12 @@ private[algorithms] object SagerMakerRegionAccountMaps { Regions.AP_NORTHEAST_2.getName -> "306986355934", Regions.EU_CENTRAL_1.getName -> "813361260812", Regions.AP_SOUTHEAST_2.getName -> "544295431143", - Regions.GovCloud.getName -> "226302683700" + Regions.GovCloud.getName -> "226302683700", + Regions.AP_SOUTH_1.getName -> "991648021394", + Regions.AP_SOUTHEAST_1.getName -> "475088953585", + Regions.CA_CENTRAL_1.getName -> "469771592824", + Regions.EU_WEST_2.getName -> "644912444149", + Regions.US_WEST_1.getName -> "632365934929" ) } diff --git 
a/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/FactorizationMachinesSageMakerEstimatorTests.scala b/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/FactorizationMachinesSageMakerEstimatorTests.scala index 5850d10..efcd8fa 100644 --- a/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/FactorizationMachinesSageMakerEstimatorTests.scala +++ b/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/FactorizationMachinesSageMakerEstimatorTests.scala @@ -101,6 +101,31 @@ class FactorizationMachinesSageMakerEstimatorTests extends FlatSpec with Mockito createFactorizationMachinesBinaryClassifier(region = Regions.GovCloud.getName) assert(estimatorGovCloud.trainingImage == "226302683700.dkr.ecr.us-gov-west-1.amazonaws.com/factorization-machines:1") + + val estimatorAPSouth1 = + createFactorizationMachinesBinaryClassifier(region = Regions.AP_SOUTH_1.getName) + assert(estimatorAPSouth1.trainingImage == + "991648021394.dkr.ecr.ap-south-1.amazonaws.com/factorization-machines:1") + + val estimatorAPSouthEast1 = + createFactorizationMachinesBinaryClassifier(region = Regions.AP_SOUTHEAST_1.getName) + assert(estimatorAPSouthEast1.trainingImage == + "475088953585.dkr.ecr.ap-southeast-1.amazonaws.com/factorization-machines:1") + + val estimatorEUWest2 = + createFactorizationMachinesBinaryClassifier(region = Regions.EU_WEST_2.getName) + assert(estimatorEUWest2.trainingImage == + "644912444149.dkr.ecr.eu-west-2.amazonaws.com/factorization-machines:1") + + val estimatorCACentral1 = + createFactorizationMachinesBinaryClassifier(region = Regions.CA_CENTRAL_1.getName) + assert(estimatorCACentral1.trainingImage == + "469771592824.dkr.ecr.ca-central-1.amazonaws.com/factorization-machines:1") + + val estimatorUSWest1 = + createFactorizationMachinesBinaryClassifier(region = Regions.US_WEST_1.getName) + assert(estimatorUSWest1.trainingImage == + 
"632365934929.dkr.ecr.us-west-1.amazonaws.com/factorization-machines:1") } it should "use the correct defaults for regressor" in { @@ -152,6 +177,31 @@ class FactorizationMachinesSageMakerEstimatorTests extends FlatSpec with Mockito createFactorizationMachinesRegressor(region = Regions.GovCloud.getName) assert(estimatorGovCloud.trainingImage == "226302683700.dkr.ecr.us-gov-west-1.amazonaws.com/factorization-machines:1") + + val estimatorAPSouth1 = + createFactorizationMachinesRegressor(region = Regions.AP_SOUTH_1.getName) + assert(estimatorAPSouth1.trainingImage == + "991648021394.dkr.ecr.ap-south-1.amazonaws.com/factorization-machines:1") + + val estimatorAPSouthEast1 = + createFactorizationMachinesRegressor(region = Regions.AP_SOUTHEAST_1.getName) + assert(estimatorAPSouthEast1.trainingImage == + "475088953585.dkr.ecr.ap-southeast-1.amazonaws.com/factorization-machines:1") + + val estimatorEUWest2 = + createFactorizationMachinesRegressor(region = Regions.EU_WEST_2.getName) + assert(estimatorEUWest2.trainingImage == + "644912444149.dkr.ecr.eu-west-2.amazonaws.com/factorization-machines:1") + + val estimatorCACentral1 = + createFactorizationMachinesRegressor(region = Regions.CA_CENTRAL_1.getName) + assert(estimatorCACentral1.trainingImage == + "469771592824.dkr.ecr.ca-central-1.amazonaws.com/factorization-machines:1") + + val estimatorUSWest1 = + createFactorizationMachinesRegressor(region = Regions.US_WEST_1.getName) + assert(estimatorUSWest1.trainingImage == + "632365934929.dkr.ecr.us-west-1.amazonaws.com/factorization-machines:1") } it should "setFeatureDim" in { diff --git a/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/KMeansSageMakerEstimatorTests.scala b/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/KMeansSageMakerEstimatorTests.scala index 0dabbf5..6deb23d 100644 --- 
a/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/KMeansSageMakerEstimatorTests.scala +++ b/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/KMeansSageMakerEstimatorTests.scala @@ -88,6 +88,26 @@ class KMeansSageMakerEstimatorTests extends FlatSpec with Matchers with MockitoS val estimatorGovCloud = createKMeansEstimator(region = Regions.GovCloud.getName) assert(estimatorGovCloud.trainingImage == "226302683700.dkr.ecr.us-gov-west-1.amazonaws.com/kmeans:1") + + val estimatorAPSouth1 = createKMeansEstimator(region = Regions.AP_SOUTH_1.getName) + assert(estimatorAPSouth1.trainingImage == + "991648021394.dkr.ecr.ap-south-1.amazonaws.com/kmeans:1") + + val estimatorAPSouthEast1 = createKMeansEstimator(region = Regions.AP_SOUTHEAST_1.getName) + assert(estimatorAPSouthEast1.trainingImage == + "475088953585.dkr.ecr.ap-southeast-1.amazonaws.com/kmeans:1") + + val estimatorEUWest2 = createKMeansEstimator(region = Regions.EU_WEST_2.getName) + assert(estimatorEUWest2.trainingImage == + "644912444149.dkr.ecr.eu-west-2.amazonaws.com/kmeans:1") + + val estimatorCACentral1 = createKMeansEstimator(region = Regions.CA_CENTRAL_1.getName) + assert(estimatorCACentral1.trainingImage == + "469771592824.dkr.ecr.ca-central-1.amazonaws.com/kmeans:1") + + val estimatorUSWest1 = createKMeansEstimator(region = Regions.US_WEST_1.getName) + assert(estimatorUSWest1.trainingImage == + "632365934929.dkr.ecr.us-west-1.amazonaws.com/kmeans:1") } it should "setK" in { diff --git a/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/LDASageMakerEstimatorTests.scala b/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/LDASageMakerEstimatorTests.scala index a3dae78..99c62c9 100644 --- a/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/LDASageMakerEstimatorTests.scala +++ 
b/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/LDASageMakerEstimatorTests.scala @@ -86,6 +86,26 @@ class LDASageMakerEstimatorTests extends FlatSpec with MockitoSugar { createLDAEstimator(region = Regions.AP_SOUTHEAST_2.getName) assert(estimatorAPSouthEast2.trainingImage == "297031611018.dkr.ecr.ap-southeast-2.amazonaws.com/lda:1") + + val estimatorAPSouth1 = createLDAEstimator(region = Regions.AP_SOUTH_1.getName) + assert(estimatorAPSouth1.trainingImage == + "991648021394.dkr.ecr.ap-south-1.amazonaws.com/lda:1") + + val estimatorAPSouthEast1 = createLDAEstimator(region = Regions.AP_SOUTHEAST_1.getName) + assert(estimatorAPSouthEast1.trainingImage == + "475088953585.dkr.ecr.ap-southeast-1.amazonaws.com/lda:1") + + val estimatorEUWest2 = createLDAEstimator(region = Regions.EU_WEST_2.getName) + assert(estimatorEUWest2.trainingImage == + "644912444149.dkr.ecr.eu-west-2.amazonaws.com/lda:1") + + val estimatorCACentral1 = createLDAEstimator(region = Regions.CA_CENTRAL_1.getName) + assert(estimatorCACentral1.trainingImage == + "469771592824.dkr.ecr.ca-central-1.amazonaws.com/lda:1") + + val estimatorUSWest1 = createLDAEstimator(region = Regions.US_WEST_1.getName) + assert(estimatorUSWest1.trainingImage == + "632365934929.dkr.ecr.us-west-1.amazonaws.com/lda:1") } it should "setFeatureDim" in { diff --git a/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/LinearLearnerSageMakerEstimatorTests.scala b/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/LinearLearnerSageMakerEstimatorTests.scala index 56a962e..cc089eb 100644 --- a/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/LinearLearnerSageMakerEstimatorTests.scala +++ b/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/LinearLearnerSageMakerEstimatorTests.scala @@ -106,6 +106,31 @@ class LinearLearnerSageMakerEstimatorTests 
extends FlatSpec with MockitoSugar { createLinearLearnerBinaryClassifier(region = Regions.GovCloud.getName) assert(estimatorGovCloud.trainingImage == "226302683700.dkr.ecr.us-gov-west-1.amazonaws.com/linear-learner:1") + + val estimatorAPSouth1 = + createLinearLearnerBinaryClassifier(region = Regions.AP_SOUTH_1.getName) + assert(estimatorAPSouth1.trainingImage == + "991648021394.dkr.ecr.ap-south-1.amazonaws.com/linear-learner:1") + + val estimatorAPSouthEast1 = + createLinearLearnerBinaryClassifier(region = Regions.AP_SOUTHEAST_1.getName) + assert(estimatorAPSouthEast1.trainingImage == + "475088953585.dkr.ecr.ap-southeast-1.amazonaws.com/linear-learner:1") + + val estimatorEUWest2 = + createLinearLearnerBinaryClassifier(region = Regions.EU_WEST_2.getName) + assert(estimatorEUWest2.trainingImage == + "644912444149.dkr.ecr.eu-west-2.amazonaws.com/linear-learner:1") + + val estimatorCACentral1 = + createLinearLearnerBinaryClassifier(region = Regions.CA_CENTRAL_1.getName) + assert(estimatorCACentral1.trainingImage == + "469771592824.dkr.ecr.ca-central-1.amazonaws.com/linear-learner:1") + + val estimatorUSWest1 = + createLinearLearnerBinaryClassifier(region = Regions.US_WEST_1.getName) + assert(estimatorUSWest1.trainingImage == + "632365934929.dkr.ecr.us-west-1.amazonaws.com/linear-learner:1") } it should "use the correct defaults for multiclass classifier" in { @@ -161,6 +186,31 @@ class LinearLearnerSageMakerEstimatorTests extends FlatSpec with MockitoSugar { createLinearLearnerMultiClassClassifier(region = Regions.GovCloud.getName) assert(estimatorGovCloud.trainingImage == "226302683700.dkr.ecr.us-gov-west-1.amazonaws.com/linear-learner:1") + + val estimatorAPSouth1 = + createLinearLearnerMultiClassClassifier(region = Regions.AP_SOUTH_1.getName) + assert(estimatorAPSouth1.trainingImage == + "991648021394.dkr.ecr.ap-south-1.amazonaws.com/linear-learner:1") + + val estimatorAPSouthEast1 = + createLinearLearnerMultiClassClassifier(region = 
Regions.AP_SOUTHEAST_1.getName) + assert(estimatorAPSouthEast1.trainingImage == + "475088953585.dkr.ecr.ap-southeast-1.amazonaws.com/linear-learner:1") + + val estimatorEUWest2 = + createLinearLearnerMultiClassClassifier(region = Regions.EU_WEST_2.getName) + assert(estimatorEUWest2.trainingImage == + "644912444149.dkr.ecr.eu-west-2.amazonaws.com/linear-learner:1") + + val estimatorCACentral1 = + createLinearLearnerMultiClassClassifier(region = Regions.CA_CENTRAL_1.getName) + assert(estimatorCACentral1.trainingImage == + "469771592824.dkr.ecr.ca-central-1.amazonaws.com/linear-learner:1") + + val estimatorUSWest1 = + createLinearLearnerMultiClassClassifier(region = Regions.US_WEST_1.getName) + assert(estimatorUSWest1.trainingImage == + "632365934929.dkr.ecr.us-west-1.amazonaws.com/linear-learner:1") } it should "use the correct defaults for regressor" in { @@ -212,6 +262,31 @@ class LinearLearnerSageMakerEstimatorTests extends FlatSpec with MockitoSugar { createLinearLearnerRegressor(region = Regions.GovCloud.getName) assert(estimatorGovCloud.trainingImage == "226302683700.dkr.ecr.us-gov-west-1.amazonaws.com/linear-learner:1") + + val estimatorAPSouth1 = + createLinearLearnerRegressor(region = Regions.AP_SOUTH_1.getName) + assert(estimatorAPSouth1.trainingImage == + "991648021394.dkr.ecr.ap-south-1.amazonaws.com/linear-learner:1") + + val estimatorAPSouthEast1 = + createLinearLearnerRegressor(region = Regions.AP_SOUTHEAST_1.getName) + assert(estimatorAPSouthEast1.trainingImage == + "475088953585.dkr.ecr.ap-southeast-1.amazonaws.com/linear-learner:1") + + val estimatorEUWest2 = + createLinearLearnerRegressor(region = Regions.EU_WEST_2.getName) + assert(estimatorEUWest2.trainingImage == + "644912444149.dkr.ecr.eu-west-2.amazonaws.com/linear-learner:1") + + val estimatorCACentral1 = + createLinearLearnerRegressor(region = Regions.CA_CENTRAL_1.getName) + assert(estimatorCACentral1.trainingImage == + "469771592824.dkr.ecr.ca-central-1.amazonaws.com/linear-learner:1") + + 
val estimatorUSWest1 = + createLinearLearnerRegressor(region = Regions.US_WEST_1.getName) + assert(estimatorUSWest1.trainingImage == + "632365934929.dkr.ecr.us-west-1.amazonaws.com/linear-learner:1") } it should "setFeatureDim" in { diff --git a/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/PCASageMakerEstimatorTests.scala b/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/PCASageMakerEstimatorTests.scala index eb78953..eac2d4b 100644 --- a/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/PCASageMakerEstimatorTests.scala +++ b/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/PCASageMakerEstimatorTests.scala @@ -68,6 +68,26 @@ class PCASageMakerEstimatorTests extends FlatSpec with MockitoSugar { val estimatorGovCloud = createPCAEstimator(region = Regions.GovCloud.getName) assert(estimatorGovCloud.trainingImage == "226302683700.dkr.ecr.us-gov-west-1.amazonaws.com/pca:1") + + val estimatorAPSouth1 = createPCAEstimator(region = Regions.AP_SOUTH_1.getName) + assert(estimatorAPSouth1.trainingImage == + "991648021394.dkr.ecr.ap-south-1.amazonaws.com/pca:1") + + val estimatorAPSouthEast1 = createPCAEstimator(region = Regions.AP_SOUTHEAST_1.getName) + assert(estimatorAPSouthEast1.trainingImage == + "475088953585.dkr.ecr.ap-southeast-1.amazonaws.com/pca:1") + + val estimatorEUWest2 = createPCAEstimator(region = Regions.EU_WEST_2.getName) + assert(estimatorEUWest2.trainingImage == + "644912444149.dkr.ecr.eu-west-2.amazonaws.com/pca:1") + + val estimatorCACentral1 = createPCAEstimator(region = Regions.CA_CENTRAL_1.getName) + assert(estimatorCACentral1.trainingImage == + "469771592824.dkr.ecr.ca-central-1.amazonaws.com/pca:1") + + val estimatorUSWest1 = createPCAEstimator(region = Regions.US_WEST_1.getName) + assert(estimatorUSWest1.trainingImage == + "632365934929.dkr.ecr.us-west-1.amazonaws.com/pca:1") } it should "use 
the correct defaults" in { diff --git a/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/XGBoostSageMakerEstimatorTests.scala b/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/XGBoostSageMakerEstimatorTests.scala index 23ef35d..ab63c95 100644 --- a/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/XGBoostSageMakerEstimatorTests.scala +++ b/sagemaker-spark-sdk/src/test/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/XGBoostSageMakerEstimatorTests.scala @@ -87,6 +87,26 @@ class XGBoostSageMakerEstimatorTests extends FlatSpec with Matchers with Mockito val estimatorGovCloud = createXGBoostEstimator(region = Regions.GovCloud.getName) assert(estimatorGovCloud.trainingImage == "226302683700.dkr.ecr.us-gov-west-1.amazonaws.com/xgboost:1") + + val estimatorAPSouth1 = createXGBoostEstimator(region = Regions.AP_SOUTH_1.getName) + assert(estimatorAPSouth1.trainingImage == + "991648021394.dkr.ecr.ap-south-1.amazonaws.com/xgboost:1") + + val estimatorAPSouthEast1 = createXGBoostEstimator(region = Regions.AP_SOUTHEAST_1.getName) + assert(estimatorAPSouthEast1.trainingImage == + "475088953585.dkr.ecr.ap-southeast-1.amazonaws.com/xgboost:1") + + val estimatorEUWest2 = createXGBoostEstimator(region = Regions.EU_WEST_2.getName) + assert(estimatorEUWest2.trainingImage == + "644912444149.dkr.ecr.eu-west-2.amazonaws.com/xgboost:1") + + val estimatorCACentral1 = createXGBoostEstimator(region = Regions.CA_CENTRAL_1.getName) + assert(estimatorCACentral1.trainingImage == + "469771592824.dkr.ecr.ca-central-1.amazonaws.com/xgboost:1") + + val estimatorUSWest1 = createXGBoostEstimator(region = Regions.US_WEST_1.getName) + assert(estimatorUSWest1.trainingImage == + "632365934929.dkr.ecr.us-west-1.amazonaws.com/xgboost:1") } it should "setBooster" in {