change: add airflow_config tests to canaries (#1055)
This change also addresses minor refactoring comments from the previous PR.
knakad committed Sep 20, 2019
1 parent 453e939 commit 7386979
Showing 3 changed files with 29 additions and 15 deletions.
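
Beyond the new canary markers, the refactor replaces hand-built ECR image URIs (registry() plus a hard-coded repository name and tag) with the get_image_uri() helper. A minimal before/after sketch, assuming SageMaker Python SDK 1.x, where get_image_uri's repo version defaults to "1" and so should resolve to the same URI as the old hand-built ":1" tag (the region literal is illustrative; the tests take it from the boto session):

    from sagemaker.amazon.amazon_estimator import get_image_uri, registry

    region = "us-west-2"  # illustrative; the tests read the region from the boto session

    # Old style: concatenate the ECR registry URL with a hard-coded repo and tag.
    old_uri = registry(region) + "/factorization-machines:1"

    # New style: let the SDK resolve the image URI; repo_version defaults to "1".
    new_uri = get_image_uri(region, "factorization-machines")

    assert old_uri == new_uri  # should hold under the default repo_version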
34 changes: 24 additions & 10 deletions tests/integ/test_airflow_config.py
@@ -16,6 +16,7 @@
 import os
 import pickle
 import sys
+import pytest
 
 import numpy as np
 
@@ -30,7 +31,7 @@
     PCA,
     RandomCutForest,
 )
-from sagemaker.amazon.amazon_estimator import registry
+from sagemaker.amazon.amazon_estimator import get_image_uri
 from sagemaker.amazon.common import read_records
 from sagemaker.chainer import Chainer
 from sagemaker.estimator import Estimator
@@ -65,13 +66,11 @@
 SINGLE_INSTANCE_COUNT = 1
 
 
+@pytest.mark.canary_quick
 def test_byo_airflow_config_uploads_data_source_to_s3_when_inputs_provided(
     sagemaker_session, cpu_instance_type
 ):
     with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
-        image_name = (
-            registry(sagemaker_session.boto_session.region_name) + "/factorization-machines:1"
-        )
         training_data_path = os.path.join(DATA_DIR, "dummy_tensor")
 
         data_source_location = "test-airflow-config-{}".format(sagemaker_timestamp())
@@ -80,7 +79,9 @@ def test_byo_airflow_config_uploads_data_source_to_s3_when_inputs_provided(
         )
 
         estimator = Estimator(
-            image_name=image_name,
+            image_name=get_image_uri(
+                sagemaker_session.boto_session.region_name, "factorization-machines"
+            ),
             role=ROLE,
             train_instance_count=SINGLE_INSTANCE_COUNT,
             train_instance_type=cpu_instance_type,
@@ -95,6 +96,7 @@ def test_byo_airflow_config_uploads_data_source_to_s3_when_inputs_provided(
         )
 
 
+@pytest.mark.canary_quick
 def test_kmeans_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
     with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
         data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
@@ -132,6 +134,7 @@ def test_kmeans_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_
         )
 
 
+@pytest.mark.canary_quick
 def test_fm_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
     with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
         data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
@@ -164,6 +167,7 @@ def test_fm_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_inst
         )
 
 
+@pytest.mark.canary_quick
 def test_ipinsights_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
     with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
         data_path = os.path.join(DATA_DIR, "ipinsights")
@@ -193,6 +197,7 @@ def test_ipinsights_airflow_config_uploads_data_source_to_s3(sagemaker_session,
         )
 
 
+@pytest.mark.canary_quick
 def test_knn_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
     with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
         data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
@@ -222,6 +227,7 @@ def test_knn_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_ins
         )
 
 
+@pytest.mark.canary_quick
 def test_lda_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
     with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
         data_path = os.path.join(DATA_DIR, "lda")
@@ -252,6 +258,7 @@ def test_lda_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_ins
         )
 
 
+@pytest.mark.canary_quick
 def test_linearlearner_airflow_config_uploads_data_source_to_s3(
     sagemaker_session, cpu_instance_type
 ):
@@ -320,6 +327,7 @@ def test_linearlearner_airflow_config_uploads_data_source_to_s3(
         )
 
 
+@pytest.mark.canary_quick
 def test_ntm_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
     with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
         data_path = os.path.join(DATA_DIR, "ntm")
@@ -351,6 +359,7 @@ def test_ntm_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_ins
         )
 
 
+@pytest.mark.canary_quick
 def test_pca_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
     with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
         data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
@@ -382,6 +391,7 @@ def test_pca_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_ins
         )
 
 
+@pytest.mark.canary_quick
 def test_rcf_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
     with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
         # Generate a thousand 14-dimensional datapoints.
@@ -408,6 +418,7 @@ def test_rcf_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_ins
         )
 
 
+@pytest.mark.canary_quick
 def test_chainer_airflow_config_uploads_data_source_to_s3(sagemaker_session, chainer_full_version):
     with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
         script_path = os.path.join(DATA_DIR, "chainer_mnist", "mnist.py")
@@ -441,6 +452,7 @@ def test_chainer_airflow_config_uploads_data_source_to_s3(sagemaker_session, cha
         )
 
 
+@pytest.mark.canary_quick
 def test_mxnet_airflow_config_uploads_data_source_to_s3(
     sagemaker_session, cpu_instance_type, mxnet_full_version
 ):
@@ -469,6 +481,7 @@ def test_mxnet_airflow_config_uploads_data_source_to_s3(
         )
 
 
+@pytest.mark.canary_quick
 def test_sklearn_airflow_config_uploads_data_source_to_s3(
     sagemaker_session, cpu_instance_type, sklearn_full_version
 ):
@@ -503,14 +516,13 @@ def test_sklearn_airflow_config_uploads_data_source_to_s3(
         )
 
 
+@pytest.mark.canary_quick
 def test_tf_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
     with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
-        image_name = (
-            registry(sagemaker_session.boto_session.region_name) + "/factorization-machines:1"
-        )
-
         tf = TensorFlow(
-            image_name=image_name,
+            image_name=get_image_uri(
+                sagemaker_session.boto_session.region_name, "factorization-machines"
+            ),
             entry_point=SCRIPT,
             role=ROLE,
             train_instance_count=SINGLE_INSTANCE_COUNT,
@@ -535,6 +547,7 @@ def test_tf_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_inst
         )
 
 
+@pytest.mark.canary_quick
 def test_xgboost_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
     with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
 
@@ -559,6 +572,7 @@ def test_xgboost_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu
         )
 
 
+@pytest.mark.canary_quick
 def test_pytorch_airflow_config_uploads_data_source_to_s3_when_inputs_not_provided(
     sagemaker_session, cpu_instance_type
 ):
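
With @pytest.mark.canary_quick applied, the canary runner can select these tests by marker expression. A minimal sketch of such an invocation, assuming the canaries drive pytest with -m (the actual runner configuration is not part of this commit):

    import pytest

    # Run only the canary-marked Airflow-config tests
    # (equivalent CLI: pytest tests/integ/test_airflow_config.py -m canary_quick).
    pytest.main(["tests/integ/test_airflow_config.py", "-m", "canary_quick"])

    # A regular integration run can exclude them with: -m "not canary_quick".

Recent pytest versions warn on unregistered marks, so canary_quick is presumably registered in the project's pytest configuration.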
6 changes: 3 additions & 3 deletions tests/integ/test_byo_estimator.py
@@ -21,7 +21,7 @@
 import pytest
 
 import sagemaker
-from sagemaker.amazon.amazon_estimator import registry
+from sagemaker.amazon.amazon_estimator import get_image_uri
 from sagemaker.estimator import Estimator
 from sagemaker.utils import unique_name_from_base
 from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES
@@ -52,7 +52,7 @@ def test_byo_estimator(sagemaker_session, region, cpu_instance_type):
     Default predictor is updated with json serializer and deserializer.
     """
-    image_name = registry(region) + "/factorization-machines:1"
+    image_name = get_image_uri(region, "factorization-machines")
     training_data_path = os.path.join(DATA_DIR, "dummy_tensor")
     job_name = unique_name_from_base("byo")
 
@@ -100,7 +100,7 @@ def test_byo_estimator(sagemaker_session, region, cpu_instance_type):
 
 
 def test_async_byo_estimator(sagemaker_session, region, cpu_instance_type):
-    image_name = registry(region) + "/factorization-machines:1"
+    image_name = get_image_uri(region, "factorization-machines")
     endpoint_name = unique_name_from_base("byo")
     training_data_path = os.path.join(DATA_DIR, "dummy_tensor")
     job_name = unique_name_from_base("byo")
4 changes: 2 additions & 2 deletions tests/integ/test_tuner.py
@@ -29,7 +29,7 @@
 from tests.integ import vpc_test_utils
 
 from sagemaker import KMeans, LDA, RandomCutForest
-from sagemaker.amazon.amazon_estimator import registry
+from sagemaker.amazon.amazon_estimator import get_image_uri
 from sagemaker.amazon.common import read_records
 from sagemaker.chainer import Chainer
 from sagemaker.estimator import Estimator
@@ -891,7 +891,7 @@ def test_tuning_byo_estimator(sagemaker_session, cpu_instance_type):
     Later the trained model is deployed and prediction is called against the endpoint.
     Default predictor is updated with json serializer and deserializer.
     """
-    image_name = registry(sagemaker_session.boto_session.region_name) + "/factorization-machines:1"
+    image_name = get_image_uri(sagemaker_session.boto_session.region_name, "factorization-machines")
     training_data_path = os.path.join(DATA_DIR, "dummy_tensor")
 
     with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
