Skip to content

Commit

Permalink
Add Chainer 4.1.0 (#278)
Browse files Browse the repository at this point in the history
  • Loading branch information
icywang86rui committed Jul 11, 2018
1 parent eacb833 commit 16f5d25
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 18 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Expand Up @@ -2,6 +2,12 @@
CHANGELOG
=========

1.6.0
=====

* feature: Add Chainer 4.1.0 support
* bug-fix: Use chainer_full_version fixture in Chainer integration tests

1.5.4
=====

Expand Down
4 changes: 2 additions & 2 deletions README.rst
Expand Up @@ -50,7 +50,7 @@ You can install from source by cloning this repository and issuing a pip install

git clone https://github.com/aws/sagemaker-python-sdk.git
python setup.py sdist
pip install dist/sagemaker-1.5.4.tar.gz
pip install dist/sagemaker-1.6.0.tar.gz

Supported Python versions
~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -236,7 +236,7 @@ Chainer SageMaker Estimators

With Chainer Estimators, you can train and host Chainer models on Amazon SageMaker.

Supported versions of Chainer: ``4.0.0``
Supported versions of Chainer: ``4.0.0``, ``4.1.0``.

You can visit the Chainer repository at https://github.com/chainer/chainer.

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -23,7 +23,7 @@ def read(fname):


setup(name="sagemaker",
version="1.5.4",
version="1.6.0",
description="Open source library for training and deploying models on Amazon SageMaker.",
packages=find_packages('src'),
package_dir={'': 'src'},
Expand Down
2 changes: 1 addition & 1 deletion src/sagemaker/chainer/defaults.py
Expand Up @@ -12,4 +12,4 @@
# language governing permissions and limitations under the License.
from __future__ import absolute_import

CHAINER_VERSION = '4.0.0'
CHAINER_VERSION = '4.1.0'
4 changes: 2 additions & 2 deletions tests/conftest.py
Expand Up @@ -86,7 +86,7 @@ def pytorch_version(request):
return request.param


@pytest.fixture(scope='module', params=['4.0', '4.0.0'])
@pytest.fixture(scope='module', params=['4.0', '4.0.0', '4.1', '4.1.0'])
def chainer_version(request):
return request.param

Expand All @@ -106,6 +106,6 @@ def pytorch_full_version(request):
return request.param


@pytest.fixture(scope='module', params=['4.0.0'])
@pytest.fixture(scope='module', params=['4.0.0', '4.1.0'])
def chainer_full_version(request):
return request.param
29 changes: 17 additions & 12 deletions tests/integ/test_chainer_train.py
Expand Up @@ -18,6 +18,7 @@
import pytest
import numpy

from sagemaker.chainer.defaults import CHAINER_VERSION
from sagemaker.chainer.estimator import Chainer
from sagemaker.chainer.model import ChainerModel
from sagemaker.utils import sagemaker_timestamp
Expand All @@ -26,25 +27,26 @@


@pytest.fixture(scope='module')
def chainer_training_job(sagemaker_session):
return _run_mnist_training_job(sagemaker_session, "ml.c4.xlarge", 1)
def chainer_training_job(sagemaker_session, chainer_full_version):
return _run_mnist_training_job(sagemaker_session, "ml.c4.xlarge", 1, chainer_full_version)


def test_distributed_cpu_training(sagemaker_session):
_run_mnist_training_job(sagemaker_session, "ml.c4.xlarge", 2)
def test_distributed_cpu_training(sagemaker_session, chainer_full_version):
_run_mnist_training_job(sagemaker_session, "ml.c4.xlarge", 2, chainer_full_version)


def test_distributed_gpu_training(sagemaker_session):
_run_mnist_training_job(sagemaker_session, "ml.p2.xlarge", 2)
def test_distributed_gpu_training(sagemaker_session, chainer_full_version):
_run_mnist_training_job(sagemaker_session, "ml.p2.xlarge", 2, chainer_full_version)


def test_training_with_additional_hyperparameters(sagemaker_session):
def test_training_with_additional_hyperparameters(sagemaker_session, chainer_full_version):
with timeout(minutes=15):
script_path = os.path.join(DATA_DIR, 'chainer_mnist', 'mnist.py')
data_path = os.path.join(DATA_DIR, 'chainer_mnist')

chainer = Chainer(entry_point=script_path, role='SageMakerRole',
train_instance_count=1, train_instance_type="ml.c4.xlarge",
framework_version=chainer_full_version,
sagemaker_session=sagemaker_session, hyperparameters={'epochs': 1},
use_mpi=True,
num_processes=2,
Expand Down Expand Up @@ -75,8 +77,7 @@ def test_deploy_model(chainer_training_job, sagemaker_session):
desc = sagemaker_session.sagemaker_client.describe_training_job(TrainingJobName=chainer_training_job)
model_data = desc['ModelArtifacts']['S3ModelArtifacts']
script_path = os.path.join(DATA_DIR, 'chainer_mnist', 'mnist.py')
model = ChainerModel(model_data, 'SageMakerRole', entry_point=script_path,
sagemaker_session=sagemaker_session)
model = ChainerModel(model_data, 'SageMakerRole', entry_point=script_path, sagemaker_session=sagemaker_session)
predictor = model.deploy(1, "ml.m4.xlarge", endpoint_name=endpoint_name)
_predict_and_assert(predictor)

Expand All @@ -85,7 +86,8 @@ def test_async_fit(sagemaker_session):
endpoint_name = 'test-chainer-attach-deploy-{}'.format(sagemaker_timestamp())

with timeout(minutes=5):
training_job_name = _run_mnist_training_job(sagemaker_session, "ml.c4.xlarge", 1, wait=False)
training_job_name = _run_mnist_training_job(sagemaker_session, "ml.c4.xlarge", 1,
chainer_full_version=CHAINER_VERSION, wait=False)

print("Waiting to re-attach to the training job: %s" % training_job_name)
time.sleep(20)
Expand All @@ -97,12 +99,13 @@ def test_async_fit(sagemaker_session):
_predict_and_assert(predictor)


def test_failed_training_job(sagemaker_session):
def test_failed_training_job(sagemaker_session, chainer_full_version):
with timeout(minutes=15):
script_path = os.path.join(DATA_DIR, 'chainer_mnist', 'failure_script.py')
data_path = os.path.join(DATA_DIR, 'chainer_mnist')

chainer = Chainer(entry_point=script_path, role='SageMakerRole',
framework_version=chainer_full_version,
train_instance_count=1, train_instance_type='ml.c4.xlarge',
sagemaker_session=sagemaker_session)

Expand All @@ -113,7 +116,8 @@ def test_failed_training_job(sagemaker_session):
chainer.fit(train_input)


def _run_mnist_training_job(sagemaker_session, instance_type, instance_count, wait=True):
def _run_mnist_training_job(sagemaker_session, instance_type, instance_count,
chainer_full_version, wait=True):
with timeout(minutes=15):

script_path = os.path.join(DATA_DIR, 'chainer_mnist', 'mnist.py') if instance_type == 1 else \
Expand All @@ -122,6 +126,7 @@ def _run_mnist_training_job(sagemaker_session, instance_type, instance_count, wa
data_path = os.path.join(DATA_DIR, 'chainer_mnist')

chainer = Chainer(entry_point=script_path, role='SageMakerRole',
framework_version=chainer_full_version,
train_instance_count=instance_count, train_instance_type=instance_type,
sagemaker_session=sagemaker_session, hyperparameters={'epochs': 1})

Expand Down

0 comments on commit 16f5d25

Please sign in to comment.