diff --git a/.travis.yml b/.travis.yml index b176f5976..bebef1e95 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,4 +23,4 @@ script: - pylint --rcfile=pylintrc test -E - mypy --ignore-missing-imports --follow-imports=silent @typechecked-files - check-manifest --ignore sockeye/git_version.py - +# - python -m pytest test/system diff --git a/MANIFEST.in b/MANIFEST.in index 2138cc6a2..8afac1fb6 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -8,6 +8,7 @@ include sockeye/git_version.py exclude *.sh include pytest.ini recursive-include test *.py +recursive-include test *.ini recursive-include docs *.bat recursive-include docs *.md recursive-include docs *.py diff --git a/pre-commit.sh b/pre-commit.sh index 888b1dc6a..ef2ae78aa 100755 --- a/pre-commit.sh +++ b/pre-commit.sh @@ -10,7 +10,7 @@ STASH_NAME="pre-commit-$(date +%s)" git stash save -q --keep-index $STASH_NAME -# Run unit tests +# Run unit and integration tests python3 setup.py test TEST_RESULT=$? @@ -27,16 +27,21 @@ TESTS_LINT_RESULT=$? mypy --ignore-missing-imports --follow-imports=silent @typechecked-files MYPY_RESULT=$? +# Run system tests +python3 -m pytest test/system +SYSTEM_RESULT=$? + # Pop our stashed files STASHES=$(git stash list) if [[ $STASHES == "$STASH_NAME" ]]; then git stash pop -q fi -[ $TEST_RESULT -ne 0 ] && echo 'Unit tests failed' && exit 1 +[ $TEST_RESULT -ne 0 ] && echo 'Unit or integration tests failed' && exit 1 [ $SOCKEYE_LINT_RESULT -ne 0 ] && echo 'pylint found errors in the sockeye package' && exit 1 [ $TESTS_LINT_RESULT -ne 0 ] && echo 'pylint found errors in the test package' && exit 1 [ $MYPY_RESULT -ne 0 ] && echo 'mypy found incorrect type usage' && exit 1 +[ $SYSTEM_RESULT -ne 0 ] && echo 'System tests failed' && exit 1 echo 'all pre-commit checks passed' exit 0 diff --git a/pytest.ini b/pytest.ini index d05332148..f45f864b4 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,2 +1,2 @@ [pytest] -addopts = --cov sockeye test -v +addopts = --cov sockeye test/unit test/integration -v diff --git a/sockeye/average.py b/sockeye/average.py index 17befd2bd..72aca8b1b 100644 --- a/sockeye/average.py +++ b/sockeye/average.py @@ -74,7 +74,7 @@ def find_checkpoints(model_path: str, size=4, strategy="best", maximize=False, m """ Finds N best points from .metrics file according to strategy - :param metric: Metric according to which checkpoints are selected. Corresponds to columns in model\metrics file. + :param metric: Metric according to which checkpoints are selected. Corresponds to columns in model/metrics file. :param model_path: Path to model. :param size: Number of checkpoints to combine. :param strategy: Combination strategy. @@ -82,7 +82,7 @@ def find_checkpoints(model_path: str, size=4, strategy="best", maximize=False, m :return: List of paths corresponding to chosen checkpoints. """ metrics_path = os.path.join(model_path, C.METRICS_NAME) - points = _read_metrics_points(metrics_path, model_path, metric=metric) + points = sockeye.utils.read_metrics_points(metrics_path, model_path, metric=metric) if strategy == "best": # N best scoring points @@ -111,35 +111,6 @@ def find_checkpoints(model_path: str, size=4, strategy="best", maximize=False, m return params_paths -def _read_metrics_points(path: str, model_path: str, metric: str) -> List[Tuple[float, int]]: - """ - Reads lines from .metrics file and return list of elements [val, checkpoint] - - :param metric: Metric according to which checkpoints are selected. Corresponds to columns in model\metrics file. - :param path: File to read metric values from. - :param model_path: path where the params files reside. - :return: List of pairs (metric value, checkpoint). - """ - points = [] - # First field is checkpoint id - # Metric on validation (dev) set looks like this: METRIC-val=N - with open(path, "r") as metrics_in: - for line in metrics_in: - fields = line.split() - checkpoint = int(fields[0]) - # Check that the corresponding params files exists - if not os.path.exists(os.path.join(model_path, C.PARAMS_NAME % checkpoint)): - continue - for field in fields[1:]: - key_value = field.split("=") - if len(key_value) == 2: - metric_set = key_value[0].split("-") - if len(metric_set) == 2 and metric_set[0] == metric and metric_set[1] == "val": - metric_value = float(key_value[1]) - points.append([metric_value, checkpoint]) - return points - - def _strategy_best(points, size, maximize): top_n = sorted(points, reverse=maximize)[:size] return top_n diff --git a/sockeye/utils.py b/sockeye/utils.py index 0a17768b7..5b90ffbd1 100644 --- a/sockeye/utils.py +++ b/sockeye/utils.py @@ -31,6 +31,7 @@ import numpy as np from sockeye import __version__ +import sockeye.constants as C logger = logging.getLogger(__name__) @@ -525,3 +526,32 @@ def namedtuple_with_defaults(typename, field_names, default_values: Mapping[str, prototype = T(*default_values) T.__new__.__defaults__ = tuple(prototype) return T + + +def read_metrics_points(path: str, model_path: str, metric: str) -> List[Tuple[float, int]]: + """ + Reads lines from .metrics file and return list of elements [val, checkpoint] + + :param metric: Metric according to which checkpoints are selected. Corresponds to columns in model/metrics file. + :param path: File to read metric values from. + :param model_path: path where the params files reside. + :return: List of pairs (metric value, checkpoint). + """ + points = [] + # First field is checkpoint id + # Metric on validation (dev) set looks like this: METRIC-val=N + with open(path, "r") as metrics_in: + for line in metrics_in: + fields = line.split() + checkpoint = int(fields[0]) + # Check that the corresponding params files exists + if not os.path.exists(os.path.join(model_path, C.PARAMS_NAME % checkpoint)): + continue + for field in fields[1:]: + key_value = field.split("=") + if len(key_value) == 2: + metric_set = key_value[0].split("-") + if len(metric_set) == 2 and metric_set[0] == metric and metric_set[1] == "val": + metric_value = float(key_value[1]) + points.append([metric_value, checkpoint]) + return points diff --git a/test/__init__.py b/test/__init__.py index 214e3177f..3d9e97c1e 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -5,9 +5,8 @@ # is located at # # http://aws.amazon.com/apache2.0/ -# +# # or in the "license" file accompanying this file. This file is distributed on # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the License for the specific language governing # permissions and limitations under the License. - diff --git a/test/common.py b/test/common.py new file mode 100644 index 000000000..1c71ba361 --- /dev/null +++ b/test/common.py @@ -0,0 +1,159 @@ +# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not +# use this file except in compliance with the License. A copy of the License +# is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. + +import os +import random +import sys +from tempfile import TemporaryDirectory +from typing import Optional, Tuple +from unittest.mock import patch + +import mxnet as mx +import numpy as np + +import sockeye.bleu +import sockeye.constants as C +import sockeye.train +import sockeye.translate +import sockeye.utils + + +def gaussian_vector(shape, return_symbol=False): + """ + Generates random normal tensors (diagonal covariance) + + :param shape: shape of the tensor. + :param return_symbol: True if the result should be a Symbol, False if it should be an Numpy array. + :return: A gaussian tensor. + """ + return mx.sym.random_normal(shape=shape) if return_symbol else np.random.normal(size=shape) + + +def integer_vector(shape, max_value, return_symbol=False): + """ + Generates a random positive integer tensor + + :param shape: shape of the tensor. + :param max_value: maximum integer value. + :param return_symbol: True if the result should be a Symbol, False if it should be an Numpy array. + :return: A random integer tensor. + """ + return mx.sym.round(mx.sym.random_uniform(shape=shape) * max_value) if return_symbol \ + else np.round(np.random.uniform(size=shape) * max_value) + + +def uniform_vector(shape, min_value=0, max_value=1, return_symbol=False): + """ + Generates a uniformly random tensor + + :param shape: shape of the tensor + :param min_value: minimum possible value + :param max_value: maximum possible value (exclusive) + :param return_symbol: True if the result should be a mx.sym.Symbol, False if it should be a Numpy array + :return: + """ + return mx.sym.random_uniform(low=min_value, high=max_value, shape=shape) if return_symbol \ + else np.random.uniform(low=min_value, high=max_value, size=shape) + + +def generate_random_sentence(vocab_size, max_len): + """ + Generates a random "sentence" as a list of integers. + + :param vocab_size: Number of words in the "vocabulary". Note that due to + the inclusion of special words (BOS, EOS, UNK) this does *not* + correspond to the maximum possible value. + :param max_len: maximum sentence length. + """ + length = random.randint(1, max_len) + # Due to the special words, the actual words start at index 3 and go up to vocab_size+2 + return [random.randint(3, vocab_size + 2) for _ in range(length)] + + +_DIGITS = "0123456789" + + +def generate_digits_file(source_path: str, + target_path: str, + line_count: int = 100, + line_length: int = 9, + sort_target: bool = False): + with open(source_path, "w") as source_out, open(target_path, "w") as target_out: + for _ in range(line_count): + digits = [random.choice(_DIGITS) for _ in range(random.randint(1, line_length))] + print(" ".join(digits), file=source_out) + if sort_target: + digits.sort() + print(" ".join(digits), file=target_out) + + +_TRAIN_PARAMS_COMMON = "--use-cpu --max-seq-len {max_len} --source {train_source} --target {train_target}" \ + " --validation-source {dev_source} --validation-target {dev_target} --output {model}" + + +_TRANSLATE_PARAMS_COMMON = "--use-cpu --models {model} --input {input} --output {output}" + + +def run_train_translate(train_params: str, + translate_params: str, + train_source_path: str, + train_target_path: str, + dev_source_path: str, + dev_target_path: str, + max_seq_len: int = 10, + work_dir: Optional[str] = None) -> Tuple[float, float]: + """ + Train a model and translate a dev set. Report perplexity and BLEU. + + :param train_params: Command line args for model training. + :param translate_params: Command line args for translation. + :param perplexity_thresh: Maximum perplexity for success + :param bleu_thresh: Minimum BLEU score for success + :return: (perplexity, bleu) + """ + with TemporaryDirectory(dir=work_dir, prefix="test_train_translate.") as work_dir: + + # Train model + model_path = os.path.join(work_dir, "model") + params = "{} {} {}".format(sockeye.train.__file__, + _TRAIN_PARAMS_COMMON.format(train_source=train_source_path, + train_target=train_target_path, + dev_source=dev_source_path, + dev_target=dev_target_path, + model=model_path, + max_len=max_seq_len), + train_params) + with patch.object(sys, "argv", params.split()): + sockeye.train.main() + + # Translate corpus + out_path = os.path.join(work_dir, "out.txt") + params = "{} {} {}".format(sockeye.translate.__file__, + _TRANSLATE_PARAMS_COMMON.format(model=model_path, + input=dev_source_path, + output=out_path), + translate_params) + with patch.object(sys, "argv", params.split()): + sockeye.translate.main() + + # Measure perplexity + checkpoints = sockeye.utils.read_metrics_points(path=os.path.join(model_path, C.METRICS_NAME), + model_path=model_path, + metric=C.PERPLEXITY) + perplexity = checkpoints[-1][0] + + # Measure BLEU + bleu = sockeye.bleu.corpus_bleu(open(out_path, "r").readlines(), + open(dev_target_path, "r").readlines()) + + return perplexity, bleu diff --git a/test/integration/__init__.py b/test/integration/__init__.py new file mode 100644 index 000000000..3d9e97c1e --- /dev/null +++ b/test/integration/__init__.py @@ -0,0 +1,12 @@ +# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not +# use this file except in compliance with the License. A copy of the License +# is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. diff --git a/test/integration/test_seq_copy_int.py b/test/integration/test_seq_copy_int.py new file mode 100644 index 000000000..e39c1411b --- /dev/null +++ b/test/integration/test_seq_copy_int.py @@ -0,0 +1,66 @@ +# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not +# use this file except in compliance with the License. A copy of the License +# is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. + +import os +from tempfile import TemporaryDirectory + +import pytest + +from test.common import generate_digits_file, run_train_translate + +_TRAIN_LINE_COUNT = 100 +_DEV_LINE_COUNT = 10 +_LINE_MAX_LENGTH = 9 + +@pytest.mark.parametrize("train_params, translate_params", [ + # "Vanilla" LSTM encoder-decoder with attention + ("--encoder rnn --rnn-num-layers 1 --rnn-cell-type lstm --rnn-num-hidden 16 --num-embed 8 --attention-type mlp" + " --attention-num-hidden 16 --batch-size 8 --loss cross-entropy --optimized-metric perplexity --max-updates 10" + " --checkpoint-frequency 10 --optimizer adam --initial-learning-rate 0.01", + "--beam-size 2"), + # "Kitchen sink" LSTM encoder-decoder with attention + ("--encoder rnn --rnn-num-layers 4 --rnn-cell-type lstm --rnn-num-hidden 16 --rnn-residual-connections" + " --num-embed 16 --attention-type coverage --attention-num-hidden 16 --weight-tying --attention-use-prev-word" + " --context-gating --layer-normalization --batch-size 8 --loss smoothed-cross-entropy" + " --smoothed-cross-entropy-alpha 0.1 --normalize-loss --optimized-metric perplexity --max-updates 10" + " --checkpoint-frequency 10 --dropout 0.1 --optimizer adam --initial-learning-rate 0.01", + "--beam-size 2"), + # Convolutional embedding encoder + LSTM encoder-decoder with attention + ("--encoder rnn-with-conv-embed --conv-embed-max-filter-width 3 --conv-embed-num-filters 4 4 8" + " --conv-embed-pool-stride 2 --conv-embed-num-highway-layers 1 --rnn-num-layers 1 --rnn-cell-type lstm" + " --rnn-num-hidden 16 --num-embed 8 --attention-num-hidden 16 --batch-size 8 --loss cross-entropy" + " --optimized-metric perplexity --max-updates 10 --checkpoint-frequency 10 --optimizer adam" + " --initial-learning-rate 0.01", + "--beam-size 2"), +]) + +def test_seq_copy(train_params, translate_params): + """Task: copy short sequences of digits""" + with TemporaryDirectory(prefix="test_seq_copy") as work_dir: + # Simple digits files for train/dev data + train_source_path = os.path.join(work_dir, "train.src") + train_target_path = os.path.join(work_dir, "train.tgt") + dev_source_path = os.path.join(work_dir, "dev.src") + dev_target_path = os.path.join(work_dir, "dev.tgt") + generate_digits_file(train_source_path, train_target_path, _TRAIN_LINE_COUNT, _LINE_MAX_LENGTH) + generate_digits_file(dev_source_path, dev_target_path, _DEV_LINE_COUNT, _LINE_MAX_LENGTH) + # Test model configuration + # Ignore return values (perplexity and BLEU) for integration test + run_train_translate(train_params, + translate_params, + train_source_path, + train_target_path, + dev_source_path, + dev_target_path, + max_seq_len=_LINE_MAX_LENGTH + 1, + work_dir=work_dir) diff --git a/test/system/__init__.py b/test/system/__init__.py new file mode 100644 index 000000000..3d9e97c1e --- /dev/null +++ b/test/system/__init__.py @@ -0,0 +1,12 @@ +# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not +# use this file except in compliance with the License. A copy of the License +# is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. diff --git a/test/system/pytest.ini b/test/system/pytest.ini new file mode 100644 index 000000000..ca0c9f171 --- /dev/null +++ b/test/system/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +addopts = -s diff --git a/test/system/test_seq_copy_sys.py b/test/system/test_seq_copy_sys.py new file mode 100644 index 000000000..4095cd25f --- /dev/null +++ b/test/system/test_seq_copy_sys.py @@ -0,0 +1,88 @@ +# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not +# use this file except in compliance with the License. A copy of the License +# is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. + +import os +from tempfile import TemporaryDirectory + +import pytest + +from test.common import generate_digits_file, run_train_translate + + +_TRAIN_LINE_COUNT = 10000 +_DEV_LINE_COUNT = 100 +_LINE_MAX_LENGTH = 9 + + +@pytest.mark.parametrize("train_params, translate_params, perplexity_thresh, bleu_thresh", [ + # "Vanilla" LSTM encoder-decoder with attention + ("--encoder rnn --rnn-num-layers 1 --rnn-cell-type lstm --rnn-num-hidden 64 --num-embed 32 --attention-type mlp" + " --attention-num-hidden 32 --batch-size 16 --loss cross-entropy --optimized-metric perplexity --max-updates 10000" + " --checkpoint-frequency 1000 --optimizer adam --initial-learning-rate 0.001", + "--beam-size 5", + 1.01, + 0.98), +]) +def test_seq_copy(train_params, translate_params, perplexity_thresh, bleu_thresh): + """Task: copy short sequences of digits""" + with TemporaryDirectory(prefix="test_seq_copy.") as work_dir: + # Simple digits files for train/dev data + train_source_path = os.path.join(work_dir, "train.src") + train_target_path = os.path.join(work_dir, "train.tgt") + dev_source_path = os.path.join(work_dir, "dev.src") + dev_target_path = os.path.join(work_dir, "dev.tgt") + generate_digits_file(train_source_path, train_target_path, _TRAIN_LINE_COUNT, _LINE_MAX_LENGTH) + generate_digits_file(dev_source_path, dev_target_path, _DEV_LINE_COUNT, _LINE_MAX_LENGTH) + # Test model configuration + perplexity, bleu = run_train_translate(train_params, + translate_params, + train_source_path, + train_target_path, + dev_source_path, + dev_target_path, + max_seq_len=_LINE_MAX_LENGTH + 1, + work_dir=work_dir) + assert perplexity <= perplexity_thresh + assert bleu >= bleu_thresh + + +@pytest.mark.parametrize("train_params, translate_params, perplexity_thresh, bleu_thresh", [ + # "Vanilla" LSTM encoder-decoder with attention + ("--encoder rnn --rnn-num-layers 1 --rnn-cell-type lstm --rnn-num-hidden 64 --num-embed 32 --attention-type mlp" + " --attention-num-hidden 32 --batch-size 16 --loss cross-entropy --optimized-metric perplexity --max-updates 10000" + " --checkpoint-frequency 1000 --optimizer adam --initial-learning-rate 0.001", + "--beam-size 5", + 1.01, + 0.98), +]) +def test_seq_sort(train_params, translate_params, perplexity_thresh, bleu_thresh): + """Task: sort short sequences of digits""" + with TemporaryDirectory(prefix="test_seq_sort.") as work_dir: + # Simple digits files for train/dev data + train_source_path = os.path.join(work_dir, "train.src") + train_target_path = os.path.join(work_dir, "train.tgt") + dev_source_path = os.path.join(work_dir, "dev.src") + dev_target_path = os.path.join(work_dir, "dev.tgt") + generate_digits_file(train_source_path, train_target_path, _TRAIN_LINE_COUNT, _LINE_MAX_LENGTH, sort_target=True) + generate_digits_file(dev_source_path, dev_target_path, _DEV_LINE_COUNT, _LINE_MAX_LENGTH, sort_target=True) + # Test model configuration + perplexity, bleu = run_train_translate(train_params, + translate_params, + train_source_path, + train_target_path, + dev_source_path, + dev_target_path, + max_seq_len=_LINE_MAX_LENGTH + 1, + work_dir=work_dir) + assert perplexity <= perplexity_thresh + assert bleu >= bleu_thresh diff --git a/test/unit/__init__.py b/test/unit/__init__.py new file mode 100644 index 000000000..3d9e97c1e --- /dev/null +++ b/test/unit/__init__.py @@ -0,0 +1,12 @@ +# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not +# use this file except in compliance with the License. A copy of the License +# is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. diff --git a/test/test_arguments.py b/test/unit/test_arguments.py similarity index 100% rename from test/test_arguments.py rename to test/unit/test_arguments.py diff --git a/test/test_attention.py b/test/unit/test_attention.py similarity index 99% rename from test/test_attention.py rename to test/unit/test_attention.py index b5dd3eb8d..f643fc99f 100644 --- a/test/test_attention.py +++ b/test/unit/test_attention.py @@ -18,7 +18,7 @@ import sockeye.attention import sockeye.constants as C import sockeye.coverage -from test.test_utils import gaussian_vector, integer_vector +from test.common import gaussian_vector, integer_vector attention_types = [C.ATT_BILINEAR, C.ATT_DOT, C.ATT_DOT_SCALED, C.ATT_LOC, C.ATT_MLP] diff --git a/test/test_average.py b/test/unit/test_average.py similarity index 100% rename from test/test_average.py rename to test/unit/test_average.py diff --git a/test/test_bleu.py b/test/unit/test_bleu.py similarity index 100% rename from test/test_bleu.py rename to test/unit/test_bleu.py diff --git a/test/test_callback.py b/test/unit/test_callback.py similarity index 100% rename from test/test_callback.py rename to test/unit/test_callback.py diff --git a/test/test_checkpoint.py b/test/unit/test_checkpoint.py similarity index 98% rename from test/test_checkpoint.py rename to test/unit/test_checkpoint.py index 1f9fa101f..cdbaf80bf 100644 --- a/test/test_checkpoint.py +++ b/test/unit/test_checkpoint.py @@ -13,7 +13,7 @@ from math import isclose import tempfile -from test.test_utils import generate_random_sentence +from test.common import generate_random_sentence import sockeye.data_io import mxnet as mx diff --git a/test/test_config.py b/test/unit/test_config.py similarity index 100% rename from test/test_config.py rename to test/unit/test_config.py diff --git a/test/test_coverage.py b/test/unit/test_coverage.py similarity index 99% rename from test/test_coverage.py rename to test/unit/test_coverage.py index ecb2c6bf1..3d285ca55 100644 --- a/test/test_coverage.py +++ b/test/unit/test_coverage.py @@ -16,7 +16,7 @@ import numpy as np import pytest import sockeye.coverage -from test.test_utils import gaussian_vector, integer_vector, uniform_vector +from test.common import gaussian_vector, integer_vector, uniform_vector activation_types = ["tanh", "sigmoid", "relu", "softrelu"] diff --git a/test/test_data_io.py b/test/unit/test_data_io.py similarity index 100% rename from test/test_data_io.py rename to test/unit/test_data_io.py diff --git a/test/test_decoder.py b/test/unit/test_decoder.py similarity index 98% rename from test/test_decoder.py rename to test/unit/test_decoder.py index 8b4377678..ce3019501 100644 --- a/test/test_decoder.py +++ b/test/unit/test_decoder.py @@ -19,7 +19,7 @@ import sockeye.constants as C import sockeye.coverage import sockeye.decoder -from test.test_utils import gaussian_vector, integer_vector +from test.common import gaussian_vector, integer_vector step_tests = [(C.GRU_TYPE, True), (C.LSTM_TYPE, False)] diff --git a/test/test_encoder.py b/test/unit/test_encoder.py similarity index 100% rename from test/test_encoder.py rename to test/unit/test_encoder.py diff --git a/test/test_layers.py b/test/unit/test_layers.py similarity index 100% rename from test/test_layers.py rename to test/unit/test_layers.py diff --git a/test/test_loss.py b/test/unit/test_loss.py similarity index 100% rename from test/test_loss.py rename to test/unit/test_loss.py diff --git a/test/test_lr_scheduler.py b/test/unit/test_lr_scheduler.py similarity index 100% rename from test/test_lr_scheduler.py rename to test/unit/test_lr_scheduler.py diff --git a/test/test_output_handler.py b/test/unit/test_output_handler.py similarity index 100% rename from test/test_output_handler.py rename to test/unit/test_output_handler.py diff --git a/test/test_params.py b/test/unit/test_params.py similarity index 100% rename from test/test_params.py rename to test/unit/test_params.py diff --git a/test/test_rnn.py b/test/unit/test_rnn.py similarity index 100% rename from test/test_rnn.py rename to test/unit/test_rnn.py diff --git a/test/test_translate.py b/test/unit/test_translate.py similarity index 100% rename from test/test_translate.py rename to test/unit/test_translate.py diff --git a/test/test_utils.py b/test/unit/test_utils.py similarity index 74% rename from test/test_utils.py rename to test/unit/test_utils.py index 3bfa0e903..48bbc47f9 100644 --- a/test/test_utils.py +++ b/test/unit/test_utils.py @@ -12,9 +12,7 @@ # permissions and limitations under the License. import sockeye.utils -import mxnet as mx import numpy as np -import random import pytest from sockeye.utils import check_condition, SockeyeError @@ -31,58 +29,6 @@ def test_get_alignments(): assert alignment == expected_alignment -def gaussian_vector(shape, return_symbol=False): - """ - Generates random normal tensors (diagonal covariance) - - :param shape: shape of the tensor. - :param return_symbol: True if the result should be a Symbol, False if it should be an Numpy array. - :return: A gaussian tensor. - """ - return mx.sym.random_normal(shape=shape) if return_symbol else np.random.normal(size=shape) - - -def integer_vector(shape, max_value, return_symbol=False): - """ - Generates a random positive integer tensor - - :param shape: shape of the tensor. - :param max_value: maximum integer value. - :param return_symbol: True if the result should be a Symbol, False if it should be an Numpy array. - :return: A random integer tensor. - """ - return mx.sym.round(mx.sym.random_uniform(shape=shape) * max_value) if return_symbol \ - else np.round(np.random.uniform(size=shape) * max_value) - - -def uniform_vector(shape, min_value=0, max_value=1, return_symbol=False): - """ - Generates a uniformly random tensor - - :param shape: shape of the tensor - :param min_value: minimum possible value - :param max_value: maximum possible value (exclusive) - :param return_symbol: True if the result should be a mx.sym.Symbol, False if it should be a Numpy array - :return: - """ - return mx.sym.random_uniform(low=min_value, high=max_value, shape=shape) if return_symbol \ - else np.random.uniform(low=min_value, high=max_value, size=shape) - - -def generate_random_sentence(vocab_size, max_len): - """ - Generates a random "sentence" as a list of integers. - - :param vocab_size: Number of words in the "vocabulary". Note that due to - the inclusion of special words (BOS, EOS, UNK) this does *not* - correspond to the maximum possible value. - :param max_len: maximum sentence length. - """ - length = random.randint(1, max_len) - # Due to the special words, the actual words start at index 3 and go up to vocab_size+2 - return [random.randint(3, vocab_size + 2) for _ in range(length)] - - device_params = [([-4, 3, 5], 6, [0, 1, 2, 3, 4, 5]), ([-2, 3, -2, 5], 6, [0, 1, 2, 3, 4, 5]), ([-1], 1, [0]), diff --git a/test/test_vocab.py b/test/unit/test_vocab.py similarity index 100% rename from test/test_vocab.py rename to test/unit/test_vocab.py