End-to-end testing with various model configurations (#85)
* End-to-end testing with various model configurations

* Reorganize unit and integration tests

* More reorganization, add system tests

pytest coverage 57% -> 80%
Michael Denkowski committed Jul 26, 2017
1 parent 6c22eb2 commit 7da864e
Showing 34 changed files with 398 additions and 95 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
@@ -23,4 +23,4 @@ script:
- pylint --rcfile=pylintrc test -E
- mypy --ignore-missing-imports --follow-imports=silent @typechecked-files
- check-manifest --ignore sockeye/git_version.py

+# - python -m pytest test/system
1 change: 1 addition & 0 deletions MANIFEST.in
@@ -8,6 +8,7 @@ include sockeye/git_version.py
exclude *.sh
include pytest.ini
recursive-include test *.py
+recursive-include test *.ini
recursive-include docs *.bat
recursive-include docs *.md
recursive-include docs *.py
9 changes: 7 additions & 2 deletions pre-commit.sh
@@ -10,7 +10,7 @@
STASH_NAME="pre-commit-$(date +%s)"
git stash save -q --keep-index $STASH_NAME

-# Run unit tests
+# Run unit and integration tests
python3 setup.py test
TEST_RESULT=$?

@@ -27,16 +27,21 @@ TESTS_LINT_RESULT=$?
mypy --ignore-missing-imports --follow-imports=silent @typechecked-files
MYPY_RESULT=$?

+# Run system tests
+python3 -m pytest test/system
+SYSTEM_RESULT=$?

# Pop our stashed files
STASHES=$(git stash list)
if [[ $STASHES == "$STASH_NAME" ]]; then
git stash pop -q
fi

-[ $TEST_RESULT -ne 0 ] && echo 'Unit tests failed' && exit 1
+[ $TEST_RESULT -ne 0 ] && echo 'Unit or integration tests failed' && exit 1
[ $SOCKEYE_LINT_RESULT -ne 0 ] && echo 'pylint found errors in the sockeye package' && exit 1
[ $TESTS_LINT_RESULT -ne 0 ] && echo 'pylint found errors in the test package' && exit 1
[ $MYPY_RESULT -ne 0 ] && echo 'mypy found incorrect type usage' && exit 1
+[ $SYSTEM_RESULT -ne 0 ] && echo 'System tests failed' && exit 1

echo 'all pre-commit checks passed'
exit 0
2 changes: 1 addition & 1 deletion pytest.ini
@@ -1,2 +1,2 @@
[pytest]
-addopts = --cov sockeye test -v
+addopts = --cov sockeye test/unit test/integration -v
33 changes: 2 additions & 31 deletions sockeye/average.py
@@ -74,15 +74,15 @@ def find_checkpoints(model_path: str, size=4, strategy="best", maximize=False, m
"""
Finds N best points from .metrics file according to strategy
-:param metric: Metric according to which checkpoints are selected. Corresponds to columns in model\metrics file.
+:param metric: Metric according to which checkpoints are selected. Corresponds to columns in model/metrics file.
:param model_path: Path to model.
:param size: Number of checkpoints to combine.
:param strategy: Combination strategy.
:param maximize: Whether the value of the metric should be maximized.
:return: List of paths corresponding to chosen checkpoints.
"""
metrics_path = os.path.join(model_path, C.METRICS_NAME)
-points = _read_metrics_points(metrics_path, model_path, metric=metric)
+points = sockeye.utils.read_metrics_points(metrics_path, model_path, metric=metric)

if strategy == "best":
# N best scoring points
@@ -111,35 +111,6 @@ def find_checkpoints(model_path: str, size=4, strategy="best", maximize=False, m
return params_paths


def _read_metrics_points(path: str, model_path: str, metric: str) -> List[Tuple[float, int]]:
"""
Reads lines from .metrics file and return list of elements [val, checkpoint]
:param metric: Metric according to which checkpoints are selected. Corresponds to columns in model\metrics file.
:param path: File to read metric values from.
:param model_path: path where the params files reside.
:return: List of pairs (metric value, checkpoint).
"""
points = []
# First field is checkpoint id
# Metric on validation (dev) set looks like this: METRIC-val=N
with open(path, "r") as metrics_in:
for line in metrics_in:
fields = line.split()
checkpoint = int(fields[0])
# Check that the corresponding params files exists
if not os.path.exists(os.path.join(model_path, C.PARAMS_NAME % checkpoint)):
continue
for field in fields[1:]:
key_value = field.split("=")
if len(key_value) == 2:
metric_set = key_value[0].split("-")
if len(metric_set) == 2 and metric_set[0] == metric and metric_set[1] == "val":
metric_value = float(key_value[1])
points.append([metric_value, checkpoint])
return points


def _strategy_best(points, size, maximize):
top_n = sorted(points, reverse=maximize)[:size]
return top_n
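For illustration, a small sketch of how the "best" strategy above behaves (perplexity values invented; lower perplexity is better, so maximize=False):

# (metric value, checkpoint) pairs, e.g. validation perplexities per checkpoint.
points = [[50.1, 1], [38.4, 2], [42.0, 3], [40.2, 4]]

# Equivalent to _strategy_best(points, size=2, maximize=False):
top_n = sorted(points, reverse=False)[:2]
print(top_n)  # [[38.4, 2], [40.2, 4]] -> the two lowest-perplexity checkpoints
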
30 changes: 30 additions & 0 deletions sockeye/utils.py
@@ -31,6 +31,7 @@
import numpy as np

from sockeye import __version__
+import sockeye.constants as C

logger = logging.getLogger(__name__)

@@ -525,3 +526,32 @@ def namedtuple_with_defaults(typename, field_names, default_values: Mapping[str,
prototype = T(*default_values)
T.__new__.__defaults__ = tuple(prototype)
return T


def read_metrics_points(path: str, model_path: str, metric: str) -> List[Tuple[float, int]]:
"""
Reads lines from .metrics file and returns a list of elements [val, checkpoint].
:param metric: Metric according to which checkpoints are selected. Corresponds to columns in model/metrics file.
:param path: File to read metric values from.
:param model_path: path where the params files reside.
:return: List of pairs (metric value, checkpoint).
"""
points = []
# First field is checkpoint id
# Metric on validation (dev) set looks like this: METRIC-val=N
with open(path, "r") as metrics_in:
for line in metrics_in:
fields = line.split()
checkpoint = int(fields[0])
# Check that the corresponding params file exists
if not os.path.exists(os.path.join(model_path, C.PARAMS_NAME % checkpoint)):
continue
for field in fields[1:]:
key_value = field.split("=")
if len(key_value) == 2:
metric_set = key_value[0].split("-")
if len(metric_set) == 2 and metric_set[0] == metric and metric_set[1] == "val":
metric_value = float(key_value[1])
points.append([metric_value, checkpoint])
return points
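
For reference, a usage sketch of the relocated helper (metrics values invented; the file layout follows the METRIC-val=N convention noted in the comments above):

import os
from tempfile import TemporaryDirectory

import sockeye.constants as C
import sockeye.utils

# Hypothetical model directory: a metrics file plus matching params files.
with TemporaryDirectory() as model_path:
    with open(os.path.join(model_path, C.METRICS_NAME), "w") as metrics_out:
        print("1 perplexity-train=45.2 perplexity-val=50.1", file=metrics_out)
        print("2 perplexity-train=30.7 perplexity-val=38.4", file=metrics_out)
    for checkpoint in (1, 2):
        # Touch the params files so these checkpoints are not skipped as missing.
        open(os.path.join(model_path, C.PARAMS_NAME % checkpoint), "w").close()
    points = sockeye.utils.read_metrics_points(os.path.join(model_path, C.METRICS_NAME),
                                               model_path,
                                               metric="perplexity")
    print(points)  # [[50.1, 1], [38.4, 2]]
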
3 changes: 1 addition & 2 deletions test/__init__.py
@@ -5,9 +5,8 @@
# is located at
#
# http://aws.amazon.com/apache2.0/
#
-#
# or in the "license" file accompanying this file. This file is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

159 changes: 159 additions & 0 deletions test/common.py
@@ -0,0 +1,159 @@
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You may not
# use this file except in compliance with the License. A copy of the License
# is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

import os
import random
import sys
from tempfile import TemporaryDirectory
from typing import Optional, Tuple
from unittest.mock import patch

import mxnet as mx
import numpy as np

import sockeye.bleu
import sockeye.constants as C
import sockeye.train
import sockeye.translate
import sockeye.utils


def gaussian_vector(shape, return_symbol=False):
"""
Generates random normal tensors (diagonal covariance)
:param shape: shape of the tensor.
:param return_symbol: True if the result should be a Symbol, False if it should be a NumPy array.
:return: A gaussian tensor.
"""
return mx.sym.random_normal(shape=shape) if return_symbol else np.random.normal(size=shape)


def integer_vector(shape, max_value, return_symbol=False):
"""
Generates a random positive integer tensor
:param shape: shape of the tensor.
:param max_value: maximum integer value.
:param return_symbol: True if the result should be a Symbol, False if it should be a NumPy array.
:return: A random integer tensor.
"""
return mx.sym.round(mx.sym.random_uniform(shape=shape) * max_value) if return_symbol \
else np.round(np.random.uniform(size=shape) * max_value)


def uniform_vector(shape, min_value=0, max_value=1, return_symbol=False):
"""
Generates a uniformly random tensor
:param shape: shape of the tensor
:param min_value: minimum possible value
:param max_value: maximum possible value (exclusive)
:param return_symbol: True if the result should be a mx.sym.Symbol, False if it should be a NumPy array
:return: A uniformly random tensor.
"""
return mx.sym.random_uniform(low=min_value, high=max_value, shape=shape) if return_symbol \
else np.random.uniform(low=min_value, high=max_value, size=shape)


def generate_random_sentence(vocab_size, max_len):
"""
Generates a random "sentence" as a list of integers.
:param vocab_size: Number of words in the "vocabulary". Note that due to
the inclusion of special words (BOS, EOS, UNK) this does *not*
correspond to the maximum possible value.
:param max_len: maximum sentence length.
"""
length = random.randint(1, max_len)
# Due to the special words, the actual words start at index 3 and go up to vocab_size+2
return [random.randint(3, vocab_size + 2) for _ in range(length)]


_DIGITS = "0123456789"


def generate_digits_file(source_path: str,
target_path: str,
line_count: int = 100,
line_length: int = 9,
sort_target: bool = False):
with open(source_path, "w") as source_out, open(target_path, "w") as target_out:
for _ in range(line_count):
digits = [random.choice(_DIGITS) for _ in range(random.randint(1, line_length))]
print(" ".join(digits), file=source_out)
if sort_target:
digits.sort()
print(" ".join(digits), file=target_out)


_TRAIN_PARAMS_COMMON = "--use-cpu --max-seq-len {max_len} --source {train_source} --target {train_target}" \
" --validation-source {dev_source} --validation-target {dev_target} --output {model}"


_TRANSLATE_PARAMS_COMMON = "--use-cpu --models {model} --input {input} --output {output}"


def run_train_translate(train_params: str,
translate_params: str,
train_source_path: str,
train_target_path: str,
dev_source_path: str,
dev_target_path: str,
max_seq_len: int = 10,
work_dir: Optional[str] = None) -> Tuple[float, float]:
"""
Train a model and translate a dev set. Report perplexity and BLEU.
:param train_params: Command line args for model training.
:param translate_params: Command line args for translation.
:param train_source_path: Path to training source data.
:param train_target_path: Path to training target data.
:param dev_source_path: Path to dev source data.
:param dev_target_path: Path to dev target data.
:param max_seq_len: Maximum sequence length.
:param work_dir: Optional parent directory for the temporary working directory.
:return: (perplexity, bleu)
"""
with TemporaryDirectory(dir=work_dir, prefix="test_train_translate.") as work_dir:

# Train model
model_path = os.path.join(work_dir, "model")
params = "{} {} {}".format(sockeye.train.__file__,
_TRAIN_PARAMS_COMMON.format(train_source=train_source_path,
train_target=train_target_path,
dev_source=dev_source_path,
dev_target=dev_target_path,
model=model_path,
max_len=max_seq_len),
train_params)
with patch.object(sys, "argv", params.split()):
sockeye.train.main()

# Translate corpus
out_path = os.path.join(work_dir, "out.txt")
params = "{} {} {}".format(sockeye.translate.__file__,
_TRANSLATE_PARAMS_COMMON.format(model=model_path,
input=dev_source_path,
output=out_path),
translate_params)
with patch.object(sys, "argv", params.split()):
sockeye.translate.main()

# Measure perplexity
checkpoints = sockeye.utils.read_metrics_points(path=os.path.join(model_path, C.METRICS_NAME),
model_path=model_path,
metric=C.PERPLEXITY)
perplexity = checkpoints[-1][0]

# Measure BLEU
bleu = sockeye.bleu.corpus_bleu(open(out_path, "r").readlines(),
open(dev_target_path, "r").readlines())

return perplexity, bleu
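
A quick usage sketch for the random tensor helpers defined in test/common.py above (shapes and values arbitrary; assumes the test package is importable from the repository root, as in the tests below):

import mxnet as mx

from test.common import gaussian_vector, integer_vector, uniform_vector

# NumPy mode returns concrete arrays, handy as test inputs.
data = gaussian_vector(shape=(2, 3))                        # entries ~ N(0, 1)
labels = integer_vector(shape=(2,), max_value=9)            # rounded values in [0, 9]
noise = uniform_vector(shape=(2, 3), min_value=-1.0, max_value=1.0)
assert data.shape == (2, 3) and noise.shape == (2, 3)

# Symbol mode returns graph nodes for building test networks.
sym = gaussian_vector(shape=(2, 3), return_symbol=True)
assert isinstance(sym, mx.sym.Symbol)
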
12 changes: 12 additions & 0 deletions test/integration/__init__.py
@@ -0,0 +1,12 @@
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You may not
# use this file except in compliance with the License. A copy of the License
# is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.
66 changes: 66 additions & 0 deletions test/integration/test_seq_copy_int.py
@@ -0,0 +1,66 @@
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You may not
# use this file except in compliance with the License. A copy of the License
# is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

import os
from tempfile import TemporaryDirectory

import pytest

from test.common import generate_digits_file, run_train_translate

_TRAIN_LINE_COUNT = 100
_DEV_LINE_COUNT = 10
_LINE_MAX_LENGTH = 9

@pytest.mark.parametrize("train_params, translate_params", [
# "Vanilla" LSTM encoder-decoder with attention
("--encoder rnn --rnn-num-layers 1 --rnn-cell-type lstm --rnn-num-hidden 16 --num-embed 8 --attention-type mlp"
" --attention-num-hidden 16 --batch-size 8 --loss cross-entropy --optimized-metric perplexity --max-updates 10"
" --checkpoint-frequency 10 --optimizer adam --initial-learning-rate 0.01",
"--beam-size 2"),
# "Kitchen sink" LSTM encoder-decoder with attention
("--encoder rnn --rnn-num-layers 4 --rnn-cell-type lstm --rnn-num-hidden 16 --rnn-residual-connections"
" --num-embed 16 --attention-type coverage --attention-num-hidden 16 --weight-tying --attention-use-prev-word"
" --context-gating --layer-normalization --batch-size 8 --loss smoothed-cross-entropy"
" --smoothed-cross-entropy-alpha 0.1 --normalize-loss --optimized-metric perplexity --max-updates 10"
" --checkpoint-frequency 10 --dropout 0.1 --optimizer adam --initial-learning-rate 0.01",
"--beam-size 2"),
# Convolutional embedding encoder + LSTM encoder-decoder with attention
("--encoder rnn-with-conv-embed --conv-embed-max-filter-width 3 --conv-embed-num-filters 4 4 8"
" --conv-embed-pool-stride 2 --conv-embed-num-highway-layers 1 --rnn-num-layers 1 --rnn-cell-type lstm"
" --rnn-num-hidden 16 --num-embed 8 --attention-num-hidden 16 --batch-size 8 --loss cross-entropy"
" --optimized-metric perplexity --max-updates 10 --checkpoint-frequency 10 --optimizer adam"
" --initial-learning-rate 0.01",
"--beam-size 2"),
])
def test_seq_copy(train_params, translate_params):
"""Task: copy short sequences of digits"""
with TemporaryDirectory(prefix="test_seq_copy") as work_dir:
# Simple digits files for train/dev data
train_source_path = os.path.join(work_dir, "train.src")
train_target_path = os.path.join(work_dir, "train.tgt")
dev_source_path = os.path.join(work_dir, "dev.src")
dev_target_path = os.path.join(work_dir, "dev.tgt")
generate_digits_file(train_source_path, train_target_path, _TRAIN_LINE_COUNT, _LINE_MAX_LENGTH)
generate_digits_file(dev_source_path, dev_target_path, _DEV_LINE_COUNT, _LINE_MAX_LENGTH)
# Test model configuration
# Ignore return values (perplexity and BLEU) for integration test
run_train_translate(train_params,
translate_params,
train_source_path,
train_target_path,
dev_source_path,
dev_target_path,
max_seq_len=_LINE_MAX_LENGTH + 1,
work_dir=work_dir)
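
The system tests referenced in the commit message and pre-commit.sh live under test/system and are not shown in this excerpt. As a rough sketch only (test name, parameter values, and thresholds are assumptions, not the committed code), such a test could train longer and assert on the quality metrics that run_train_translate returns:

import os
from tempfile import TemporaryDirectory

import pytest

from test.common import generate_digits_file, run_train_translate

# Hypothetical thresholds (BLEU as a fraction in [0, 1]); actual values may differ.
_PERPLEXITY_THRESH = 2.0
_BLEU_THRESH = 0.9


@pytest.mark.parametrize("train_params, translate_params", [
    ("--encoder rnn --rnn-num-layers 1 --rnn-cell-type lstm --rnn-num-hidden 64 --num-embed 32"
     " --attention-type mlp --attention-num-hidden 64 --batch-size 16 --loss cross-entropy"
     " --optimized-metric perplexity --max-updates 2500 --checkpoint-frequency 2500"
     " --optimizer adam --initial-learning-rate 0.001",
     "--beam-size 5"),
])
def test_seq_copy_sys(train_params, translate_params):
    """Task: copy short sequences of digits, trained long enough to converge."""
    with TemporaryDirectory(prefix="test_seq_copy_sys") as work_dir:
        train_source_path = os.path.join(work_dir, "train.src")
        train_target_path = os.path.join(work_dir, "train.tgt")
        dev_source_path = os.path.join(work_dir, "dev.src")
        dev_target_path = os.path.join(work_dir, "dev.tgt")
        generate_digits_file(train_source_path, train_target_path, 1000, 9)
        generate_digits_file(dev_source_path, dev_target_path, 50, 9)
        perplexity, bleu = run_train_translate(train_params, translate_params,
                                               train_source_path, train_target_path,
                                               dev_source_path, dev_target_path,
                                               max_seq_len=10, work_dir=work_dir)
        # Unlike the integration test, assert on model quality.
        assert perplexity <= _PERPLEXITY_THRESH
        assert bleu >= _BLEU_THRESH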
