Use tensorflow compilation + batching (#55)
* use tf.function to compile predict and sample

* do batch predictions

* add tests for batch predict

* make work with parallel, fix unit tests
misberner committed Sep 18, 2020
1 parent dd9f615 commit 31e4d2c
Showing 7 changed files with 103 additions and 63 deletions.
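The core of the change is compiling the per-step prediction into a TensorFlow graph and sampling a whole batch at once. The sketch below is illustrative only (the helper name make_compiled_sampler is invented; the diff's actual pieces are _predict_and_sample and the compiled closure inside Generator._predict_forever), but it shows the tf.function pattern the commit relies on.

import tensorflow as tf

def make_compiled_sampler(model, gen_temp):
    # Wrapping the predict-and-sample step in tf.function compiles it into a
    # single graph, so repeated calls skip most Python-level overhead.
    @tf.function
    def predict_and_sample(input_eval):
        # logits for the last timestep of every sequence in the batch
        predictions = model(input_eval)[:, -1, :]
        # temperature-scale the logits, then sample one token id per batch entry
        predictions = predictions / gen_temp
        return tf.random.categorical(predictions, num_samples=1)

    return predict_and_sample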
7 changes: 4 additions & 3 deletions docs/conf.py
@@ -64,11 +64,12 @@
html_css_files = ["styles.css"]

html_theme_options = {
'logo_only': True,
'display_version': True,
'style_nav_header_background': '#0c0c0d',
"logo_only": True,
"display_version": True,
"style_nav_header_background": "#0c0c0d",
}


def monkeypatch(cls):
""" decorator to monkey-patch methods """

3 changes: 3 additions & 0 deletions src/gretel_synthetics/config.py
@@ -100,6 +100,8 @@ class BaseConfig:
gen_lines (optional): Maximum number of text lines to generate. This function is used by
``generate_text`` and the optional ``line_validator`` to make sure that all lines created
by the model pass validation. Default is ``1000``.
predict_batch_size (optional): How many words to generate in parallel. Higher values may result in increased
throughput. The default of ``64`` should provide reasonable performance for most users.
save_all_checkpoints (optional): Set to ``True`` to save all model checkpoints as they are created,
which can be useful for optimal model selection. Set to ``False`` to save only the latest
checkpoint. Default is ``True``.
@@ -142,6 +144,7 @@ class BaseConfig:
gen_temp: float = 1.0
gen_chars: int = 0
gen_lines: int = 1000
predict_batch_size: int = 64

# Checkpoint storage
save_all_checkpoints: bool = False
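A usage sketch for the new field. Paths and values here are placeholders, and the LocalConfig import path is assumed from the tests further down, not stated in this hunk.

from gretel_synthetics.config import LocalConfig

config = LocalConfig(
    checkpoint_dir="checkpoints",          # placeholder path
    input_data_path="training_data.csv",   # placeholder path
    field_delimiter=",",
    predict_batch_size=64,                 # the default; larger batches may raise throughput
)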
2 changes: 1 addition & 1 deletion src/gretel_synthetics/generate_parallel.py
@@ -69,7 +69,7 @@ def generate_parallel(settings: Settings, num_lines: int, num_workers: int, chun
remaining_lines = num_lines

# This set tracks the currently outstanding invocations to _loky_worker_process_chunk.
pending_tasks: Set[futures.Future[Tuple[int, List[gen_text], int]]] = set()
pending_tasks: Set[futures.Future[Tuple[int, List[gen_text], int]]] = set() # pylint: disable=unsubscriptable-object # noqa

# How many tasks can be pending at once. While a lower factor saves memory, it increases the
# risk that workers sit idle because the main process is blocked on processing data and
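The comment above describes bounding the number of outstanding futures so workers stay busy without the main process buffering unbounded results. Below is a minimal, self-contained sketch of that pattern; the worker function, chunk list, and max_pending value are hypothetical and not taken from generate_parallel.py.

from concurrent import futures

def run_bounded(executor: futures.Executor, chunks, worker, max_pending: int):
    pending = set()
    results = []
    for chunk in chunks:
        # once too many tasks are in flight, block and harvest finished ones
        if len(pending) >= max_pending:
            done, pending = futures.wait(pending, return_when=futures.FIRST_COMPLETED)
            results.extend(f.result() for f in done)
        pending.add(executor.submit(worker, chunk))
    # drain whatever is still outstanding
    done, _ = futures.wait(pending)
    results.extend(f.result() for f in done)
    return results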
88 changes: 59 additions & 29 deletions src/gretel_synthetics/generator.py
@@ -1,4 +1,4 @@
from typing import TYPE_CHECKING, Callable, List, Iterable, Optional, Tuple
from typing import TYPE_CHECKING, Callable, Generator as GeneratorType, List, Iterable, Optional, Tuple
from dataclasses import dataclass, asdict
from collections import namedtuple

@@ -41,7 +41,7 @@ def _load_model(
store: LocalConfig,
) -> Tuple[spm.SentencePieceProcessor, tf.keras.Sequential]:
sp = _load_tokenizer(store)
model = _prepare_model(sp, 1, store)
model = _prepare_model(sp, store.predict_batch_size, store)
return sp, model


@@ -125,11 +125,13 @@ class Generator:
delim: str
total_invalid: int = 0
total_generated: int = 0
_predictions: GeneratorType[PredString, None, None]

def __init__(self, settings: Settings):
self.settings = settings
self.sp, self.model = _load_model(settings.config)
self.delim = settings.config.field_delimiter
self._predictions = self._predict_forever()

def generate_next(self, num_lines: int, hard_limit: Optional[int] = None) -> Iterable[gen_text]:
"""
@@ -148,7 +150,7 @@ def generate_next(self, num_lines: int, hard_limit: Optional[int] = None) -> Ite
total_lines_generated = 0

while valid_lines_generated < num_lines and (hard_limit is None or total_lines_generated < hard_limit):
rec = _predict_chars(self.model, self.sp, self.settings.start_string, self.settings.config).data
rec = next(self._predictions).data
total_lines_generated += 1
_valid = None
try:
@@ -178,13 +180,30 @@ def generate_next(self, num_lines: int, hard_limit: Optional[int] = None) -> Ite
if self.total_invalid > self.settings.max_invalid:
raise TooManyInvalidError("Maximum number of invalid lines reached!")

def _predict_forever(self) -> GeneratorType[PredString, None, None]:
"""
Returns a generator infinitely producing prediction strings.
Returns:
A generator producing an infinite sequence of ``PredString``s.
"""
@tf.function
def compiled_predict_and_sample(input_eval):
return _predict_and_sample(self.model, input_eval, self.settings.config.gen_temp)

while True:
yield from _predict_chars(
self.model, self.sp, self.settings.start_string, self.settings.config,
compiled_predict_and_sample)
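In plainer terms, the method above turns batched prediction into an infinite stream. A simplified sketch of the same pattern with standalone names (not the library's API) is:

def predict_forever(predict_batch):
    # restart the batched prediction generator forever; each pass yields one
    # batch's worth of finished lines, and callers pull them one at a time
    while True:
        yield from predict_batch()

# usage sketch: predictions = predict_forever(run_one_batch); line = next(predictions)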


def _predict_chars(
model: tf.keras.Sequential,
sp: spm.SentencePieceProcessor,
start_string: str,
store: BaseConfig,
) -> PredString:
predict_and_sample: Optional[Callable] = None,
) -> GeneratorType[PredString, None, None]:
"""
Evaluation step (generating text using the learned model).
@@ -198,36 +217,47 @@
"""

# Converting our start string to numbers (vectorizing)
input_eval = sp.EncodeAsIds(start_string)
input_eval = tf.expand_dims(input_eval, 0)
start_vec = sp.EncodeAsIds(start_string)
input_eval = tf.constant([start_vec for _ in range(store.predict_batch_size)])

# Here batch size == 1
model.reset_states()
if predict_and_sample is None:
def predict_and_sample(this_input):
return _predict_and_sample(model, this_input, store.gen_temp)

sentence_ids = []
# Batch prediction
batch_sentence_ids = [[] for _ in range(store.predict_batch_size)]
not_done = set(i for i in range(store.predict_batch_size))

while True:
predictions = model(input_eval)
# remove the batch dimension
predictions = tf.squeeze(predictions, 0)

# using a categorical distribution to
# predict the word returned by the model
predictions = predictions / store.gen_temp
predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()
model.reset_states()

# We pass the predicted word as the next input to the model
# along with the previous hidden state
input_eval = tf.expand_dims([predicted_id], 0)
sentence_ids.append(int(predicted_id))
while not_done:
input_eval = predict_and_sample(input_eval)
for i in not_done:
batch_sentence_ids[i].append(int(input_eval[i, 0].numpy()))

decoded = sp.DecodeIds(sentence_ids)
batch_decoded = [(i, sp.DecodeIds(batch_sentence_ids[i])) for i in not_done]
if store.field_delimiter is not None:
decoded = decoded.replace(
batch_decoded = [(i, decoded.replace(
store.field_delimiter_token, store.field_delimiter
)
)) for i, decoded in batch_decoded]

for i, decoded in batch_decoded:
end_idx = decoded.find("<n>")
if end_idx >= 0:
decoded = decoded[:end_idx]
yield PredString(decoded)
not_done.remove(i)
elif 0 < store.gen_chars <= len(decoded):
yield PredString(decoded)
not_done.remove(i)


if "<n>" in decoded:
return PredString(decoded.replace("<n>", ""))
elif 0 < store.gen_chars <= len(decoded):
return PredString(decoded)


def _predict_and_sample(model, input_eval, gen_temp):
predictions = model(input_eval)[:, -1, :]

# using a categorical distribution to
# predict the word returned by the model
predictions = predictions / gen_temp
predicted_ids = tf.random.categorical(predictions, num_samples=1)

return predicted_ids
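To make the batching change concrete, here is a small standalone illustration (token ids and batch size are made up): the old code fed one start sequence with a leading batch dimension of 1, while the new code repeats the start sequence predict_batch_size times so every model call advances a whole batch of candidate lines, with the not_done set dropping entries as their lines finish.

import tensorflow as tf

start_vec = [3, 17, 42]   # e.g. the ids from sp.EncodeAsIds(start_string)
predict_batch_size = 4

single = tf.expand_dims(start_vec, 0)                                  # old shape: (1, 3)
batched = tf.constant([start_vec for _ in range(predict_batch_size)])  # new shape: (4, 3)

print(single.shape, batched.shape)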
6 changes: 4 additions & 2 deletions tests/conftest.py
@@ -15,8 +15,10 @@ def global_local_config():
if not target.exists():
target.mkdir()
config = LocalConfig(
checkpoint_dir=target.as_posix(), input_data_path=input_data.as_posix(),
field_delimiter=","
checkpoint_dir=target.as_posix(),
input_data_path=input_data.as_posix(),
field_delimiter=",",
predict_batch_size=1
)
_annotate_training_data(config)
yield config
1 change: 1 addition & 0 deletions tests/test_config.py
@@ -56,6 +56,7 @@ def test_local_config_settings(mkdir):
"field_delimiter_token": "<d>",
"overwrite": False,
"input_data_path": "bar",
"predict_batch_size": 64,
}


59 changes: 31 additions & 28 deletions tests/test_generate.py
@@ -4,8 +4,8 @@
import pytest
import tensorflow as tf

from gretel_synthetics.generator import _predict_chars
from gretel_synthetics.generate import generate_text, PredString
from gretel_synthetics.generator import _predict_chars, PredString
from gretel_synthetics.generate import generate_text


@pytest.fixture
@@ -14,30 +14,33 @@ def random_cat():


@patch("tensorflow.random.categorical")
@patch("tensorflow.expand_dims")
def test_predict_chars(mock_dims, mock_cat, global_local_config, random_cat):
def test_predict_chars(mock_cat, global_local_config, random_cat):
config = global_local_config

global_local_config.gen_chars = 10
mock_model = Mock(return_value=[1.0])
mock_model = Mock(return_value=tf.constant([[[1.0]]]))
mock_tensor = MagicMock()
mock_tensor[-1, 0].numpy.return_value = 1
mock_cat.return_value = mock_tensor

sp = Mock()
sp.EncodeAsIds.return_value = [3]
sp.DecodeIds.return_value = "this is the end<n>"
# sp.DecodeIds.side_effect = ["this", " ", "is", " ", "the", " ", "end", "<n>"]

line = _predict_chars(mock_model, sp, "\n", global_local_config)
line = next(_predict_chars(mock_model, sp, "\n", config))
assert line == PredString(data="this is the end")


config = global_local_config
mock_tensor = MagicMock()
mock_tensor[-1, 0].numpy.side_effect = [0, 1, 2, 3, 4, 5, 6, 7, 8]
mock_cat.return_value = mock_tensor
global_local_config.gen_chars = 3
sp = Mock()
sp.DecodeIds.side_effect = ["a", "ab", "abc", "abcd"]
sp.EncodeAsIds.return_value = [3]
ret_data = [partial_rep for partial in ["a", "ab", "abc", "abcd"] for partial_rep in [partial] * config.predict_batch_size]
sp.DecodeIds.side_effect = ret_data
# sp.DecodeIds.side_effect = ["a", "b", "c", "d"]
line = _predict_chars(mock_model, sp, "\n", global_local_config)
assert line.data == "abc"
line = next(_predict_chars(mock_model, sp, "\n", config))


@patch("gretel_synthetics.generator.spm.SentencePieceProcessor")
@@ -47,7 +50,7 @@ def test_predict_chars(mock_dims, mock_cat, global_local_config, random_cat):
@patch("gretel_synthetics.generate.open")
def test_generate_text(_open, pickle, prepare, predict, spm, global_local_config):
global_local_config.gen_lines = 10
predict.side_effect = [PredString(json.dumps({"foo": i})) for i in range(0, 10)]
predict.side_effect = [[PredString(json.dumps({"foo": i}))] for i in range(0, 10)]
out = []

sp = Mock()
@@ -65,7 +68,7 @@ def test_generate_text(_open, pickle, prepare, predict, spm, global_local_config
}

# now with no validator
predict.side_effect = [PredString(json.dumps({"foo": i})) for i in range(0, 10)]
predict.side_effect = [[PredString(json.dumps({"foo": i}))] for i in range(0, 10)]
out = []
for rec in generate_text(global_local_config, parallelism=1):
out.append(rec.as_dict())
@@ -78,11 +81,11 @@ def test_generate_text(_open, pickle, prepare, predict, spm, global_local_config
}

# add validator back in, with a few bad json strings
predict.side_effect = (
[PredString(json.dumps({"foo": i})) for i in range(0, 3)]
+ [PredString("nope"), PredString("foo"), PredString("bar")]
+ [PredString(json.dumps({"foo": i})) for i in range(6, 10)]
)
predict.side_effect = [
[PredString(json.dumps({"foo": i})) for i in range(0, 3)],
[PredString("nope"), PredString("foo"), PredString("bar")],
[PredString(json.dumps({"foo": i})) for i in range(6, 10)],
]
out = []
try:
for rec in generate_text(global_local_config, line_validator=json.loads, parallelism=1):
@@ -93,11 +96,11 @@ def test_generate_text(_open, pickle, prepare, predict, spm, global_local_config
assert not out[4]["valid"]

# assert max invalid
predict.side_effect = (
[PredString(json.dumps({"foo": i})) for i in range(0, 3)]
+ [PredString("nope"), PredString("foo"), PredString("bar")]
+ [PredString(json.dumps({"foo": i})) for i in range(6, 10)]
)
predict.side_effect = [
[PredString(json.dumps({"foo": i})) for i in range(0, 3)],
[PredString("nope"), PredString("foo"), PredString("bar")],
[PredString(json.dumps({"foo": i})) for i in range(6, 10)],
]
out = []
try:
for rec in generate_text(global_local_config, line_validator=json.loads, max_invalid=2, parallelism=1):
@@ -116,11 +119,11 @@ def _val(line):
else:
return True

predict.side_effect = (
[PredString(json.dumps({"foo": i})) for i in range(0, 3)]
+ [PredString("nope"), PredString("foo"), PredString("bar")]
+ [PredString(json.dumps({"foo": i})) for i in range(6, 10)]
)
predict.side_effect = [
[PredString(json.dumps({"foo": i})) for i in range(0, 3)],
[PredString("nope"), PredString("foo"), PredString("bar")],
[PredString(json.dumps({"foo": i})) for i in range(6, 10)],
]
out = []
try:
for rec in generate_text(global_local_config, line_validator=_val, max_invalid=2, parallelism=1):
