PROD-259: Update dp_microbatches default value to 1
* updated default to dp_microbatches=1 and removed the check that dp_microbatches divides evenly into batch_size

* update test model params to dp_microbatches=1

* update config test param to dp_microbatches=1

* add microbatch warning to user log

* fix style

GitOrigin-RevId: c4f0ad87608528aed02a6366ef8f9f37c428ca9f
lipikaramaswamy committed Feb 22, 2023
1 parent ca7ad9a commit 0a80292
Showing 3 changed files with 7 additions and 10 deletions.
11 changes: 4 additions & 7 deletions src/gretel_synthetics/config.py
@@ -224,7 +224,7 @@ class TensorFlowConfig(BaseConfig):
         dp_microbatches (optional): Each batch of data is split into smaller units called micro-batches.
             Computational overhead can be reduced by increasing the size of micro-batches to include
             more than one training example. The number of micro-batches should divide evenly into
-            the overall ``batch_size``. Default is ``64``.
+            the overall ``batch_size``. Default is ``1``.
         gen_temp (optional): Controls the randomness of predictions by scaling the logits before
             applying softmax. Low temperatures result in more predictable text. Higher temperatures
             result in more surprising text. Experiment to find the best setting. Default is ``1.0``.
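
For reference, with the new default a differential-privacy config no longer needs to set dp_microbatches at all. A minimal sketch, assuming the field names shown in this diff and the test fixtures below; the paths are placeholders, and the exact constructor surface may vary by version:

from gretel_synthetics.config import TensorFlowConfig

# dp_microbatches now defaults to 1 and can simply be omitted; any other
# value is reset to 1 in __post_init__ (see the last hunk of this file).
config = TensorFlowConfig(
    input_data_path="data/smol.txt",  # placeholder path
    checkpoint_dir="checkpoints",     # placeholder path
    dp=True,
    dp_noise_multiplier=0.1,
    dp_l2_norm_clip=3.0,
)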
@@ -263,7 +263,7 @@ class TensorFlowConfig(BaseConfig):
     dp: bool = False
     dp_noise_multiplier: float = 0.1
     dp_l2_norm_clip: float = 3.0
-    dp_microbatches: int = 64
+    dp_microbatches: int = 1

     # Generation settings
     gen_temp: float = 1.0
@@ -284,18 +284,15 @@ def __post_init__(self):
                     "Running in differential privacy mode requires TensorFlow 2.4.x or greater. "
                     "Please see the README for details"
                 )
-            if self.batch_size % self.dp_microbatches != 0:
-                raise ValueError(
-                    "Number of dp_microbatches should divide evenly into batch_size"
-                )

             # TODO: To enable micro-batch size greater than 1, we need to update the differential privacy
             # optimizer loss function to compute the vector of per-example losses, rather than the mean
             # over a mini-batch.
             if self.dp_microbatches != 1:
                 logging.warning(
                     "***** Currently only a differential privacy micro-batch size of 1 is supported. "
-                    "Setting micro-batch size to 1. *****"
+                    "Setting micro-batch size to 1. *****",
+                    extra={"user_log": True},
                 )
                 self.dp_microbatches = 1
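
The TODO in this hunk explains why only a single micro-batch is supported for now: the DP optimizer's loss must be a vector of per-example losses rather than a mean over the mini-batch. A hypothetical illustration of that distinction in plain tf.keras, not code from this repo:

import tensorflow as tf

# Mean-reduced loss: adequate while dp_microbatches == 1, since the optimizer
# only ever clips one aggregate gradient per mini-batch.
mean_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Per-example loss vector: what a DP optimizer needs so it can clip each
# micro-batch's gradient separately before adding noise and aggregating.
vector_loss = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True,
    reduction=tf.keras.losses.Reduction.NONE,
)

labels = tf.constant([1, 0, 2, 1])
logits = tf.random.normal([4, 3])
print(mean_loss(labels, logits).shape)    # () -- a single scalar
print(vector_loss(labels, logits).shape)  # (4,) -- one loss per example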

4 changes: 2 additions & 2 deletions tests/data/0.14.x/dp-model/model_params.json
@@ -21,7 +21,7 @@
     "dp_learning_rate": 0.001,
     "dp_noise_multiplier": 1.1,
     "dp_l2_norm_clip": 1.0,
-    "dp_microbatches": 64,
+    "dp_microbatches": 1,
     "gen_temp": 1.0,
     "gen_chars": 0,
     "gen_lines": 1000,
@@ -31,4 +31,4 @@
     "overwrite": false,
     "checkpoint_dir": "/Users/mi/gretel/gretel-synthetics/tests/ckpoint",
     "input_data_path": "/Users/mi/gretel/gretel-synthetics/tests/data/smol.txt"
-}
+}
2 changes: 1 addition & 1 deletion tests/tensorflow/test_tf_config.py
@@ -53,7 +53,7 @@ def test_local_config_settings(mkdir):
         "learning_rate": 0.01,
         "dp_noise_multiplier": 0.1,
         "dp_l2_norm_clip": 3.0,
-        "dp_microbatches": 64,
+        "dp_microbatches": 1,
         "gen_temp": 1.0,
         "gen_chars": 0,
         "gen_lines": 1000,

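Finally, the warning in config.py is now tagged with extra={"user_log": True}. This commit does not show how that flag is consumed; with the standard library, every key in extra becomes an attribute on the LogRecord, so a consumer could route tagged records with a filter along these lines (a hypothetical sketch, not code from this repo):

import logging

class UserLogFilter(logging.Filter):
    """Pass only records tagged with extra={"user_log": True}."""

    def filter(self, record: logging.LogRecord) -> bool:
        # logging copies each key of `extra` onto the record, so the
        # micro-batch warning arrives with record.user_log == True.
        return getattr(record, "user_log", False)

user_handler = logging.StreamHandler()
user_handler.addFilter(UserLogFilter())
logging.getLogger("gretel_synthetics").addHandler(user_handler)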