PROD-259: Update dp_microbatches default value to 1
* updated default to dp_microbatches=1 and removed the check that dp_microbatches divides evenly into batch_size

* update test model params to dp_microbatches=1

* update config test param to dp_microbatches=1

* add microbatch warning to user log

* fix style

GitOrigin-RevId: c4f0ad87608528aed02a6366ef8f9f37c428ca9f
lipikaramaswamy committed Feb 22, 2023
1 parent ca7ad9a commit 0a80292
Showing 3 changed files with 7 additions and 10 deletions.
11 changes: 4 additions & 7 deletions src/gretel_synthetics/config.py
@@ -224,7 +224,7 @@ class TensorFlowConfig(BaseConfig):
         dp_microbatches (optional): Each batch of data is split into smaller units called micro-batches.
             Computational overhead can be reduced by increasing the size of micro-batches to include
             more than one training example. The number of micro-batches should divide evenly into
-            the overall ``batch_size``. Default is ``64``.
+            the overall ``batch_size``. Default is ``1``.
         gen_temp (optional): Controls the randomness of predictions by scaling the logits before
             applying softmax. Low temperatures result in more predictable text. Higher temperatures
             result in more surprising text. Experiment to find the best setting. Default is ``1.0``.
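
For reference, with the new default a differential-privacy config no longer needs to set dp_microbatches at all. A minimal sketch, assuming the field names shown in this diff and the test fixtures below; the paths are placeholders, and the exact constructor surface may vary by version:

from gretel_synthetics.config import TensorFlowConfig

# dp_microbatches now defaults to 1 and can simply be omitted; any other
# value is reset to 1 in __post_init__ (see the last hunk of this file).
config = TensorFlowConfig(
    input_data_path="data/smol.txt",  # placeholder path
    checkpoint_dir="checkpoints",     # placeholder path
    dp=True,
    dp_noise_multiplier=0.1,
    dp_l2_norm_clip=3.0,
)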
@@ -263,7 +263,7 @@ class TensorFlowConfig(BaseConfig):
     dp: bool = False
     dp_noise_multiplier: float = 0.1
     dp_l2_norm_clip: float = 3.0
-    dp_microbatches: int = 64
+    dp_microbatches: int = 1

     # Generation settings
     gen_temp: float = 1.0
@@ -284,18 +284,15 @@ def __post_init__(self):
                     "Running in differential privacy mode requires TensorFlow 2.4.x or greater. "
                     "Please see the README for details"
                 )
-            if self.batch_size % self.dp_microbatches != 0:
-                raise ValueError(
-                    "Number of dp_microbatches should divide evenly into batch_size"
-                )

             # TODO: To enable micro-batch size greater than 1, we need to update the differential privacy
             # optimizer loss function to compute the vector of per-example losses, rather than the mean
             # over a mini-batch.
             if self.dp_microbatches != 1:
                 logging.warning(
                     "***** Currently only a differential privacy micro-batch size of 1 is supported. "
-                    "Setting micro-batch size to 1. *****"
+                    "Setting micro-batch size to 1. *****",
+                    extra={"user_log": True},
                 )
                 self.dp_microbatches = 1
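
The TODO in this hunk explains why only a single micro-batch is supported for now: the DP optimizer's loss must be a vector of per-example losses rather than a mean over the mini-batch. A hypothetical illustration of that distinction in plain tf.keras, not code from this repo:

import tensorflow as tf

# Mean-reduced loss: adequate while dp_microbatches == 1, since the optimizer
# only ever clips one aggregate gradient per mini-batch.
mean_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Per-example loss vector: what a DP optimizer needs so it can clip each
# micro-batch's gradient separately before adding noise and aggregating.
vector_loss = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True,
    reduction=tf.keras.losses.Reduction.NONE,
)

labels = tf.constant([1, 0, 2, 1])
logits = tf.random.normal([4, 3])
print(mean_loss(labels, logits).shape)    # () -- a single scalar
print(vector_loss(labels, logits).shape)  # (4,) -- one loss per example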

4 changes: 2 additions & 2 deletions tests/data/0.14.x/dp-model/model_params.json
@@ -21,7 +21,7 @@
     "dp_learning_rate": 0.001,
     "dp_noise_multiplier": 1.1,
     "dp_l2_norm_clip": 1.0,
-    "dp_microbatches": 64,
+    "dp_microbatches": 1,
     "gen_temp": 1.0,
     "gen_chars": 0,
     "gen_lines": 1000,
@@ -31,4 +31,4 @@
     "overwrite": false,
     "checkpoint_dir": "/Users/mi/gretel/gretel-synthetics/tests/ckpoint",
     "input_data_path": "/Users/mi/gretel/gretel-synthetics/tests/data/smol.txt"
-}
+}
2 changes: 1 addition & 1 deletion tests/tensorflow/test_tf_config.py
@@ -53,7 +53,7 @@ def test_local_config_settings(mkdir):
         "learning_rate": 0.01,
         "dp_noise_multiplier": 0.1,
         "dp_l2_norm_clip": 3.0,
-        "dp_microbatches": 64,
+        "dp_microbatches": 1,
         "gen_temp": 1.0,
         "gen_chars": 0,
         "gen_lines": 1000,

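Finally, the warning in config.py is now tagged with extra={"user_log": True}. This commit does not show how that flag is consumed; with the standard library, every key in extra becomes an attribute on the LogRecord, so a consumer could route tagged records with a filter along these lines (a hypothetical sketch, not code from this repo):

import logging

class UserLogFilter(logging.Filter):
    """Pass only records tagged with extra={"user_log": True}."""

    def filter(self, record: logging.LogRecord) -> bool:
        # logging copies each key of `extra` onto the record, so the
        # micro-batch warning arrives with record.user_log == True.
        return getattr(record, "user_log", False)

user_handler = logging.StreamHandler()
user_handler.addFilter(UserLogFilter())
logging.getLogger("gretel_synthetics").addHandler(user_handler)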