From e38e00d339bfa13994f62f5e72514000524dbccb Mon Sep 17 00:00:00 2001 From: Hosein Mohebbi Date: Thu, 25 Feb 2021 18:29:28 +0330 Subject: [PATCH] fixes #1 near-zero initialization --- modeling_tf_adapter_bert.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modeling_tf_adapter_bert.py b/modeling_tf_adapter_bert.py index 5ad4b96..6f9bd2b 100644 --- a/modeling_tf_adapter_bert.py +++ b/modeling_tf_adapter_bert.py @@ -20,13 +20,13 @@ def __init__(self, input_size, bottleneck_size, non_linearity, *inputs, **kwargs self.down_project = tf.keras.layers.Dense( bottleneck_size, - kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02), + kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=1e-3), bias_initializer="zeros", name="feedforward_downproject") self.up_project = tf.keras.layers.Dense( input_size, - kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02), + kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=1e-3), bias_initializer="zeros", name="feedforward_upproject")