Change BloomConfig docstring #19336
Merged (4 commits, Oct 5, 2022)
12 changes: 7 additions & 5 deletions src/transformers/models/bloom/configuration_bloom.py
Expand Up @@ -53,14 +53,16 @@ class BloomConfig(PretrainedConfig):


Args:
vocab_size (`int`, *optional*, defaults to 50257):
vocab_size (`int`, *optional*, defaults to 250880):
Vocabulary size of the Bloom model. Defines the number of different tokens that can be represented by the
younesbelkada marked this conversation as resolved.
Show resolved Hide resolved
`inputs_ids` passed when calling [`BloomModel`].
hidden_size (`int`, *optional*, defaults to 768):
`inputs_ids` passed when calling [`BloomModel`]. Check
https://huggingface.co/bigscience/bloom/discussions/120#633d28389addb8530b406c2a on how the `vocab_size`
has been defined.
younesbelkada marked this conversation as resolved.
Show resolved Hide resolved
hidden_size (`int`, *optional*, defaults to 64):
Dimensionality of the embeddings and hidden states.
n_layer (`int`, *optional*, defaults to 12):
n_layer (`int`, *optional*, defaults to 2):
Number of hidden layers in the Transformer encoder.
n_head (`int`, *optional*, defaults to 12):
n_head (`int`, *optional*, defaults to 8):
Number of attention heads for each attention layer in the Transformer encoder.
layer_norm_epsilon (`float`, *optional*, defaults to 1e-5):
The epsilon to use in the layer normalization layers.
Expand Down
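For reference, a minimal sketch of how the corrected defaults surface in practice, assuming a recent `transformers` install: constructing `BloomConfig()` with no arguments should pick up the values documented above, and a (tiny, randomly initialized) `BloomModel` can be built from it.

```python
from transformers import BloomConfig, BloomModel

# Default config: the attribute values should match the updated docstring
# (vocab_size=250880, hidden_size=64, n_layer=2, n_head=8).
config = BloomConfig()
print(config.vocab_size, config.hidden_size, config.n_layer, config.n_head)

# Build a small randomly initialized model from the default config.
model = BloomModel(config)
print(model.config.layer_norm_epsilon)  # 1e-5 per the docstring
```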