From d99d89f0163c47985b046dfb929d5a623afa852b Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Wed, 5 Oct 2022 08:58:49 +0000
Subject: [PATCH 1/4] change `BloomConfig` docstring

- slightly change the docstring of the `BloomConfig`
- Use correct default vocab size
- Use correct default `hidden_dim`, `n_head`
---
 src/transformers/models/bloom/configuration_bloom.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/transformers/models/bloom/configuration_bloom.py b/src/transformers/models/bloom/configuration_bloom.py
index 1103a8148ae1e..d20d9e337feef 100644
--- a/src/transformers/models/bloom/configuration_bloom.py
+++ b/src/transformers/models/bloom/configuration_bloom.py
@@ -53,14 +53,16 @@ class BloomConfig(PretrainedConfig):
 
 
     Args:
-        vocab_size (`int`, *optional*, defaults to 50257):
+        vocab_size (`int`, *optional*, defaults to 250880):
             Vocabulary size of the Bloom model. Defines the number of different tokens that can be represented by the
-            `inputs_ids` passed when calling [`BloomModel`].
-        hidden_size (`int`, *optional*, defaults to 768):
+            `inputs_ids` passed when calling [`BloomModel`]. Check
+            https://huggingface.co/bigscience/bloom/discussions/120#633d28389addb8530b406c2a on how the `vocab_size`
+            has been defined.
+        hidden_size (`int`, *optional*, defaults to 64):
             Dimensionality of the embeddings and hidden states.
-        n_layer (`int`, *optional*, defaults to 12):
+        n_layer (`int`, *optional*, defaults to 2):
             Number of hidden layers in the Transformer encoder.
-        n_head (`int`, *optional*, defaults to 12):
+        n_head (`int`, *optional*, defaults to 8):
             Number of attention heads for each attention layer in the Transformer encoder.
         layer_norm_epsilon (`float`, *optional*, defaults to 1e-5):
             The epsilon to use in the layer normalization layers.

From 671ae11eb900ee6b0cd6bf2e8669f391c35d2fc7 Mon Sep 17 00:00:00 2001
From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com>
Date: Wed, 5 Oct 2022 15:48:02 +0200
Subject: [PATCH 2/4] Update src/transformers/models/bloom/configuration_bloom.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
---
 src/transformers/models/bloom/configuration_bloom.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/transformers/models/bloom/configuration_bloom.py b/src/transformers/models/bloom/configuration_bloom.py
index d20d9e337feef..502c5e9d35c25 100644
--- a/src/transformers/models/bloom/configuration_bloom.py
+++ b/src/transformers/models/bloom/configuration_bloom.py
@@ -55,8 +55,7 @@ class BloomConfig(PretrainedConfig):
     Args:
         vocab_size (`int`, *optional*, defaults to 250880):
             Vocabulary size of the Bloom model. Defines the number of different tokens that can be represented by the
-            `inputs_ids` passed when calling [`BloomModel`]. Check
-            https://huggingface.co/bigscience/bloom/discussions/120#633d28389addb8530b406c2a on how the `vocab_size`
+            `inputs_ids` passed when calling [`BloomModel`]. Check [this discussion](https://huggingface.co/bigscience/bloom/discussions/120#633d28389addb8530b406c2a) on how the `vocab_size`
             has been defined.
         hidden_size (`int`, *optional*, defaults to 64):
             Dimensionality of the embeddings and hidden states.
From b3478f03dcf11bce9845ff413efb8159d0b1cff5 Mon Sep 17 00:00:00 2001
From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com>
Date: Wed, 5 Oct 2022 15:48:26 +0200
Subject: [PATCH 3/4] Update src/transformers/models/bloom/configuration_bloom.py

Co-authored-by: SaulLu <55560583+SaulLu@users.noreply.github.com>
---
 src/transformers/models/bloom/configuration_bloom.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/models/bloom/configuration_bloom.py b/src/transformers/models/bloom/configuration_bloom.py
index 502c5e9d35c25..44a65153f1d8d 100644
--- a/src/transformers/models/bloom/configuration_bloom.py
+++ b/src/transformers/models/bloom/configuration_bloom.py
@@ -54,7 +54,7 @@ class BloomConfig(PretrainedConfig):
 
     Args:
         vocab_size (`int`, *optional*, defaults to 250880):
-            Vocabulary size of the Bloom model. Defines the number of different tokens that can be represented by the
+            Vocabulary size of the Bloom model. Defines the maximum number of different tokens that can be represented by the
             `inputs_ids` passed when calling [`BloomModel`]. Check [this discussion](https://huggingface.co/bigscience/bloom/discussions/120#633d28389addb8530b406c2a) on how the `vocab_size`
             has been defined.
         hidden_size (`int`, *optional*, defaults to 64):

From 137f15654ddd5236df19eaa6f0643320b88aa346 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Wed, 5 Oct 2022 16:14:45 +0100
Subject: [PATCH 4/4] make style

---
 src/transformers/models/bloom/configuration_bloom.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/transformers/models/bloom/configuration_bloom.py b/src/transformers/models/bloom/configuration_bloom.py
index 44a65153f1d8d..4f973d93ae48a 100644
--- a/src/transformers/models/bloom/configuration_bloom.py
+++ b/src/transformers/models/bloom/configuration_bloom.py
@@ -54,9 +54,10 @@ class BloomConfig(PretrainedConfig):
 
     Args:
         vocab_size (`int`, *optional*, defaults to 250880):
-            Vocabulary size of the Bloom model. Defines the maximum number of different tokens that can be represented by the
-            `inputs_ids` passed when calling [`BloomModel`]. Check [this discussion](https://huggingface.co/bigscience/bloom/discussions/120#633d28389addb8530b406c2a) on how the `vocab_size`
-            has been defined.
+            Vocabulary size of the Bloom model. Defines the maximum number of different tokens that can be represented
+            by the `inputs_ids` passed when calling [`BloomModel`]. Check [this
+            discussion](https://huggingface.co/bigscience/bloom/discussions/120#633d28389addb8530b406c2a) on how the
+            `vocab_size` has been defined.
         hidden_size (`int`, *optional*, defaults to 64):
             Dimensionality of the embeddings and hidden states.
         n_layer (`int`, *optional*, defaults to 2):
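For reference, a minimal sketch (not part of the patch series) that exercises the defaults the reworded docstring now documents; it assumes a `transformers` installation that includes these commits:

# Minimal check of the BloomConfig defaults documented by this patch
# series; assumes `transformers` with these commits applied.
from transformers import BloomConfig

config = BloomConfig()  # no arguments -> library defaults

assert config.vocab_size == 250880  # padded BLOOM tokenizer vocabulary, see linked discussion
assert config.hidden_size == 64
assert config.n_layer == 2
assert config.n_head == 8
assert config.layer_norm_epsilon == 1e-5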