From d99d89f0163c47985b046dfb929d5a623afa852b Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Wed, 5 Oct 2022 08:58:49 +0000
Subject: [PATCH 1/4] change `BloomConfig` docstring

- slightly change the docstring of the `BloomConfig`
- Use correct default vocab size
- Use correct default `hidden_dim`, `n_head`
---
 src/transformers/models/bloom/configuration_bloom.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/transformers/models/bloom/configuration_bloom.py b/src/transformers/models/bloom/configuration_bloom.py
index 1103a8148ae1e..d20d9e337feef 100644
--- a/src/transformers/models/bloom/configuration_bloom.py
+++ b/src/transformers/models/bloom/configuration_bloom.py
@@ -53,14 +53,16 @@ class BloomConfig(PretrainedConfig):
 
 
     Args:
-        vocab_size (`int`, *optional*, defaults to 50257):
+        vocab_size (`int`, *optional*, defaults to 250880):
             Vocabulary size of the Bloom model. Defines the number of different tokens that can be represented by the
-            `inputs_ids` passed when calling [`BloomModel`].
-        hidden_size (`int`, *optional*, defaults to 768):
+            `inputs_ids` passed when calling [`BloomModel`]. Check
+            https://huggingface.co/bigscience/bloom/discussions/120#633d28389addb8530b406c2a on how the `vocab_size`
+            has been defined.
+        hidden_size (`int`, *optional*, defaults to 64):
             Dimensionality of the embeddings and hidden states.
-        n_layer (`int`, *optional*, defaults to 12):
+        n_layer (`int`, *optional*, defaults to 2):
             Number of hidden layers in the Transformer encoder.
-        n_head (`int`, *optional*, defaults to 12):
+        n_head (`int`, *optional*, defaults to 8):
             Number of attention heads for each attention layer in the Transformer encoder.
         layer_norm_epsilon (`float`, *optional*, defaults to 1e-5):
             The epsilon to use in the layer normalization layers.

From 671ae11eb900ee6b0cd6bf2e8669f391c35d2fc7 Mon Sep 17 00:00:00 2001
From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com>
Date: Wed, 5 Oct 2022 15:48:02 +0200
Subject: [PATCH 2/4] Update src/transformers/models/bloom/configuration_bloom.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
---
 src/transformers/models/bloom/configuration_bloom.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/transformers/models/bloom/configuration_bloom.py b/src/transformers/models/bloom/configuration_bloom.py
index d20d9e337feef..502c5e9d35c25 100644
--- a/src/transformers/models/bloom/configuration_bloom.py
+++ b/src/transformers/models/bloom/configuration_bloom.py
@@ -55,8 +55,7 @@ class BloomConfig(PretrainedConfig):
     Args:
         vocab_size (`int`, *optional*, defaults to 250880):
             Vocabulary size of the Bloom model. Defines the number of different tokens that can be represented by the
-            `inputs_ids` passed when calling [`BloomModel`]. Check
-            https://huggingface.co/bigscience/bloom/discussions/120#633d28389addb8530b406c2a on how the `vocab_size`
+            `inputs_ids` passed when calling [`BloomModel`]. Check [this discussion](https://huggingface.co/bigscience/bloom/discussions/120#633d28389addb8530b406c2a) on how the `vocab_size`
             has been defined.
         hidden_size (`int`, *optional*, defaults to 64):
             Dimensionality of the embeddings and hidden states.
From b3478f03dcf11bce9845ff413efb8159d0b1cff5 Mon Sep 17 00:00:00 2001
From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com>
Date: Wed, 5 Oct 2022 15:48:26 +0200
Subject: [PATCH 3/4] Update src/transformers/models/bloom/configuration_bloom.py

Co-authored-by: SaulLu <55560583+SaulLu@users.noreply.github.com>
---
 src/transformers/models/bloom/configuration_bloom.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/models/bloom/configuration_bloom.py b/src/transformers/models/bloom/configuration_bloom.py
index 502c5e9d35c25..44a65153f1d8d 100644
--- a/src/transformers/models/bloom/configuration_bloom.py
+++ b/src/transformers/models/bloom/configuration_bloom.py
@@ -54,7 +54,7 @@ class BloomConfig(PretrainedConfig):
 
     Args:
         vocab_size (`int`, *optional*, defaults to 250880):
-            Vocabulary size of the Bloom model. Defines the number of different tokens that can be represented by the
+            Vocabulary size of the Bloom model. Defines the maximum number of different tokens that can be represented by the
             `inputs_ids` passed when calling [`BloomModel`]. Check [this discussion](https://huggingface.co/bigscience/bloom/discussions/120#633d28389addb8530b406c2a) on how the `vocab_size`
             has been defined.
         hidden_size (`int`, *optional*, defaults to 64):

From 137f15654ddd5236df19eaa6f0643320b88aa346 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Wed, 5 Oct 2022 16:14:45 +0100
Subject: [PATCH 4/4] make style

---
 src/transformers/models/bloom/configuration_bloom.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/transformers/models/bloom/configuration_bloom.py b/src/transformers/models/bloom/configuration_bloom.py
index 44a65153f1d8d..4f973d93ae48a 100644
--- a/src/transformers/models/bloom/configuration_bloom.py
+++ b/src/transformers/models/bloom/configuration_bloom.py
@@ -54,9 +54,10 @@ class BloomConfig(PretrainedConfig):
 
     Args:
         vocab_size (`int`, *optional*, defaults to 250880):
-            Vocabulary size of the Bloom model. Defines the maximum number of different tokens that can be represented by the
-            `inputs_ids` passed when calling [`BloomModel`]. Check [this discussion](https://huggingface.co/bigscience/bloom/discussions/120#633d28389addb8530b406c2a) on how the `vocab_size`
-            has been defined.
+            Vocabulary size of the Bloom model. Defines the maximum number of different tokens that can be represented
+            by the `inputs_ids` passed when calling [`BloomModel`]. Check [this
+            discussion](https://huggingface.co/bigscience/bloom/discussions/120#633d28389addb8530b406c2a) on how the
+            `vocab_size` has been defined.
         hidden_size (`int`, *optional*, defaults to 64):
             Dimensionality of the embeddings and hidden states.
         n_layer (`int`, *optional*, defaults to 2):
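For reference, a minimal sketch (not part of the patch series) that exercises the defaults the reworded docstring now documents; it assumes a `transformers` installation that includes these commits:

# Minimal check of the BloomConfig defaults documented by this patch
# series; assumes `transformers` with these commits applied.
from transformers import BloomConfig

config = BloomConfig()  # no arguments -> library defaults

assert config.vocab_size == 250880  # padded BLOOM tokenizer vocabulary, see linked discussion
assert config.hidden_size == 64
assert config.n_layer == 2
assert config.n_head == 8
assert config.layer_norm_epsilon == 1e-5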