Commit c020674

clean up unused classifier_dropout in config (huggingface#20596)
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2 people authored and amyeroberts committed Dec 7, 2022
1 parent 9be5692 commit c020674
Showing 6 changed files with 0 additions and 24 deletions.
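The removed attribute was declared and stored by each of these configs but never read by the corresponding modeling code. As a quick sanity check before reading the diffs, a grep-style scan over a local clone can confirm this; the sketch below is hypothetical and assumes the usual src/transformers/models/<name>/modeling_*.py layout of the repository.

    # Sanity-check sketch: confirm no modeling file still reads the attribute.
    # Assumes a local checkout of huggingface/transformers at this commit.
    from pathlib import Path

    MODELS = ["blenderbot", "blenderbot_small", "chinese_clip",
              "marian", "pegasus", "speech_to_text_2"]

    for name in MODELS:
        for path in Path(f"src/transformers/models/{name}").glob("modeling_*.py"):
            if "classifier_dropout" in path.read_text():
                print(f"{path}: still references classifier_dropout")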

4 changes: 0 additions & 4 deletions src/transformers/models/blenderbot/configuration_blenderbot.py

@@ -71,8 +71,6 @@ class BlenderbotConfig(PretrainedConfig):
             The dropout ratio for the attention probabilities.
         activation_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for activations inside the fully connected layer.
-        classifier_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for classifier.
         max_position_embeddings (`int`, *optional*, defaults to 128):
             The maximum sequence length that this model might ever be used with. Typically set this to something large
             just in case (e.g., 512 or 1024 or 2048).
@@ -131,7 +129,6 @@ def __init__(
         activation_dropout=0.0,
         init_std=0.02,
         decoder_start_token_id=1,
-        classifier_dropout=0.0,
         scale_embedding=False,
         pad_token_id=0,
         bos_token_id=1,
@@ -156,7 +153,6 @@ def __init__(
         self.init_std = init_std
         self.encoder_layerdrop = encoder_layerdrop
         self.decoder_layerdrop = decoder_layerdrop
-        self.classifier_dropout = classifier_dropout
         self.use_cache = use_cache
         self.num_hidden_layers = encoder_layers
         self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True
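With the three deletions above, a freshly constructed BlenderbotConfig no longer defines the dead attribute at all. A minimal sketch of the user-visible effect, assuming a transformers version that includes this commit:

    from transformers import BlenderbotConfig

    config = BlenderbotConfig()
    # No Blenderbot module ever read this attribute, so it is simply gone.
    print(hasattr(config, "classifier_dropout"))  # False after this commit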

4 changes: 0 additions & 4 deletions src/transformers/models/blenderbot_small/configuration_blenderbot_small.py

@@ -71,8 +71,6 @@ class BlenderbotSmallConfig(PretrainedConfig):
             The dropout ratio for the attention probabilities.
         activation_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for activations inside the fully connected layer.
-        classifier_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for classifier.
         max_position_embeddings (`int`, *optional*, defaults to 512):
             The maximum sequence length that this model might ever be used with. Typically set this to something large
             just in case (e.g., 512 or 1024 or 2048).
@@ -131,7 +129,6 @@ def __init__(
         activation_dropout=0.0,
         init_std=0.02,
         decoder_start_token_id=1,
-        classifier_dropout=0.0,
         scale_embedding=False,
         pad_token_id=0,
         bos_token_id=1,
@@ -155,7 +152,6 @@ def __init__(
         self.init_std = init_std
         self.encoder_layerdrop = encoder_layerdrop
         self.decoder_layerdrop = decoder_layerdrop
-        self.classifier_dropout = classifier_dropout
         self.use_cache = use_cache
         self.num_hidden_layers = encoder_layers
         self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True

4 changes: 0 additions & 4 deletions src/transformers/models/chinese_clip/configuration_chinese_clip.py

@@ -87,8 +87,6 @@ class ChineseCLIPTextConfig(PretrainedConfig):
         use_cache (`bool`, *optional*, defaults to `True`):
             Whether or not the model should return the last key/values attentions (not used by all models). Only
             relevant if `config.is_decoder=True`.
-        classifier_dropout (`float`, *optional*):
-            The dropout ratio for the classification head.
 
     Example:
 
@@ -124,7 +122,6 @@ def __init__(
         pad_token_id=0,
         position_embedding_type="absolute",
         use_cache=True,
-        classifier_dropout=None,
         **kwargs
     ):
         super().__init__(pad_token_id=pad_token_id, **kwargs)
@@ -144,7 +141,6 @@ def __init__(
         self.layer_norm_eps = layer_norm_eps
         self.position_embedding_type = position_embedding_type
         self.use_cache = use_cache
-        self.classifier_dropout = classifier_dropout
 
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
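Beyond the constructor, the removal also cleans up serialization: to_dict() only reflects attributes the config actually sets, so newly saved config.json files stop advertising the unused knob. A small sketch, assuming ChineseCLIPTextConfig is importable from the top-level transformers namespace:

    from transformers import ChineseCLIPTextConfig

    config = ChineseCLIPTextConfig()
    # Newly serialized configs no longer carry the removed, unused key.
    print("classifier_dropout" in config.to_dict())  # False after this commit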

4 changes: 0 additions & 4 deletions src/transformers/models/marian/configuration_marian.py

@@ -69,8 +69,6 @@ class MarianConfig(PretrainedConfig):
             The dropout ratio for the attention probabilities.
         activation_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for activations inside the fully connected layer.
-        classifier_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for classifier.
         max_position_embeddings (`int`, *optional*, defaults to 1024):
             The maximum sequence length that this model might ever be used with. Typically set this to something large
             just in case (e.g., 512 or 1024 or 2048).
@@ -130,7 +128,6 @@ def __init__(
         activation_dropout=0.0,
         init_std=0.02,
         decoder_start_token_id=58100,
-        classifier_dropout=0.0,
         scale_embedding=False,
         pad_token_id=58100,
         eos_token_id=0,
@@ -155,7 +152,6 @@ def __init__(
         self.init_std = init_std
         self.encoder_layerdrop = encoder_layerdrop
         self.decoder_layerdrop = decoder_layerdrop
-        self.classifier_dropout = classifier_dropout
         self.use_cache = use_cache
         self.num_hidden_layers = encoder_layers
         self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True
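Dropping the explicit parameter should not break older checkpoints: PretrainedConfig absorbs unrecognized keys from a saved config.json (or from **kwargs) as plain attributes, so a stale classifier_dropout entry is carried along harmlessly and simply never consumed. A hedged sketch of that behavior:

    from transformers import MarianConfig

    # Simulate loading an older config.json that still contains the removed key.
    old_dict = MarianConfig().to_dict()
    old_dict["classifier_dropout"] = 0.0
    config = MarianConfig.from_dict(old_dict)
    # The stale key survives as a plain attribute but is no longer used anywhere.
    print(getattr(config, "classifier_dropout", None))  # 0.0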

4 changes: 0 additions & 4 deletions src/transformers/models/pegasus/configuration_pegasus.py

@@ -64,8 +64,6 @@ class PegasusConfig(PretrainedConfig):
             The dropout ratio for the attention probabilities.
         activation_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for activations inside the fully connected layer.
-        classifier_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for classifier.
         max_position_embeddings (`int`, *optional*, defaults to 1024):
             The maximum sequence length that this model might ever be used with. Typically set this to something large
             just in case (e.g., 512 or 1024 or 2048).
@@ -124,7 +122,6 @@ def __init__(
         activation_dropout=0.0,
         init_std=0.02,
         decoder_start_token_id=0,
-        classifier_dropout=0.0,
         scale_embedding=False,
         pad_token_id=0,
         eos_token_id=1,
@@ -147,7 +144,6 @@ def __init__(
         self.init_std = init_std
         self.encoder_layerdrop = encoder_layerdrop
         self.decoder_layerdrop = decoder_layerdrop
-        self.classifier_dropout = classifier_dropout
         self.use_cache = use_cache
         self.num_hidden_layers = encoder_layers
         self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True

4 changes: 0 additions & 4 deletions src/transformers/models/speech_to_text_2/configuration_speech_to_text_2.py

@@ -60,8 +60,6 @@ class Speech2Text2Config(PretrainedConfig):
             The dropout ratio for the attention probabilities.
         activation_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for activations inside the fully connected layer.
-        classifier_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for classifier.
         init_std (`float`, *optional*, defaults to 0.02):
             The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
             https://arxiv.org/abs/1909.11556>`__ for more details.
@@ -109,7 +107,6 @@ def __init__(
         activation_dropout=0.0,
         init_std=0.02,
         decoder_start_token_id=2,
-        classifier_dropout=0.0,
         scale_embedding=True,
         pad_token_id=1,
         bos_token_id=0,
@@ -129,7 +126,6 @@ def __init__(
         self.activation_function = activation_function
         self.init_std = init_std
         self.decoder_layerdrop = decoder_layerdrop
-        self.classifier_dropout = classifier_dropout
         self.use_cache = use_cache
         self.num_hidden_layers = decoder_layers
         self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True
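Taken together, the six deletions leave none of these configs exposing a dropout knob that no model consumes. A closing sanity check over all touched classes, again assuming a transformers version that includes this commit:

    from transformers import (BlenderbotConfig, BlenderbotSmallConfig,
                              ChineseCLIPTextConfig, MarianConfig,
                              PegasusConfig, Speech2Text2Config)

    for cls in (BlenderbotConfig, BlenderbotSmallConfig, ChineseCLIPTextConfig,
                MarianConfig, PegasusConfig, Speech2Text2Config):
        # Each default config should have dropped the unused attribute.
        assert not hasattr(cls(), "classifier_dropout"), cls.__name__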
