2 changes: 1 addition & 1 deletion docs/source/en/model_doc/metaclip_2.md
@@ -32,7 +32,7 @@ MetaCLIP 2 is a replication of the original CLIP model trained on 300+ languages
This model was contributed by [nielsr](https://huggingface.co/nielsr).
The original code can be found [here](https://github.com/facebookresearch/MetaCLIP).

-You can find all the MetaCLIP 2 checkpoints under the [Meta](https://huggingface.co/facebook?search_models=metaclip-2) organization.
+You can find all the MetaCLIP 2 checkpoints under the [Meta](https://huggingface.co/facebook/models?search=metaclip-2) organization.

> [!TIP]
> Click on the MetaCLIP 2 models in the right sidebar for more examples of how to apply MetaCLIP 2 to different image and language tasks.
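The corrected Hub link above points at the facebook organization. For a quick smoke test of one of those checkpoints, here is a minimal sketch using the standard zero-shot image classification pipeline; the checkpoint name is the one referenced in this diff, and the image URL is the COCO sample commonly used in Transformers docs:

```python
# Minimal sketch: load a MetaCLIP 2 checkpoint from the facebook org and run
# zero-shot image classification. Assumes access to the Hugging Face Hub.
from transformers import pipeline

classifier = pipeline(
    task="zero-shot-image-classification",
    model="facebook/metaclip-2-worldwide-huge-quickgelu",
)
preds = classifier(
    "http://images.cocodataset.org/val2017/000000039769.jpg",
    candidate_labels=["a photo of two cats", "a photo of a dog"],
)
print(preds[0]["label"], preds[0]["score"])  # highest-scoring label first
```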
42 changes: 21 additions & 21 deletions src/transformers/models/metaclip_2/configuration_metaclip_2.py
@@ -14,18 +14,18 @@

class MetaClip2TextConfig(PretrainedConfig):
r"""
-This is the configuration class to store the configuration of a [`MetaClip2TextModel`]. It is used to instantiate a METACLIP_2
-text encoder according to the specified arguments, defining the model architecture. Instantiating a configuration
-with the defaults will yield a similar configuration to that of the text encoder of the METACLIP_2
-[openai/metaclip_2-vit-base-patch32](https://huggingface.co/openai/metaclip_2-vit-base-patch32) architecture.
+This is the configuration class to store the configuration of a [`MetaClip2TextModel`]. It is used to instantiate
+a MetaClip2 text encoder according to the specified arguments, defining the model architecture. Instantiating a
+configuration with the defaults will yield a similar configuration to that of the MetaClip2
+[facebook/metaclip-2-worldwide-huge-quickgelu](https://huggingface.co/facebook/metaclip-2-worldwide-huge-quickgelu) architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.

Args:
vocab_size (`int`, *optional*, defaults to 49408):
-Vocabulary size of the METACLIP_2 text model. Defines the number of different tokens that can be represented by
-the `inputs_ids` passed when calling [`MetaClip2Model`].
+Vocabulary size of the MetaClip2 text model. Defines the number of different tokens that can be represented by
+the `inputs_ids` passed when calling [`MetaClip2TextModel`].
hidden_size (`int`, *optional*, defaults to 512):
Dimensionality of the encoder layers and the pooler layer.
intermediate_size (`int`, *optional*, defaults to 2048):
@@ -63,10 +63,10 @@ class MetaClip2TextConfig(PretrainedConfig):
```python
>>> from transformers import MetaClip2TextConfig, MetaClip2TextModel

->>> # Initializing a MetaClip2TextConfig with openai/metaclip_2-vit-base-patch32 style configuration
+>>> # Initializing a MetaClip2TextConfig with facebook/metaclip-2-worldwide-huge-quickgelu style configuration
>>> configuration = MetaClip2TextConfig()

->>> # Initializing a MetaClip2TextModel (with random weights) from the openai/metaclip_2-vit-base-patch32 style configuration
+>>> # Initializing a MetaClip2TextModel (with random weights) from the facebook/metaclip-2-worldwide-huge-quickgelu style configuration
>>> model = MetaClip2TextModel(configuration)

>>> # Accessing the model configuration
@@ -115,10 +115,10 @@ def __init__(

class MetaClip2VisionConfig(PretrainedConfig):
r"""
-This is the configuration class to store the configuration of a [`MetaClip2VisionModel`]. It is used to instantiate a
-METACLIP_2 vision encoder according to the specified arguments, defining the model architecture. Instantiating a
-configuration with the defaults will yield a similar configuration to that of the vision encoder of the METACLIP_2
-[openai/metaclip_2-vit-base-patch32](https://huggingface.co/openai/metaclip_2-vit-base-patch32) architecture.
+This is the configuration class to store the configuration of a [`MetaClip2VisionModel`]. It is used to instantiate a MetaClip2
+vision encoder according to the specified arguments, defining the model architecture. Instantiating a configuration
+with the defaults will yield a similar configuration to that of the vision encoder of the MetaClip2
+[facebook/metaclip-2-worldwide-huge-quickgelu](https://huggingface.co/facebook/metaclip-2-worldwide-huge-quickgelu) architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.
@@ -158,10 +158,10 @@ class MetaClip2VisionConfig(PretrainedConfig):
```python
>>> from transformers import MetaClip2VisionConfig, MetaClip2VisionModel

->>> # Initializing a MetaClip2VisionConfig with openai/metaclip_2-vit-base-patch32 style configuration
+>>> # Initializing a MetaClip2VisionConfig with facebook/metaclip-2-worldwide-huge-quickgelu style configuration
>>> configuration = MetaClip2VisionConfig()

->>> # Initializing a MetaClip2VisionModel (with random weights) from the openai/metaclip_2-vit-base-patch32 style configuration
+>>> # Initializing a MetaClip2VisionModel (with random weights) from the facebook/metaclip-2-worldwide-huge-quickgelu style configuration
>>> model = MetaClip2VisionModel(configuration)

>>> # Accessing the model configuration
@@ -207,10 +207,10 @@ def __init__(

class MetaClip2Config(PretrainedConfig):
r"""
-[`MetaClip2Config`] is the configuration class to store the configuration of a [`MetaClip2Model`]. It is used to instantiate
-a METACLIP_2 model according to the specified arguments, defining the text model and vision model configs. Instantiating
-a configuration with the defaults will yield a similar configuration to that of the METACLIP_2
-[openai/metaclip_2-vit-base-patch32](https://huggingface.co/openai/metaclip_2-vit-base-patch32) architecture.
+[`MetaClip2Config`] is the configuration class to store the configuration of a [`MetaClip2Model`]. It is used to
+instantiate a MetaClip2 model according to the specified arguments, defining the text model and vision model configs.
+Instantiating a configuration with the defaults will yield a similar configuration to that of the MetaClip2
+[facebook/metaclip-2-worldwide-huge-quickgelu](https://huggingface.co/facebook/metaclip-2-worldwide-huge-quickgelu) architecture.

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.
@@ -223,7 +223,7 @@ class MetaClip2Config(PretrainedConfig):
projection_dim (`int`, *optional*, defaults to 512):
Dimensionality of text and vision projection layers.
logit_scale_init_value (`float`, *optional*, defaults to 2.6592):
-The initial value of the *logit_scale* parameter. Default is used as per the original METACLIP_2 implementation.
+The initial value of the *logit_scale* parameter. Default is used as per the original MetaClip2 implementation.
kwargs (*optional*):
Dictionary of keyword arguments.
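For context on that default: 2.6592 is the CLIP-style initialization of the logit scale, log(1/0.07), i.e. a starting softmax temperature of 0.07. A quick check:

```python
>>> import math
>>> round(math.log(1 / 0.07), 4)  # the docstring default 2.6592 truncates this
2.6593
```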

@@ -232,10 +232,10 @@ class MetaClip2Config(PretrainedConfig):
```python
>>> from transformers import MetaClip2Config, MetaClip2Model

->>> # Initializing a MetaClip2Config with openai/metaclip_2-vit-base-patch32 style configuration
+>>> # Initializing a MetaClip2Config with facebook/metaclip-2-worldwide-huge-quickgelu style configuration
>>> configuration = MetaClip2Config()

->>> # Initializing a MetaClip2Model (with random weights) from the openai/metaclip_2-vit-base-patch32 style configuration
+>>> # Initializing a MetaClip2Model (with random weights) from the facebook/metaclip-2-worldwide-huge-quickgelu style configuration
>>> model = MetaClip2Model(configuration)

>>> # Accessing the model configuration
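Since [`MetaClip2Config`] wraps a text and a vision config, it can also be built from explicit sub-configs. A hedged sketch, assuming it follows the same composite pattern as [`CLIPConfig`] (accepting `text_config` and `vision_config` as dicts); the field values here are illustrative, not the checkpoint's actual hyperparameters:

```python
# Sketch: compose the full config from sub-config dicts, assuming MetaClip2Config
# mirrors CLIPConfig's composite pattern. Values below are illustrative only.
from transformers import MetaClip2Config, MetaClip2Model

config = MetaClip2Config(
    text_config={"hidden_size": 512, "intermediate_size": 2048},
    vision_config={"hidden_size": 768, "patch_size": 32},
    projection_dim=512,             # shared text/vision embedding dimension
    logit_scale_init_value=2.6592,  # CLIP-style init, log(1 / 0.07)
)
model = MetaClip2Model(config)      # random weights; architecture only
print(config.text_config.hidden_size, config.vision_config.hidden_size)
```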