From 4f8983e94b8f9121c9980a32aa1b3577db3cc7f5 Mon Sep 17 00:00:00 2001 From: Joshua Lochner <26504141+xenova@users.noreply.github.com> Date: Mon, 17 Nov 2025 20:45:43 -0500 Subject: [PATCH 1/5] Add SAM2 support --- src/models.js | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/models.js b/src/models.js index 7d31e2ed3..0fbb5278d 100644 --- a/src/models.js +++ b/src/models.js @@ -6083,13 +6083,8 @@ export class Sam2ImageSegmentationOutput extends ModelOutput { } } -export class EdgeTamPreTrainedModel extends PreTrainedModel { } - -/** - * EdgeTAM for generating segmentation masks, given an input image - * and optional 2D location and bounding boxes. - */ -export class EdgeTamModel extends EdgeTamPreTrainedModel { +export class Sam2PreTrainedModel extends PreTrainedModel { } +export class Sam2Model extends Sam2PreTrainedModel { /** * Compute image embeddings and positional image embeddings, given the pixel values of an image. @@ -6159,6 +6154,7 @@ export class EdgeTamModel extends EdgeTamPreTrainedModel { return new Sam2ImageSegmentationOutput(await super._call(model_inputs)); } } +export class EdgeTamModel extends Sam2Model { } // NOTE: extends Sam2Model ////////////////////////////////////////////////// @@ -8242,6 +8238,7 @@ const MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES = new Map([ const MODEL_FOR_MASK_GENERATION_MAPPING_NAMES = new Map([ ['sam', ['SamModel', SamModel]], + ['sam2', ['Sam2Model', Sam2Model]], ['edgetam', ['EdgeTamModel', EdgeTamModel]], ]); From 22ceb46dacf316aa20e8a96aad48afea0837978e Mon Sep 17 00:00:00 2001 From: Joshua Lochner <26504141+xenova@users.noreply.github.com> Date: Mon, 17 Nov 2025 20:45:52 -0500 Subject: [PATCH 2/5] Update supported models list --- README.md | 2 ++ docs/snippets/6_supported-models.snippet | 2 ++ 2 files changed, 4 insertions(+) diff --git a/README.md b/README.md index cd84d99fd..b1685b09a 100644 --- a/README.md +++ b/README.md @@ -323,6 +323,7 @@ You can refine your search by selecting the task you're interested in (e.g., [te 1. **[DiT](https://huggingface.co/docs/transformers/model_doc/dit)** (from Microsoft Research) released with the paper [DiT: Self-supervised Pre-training for Document Image Transformer](https://huggingface.co/papers/2203.02378) by Junlong Li, Yiheng Xu, Tengchao Lv, Lei Cui, Cha Zhang, Furu Wei. 1. **[Donut](https://huggingface.co/docs/transformers/model_doc/donut)** (from NAVER), released together with the paper [OCR-free Document Understanding Transformer](https://huggingface.co/papers/2111.15664) by Geewook Kim, Teakgyu Hong, Moonbin Yim, Jeongyeon Nam, Jinyoung Park, Jinyeong Yim, Wonseok Hwang, Sangdoo Yun, Dongyoon Han, Seunghyun Park. 1. **[DPT](https://huggingface.co/docs/transformers/master/model_doc/dpt)** (from Intel Labs) released with the paper [Vision Transformers for Dense Prediction](https://huggingface.co/papers/2103.13413) by René Ranftl, Alexey Bochkovskiy, Vladlen Koltun. +1. **[EdgeTAM](https://huggingface.co/docs/transformers/model_doc/edgetam)** (from Facebook) released with the paper [EdgeTAM: On-Device Track Anything Model](https://huggingface.co/papers/2501.07256) by Chong Zhou, Chenchen Zhu, Yunyang Xiong, Saksham Suri, Fanyi Xiao, Lemeng Wu, Raghuraman Krishnamoorthi, Bo Dai, Chen Change Loy, Vikas Chandra, Bilge Soran. 1. **[EfficientNet](https://huggingface.co/docs/transformers/model_doc/efficientnet)** (from Google Brain) released with the paper [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://huggingface.co/papers/1905.11946) by Mingxing Tan, Quoc V. Le. 1. **[ELECTRA](https://huggingface.co/docs/transformers/model_doc/electra)** (from Google Research/Stanford University) released with the paper [ELECTRA: Pre-training text encoders as discriminators rather than generators](https://huggingface.co/papers/2003.10555) by Kevin Clark, Minh-Thang Luong, Quoc V. Le, Christopher D. Manning. 1. **ERNIE-4.5** (from Baidu ERNIE Team) released with the blog post [Announcing the Open Source Release of the ERNIE 4.5 Model Family](https://ernie.baidu.com/blog/posts/ernie4.5/) by the Baidu ERNIE Team. @@ -424,6 +425,7 @@ You can refine your search by selecting the task you're interested in (e.g., [te 1. **Sapiens** (from Meta AI) released with the paper [Sapiens: Foundation for Human Vision Models](https://huggingface.co/papers/2408.12569) by Rawal Khirodkar, Timur Bagautdinov, Julieta Martinez, Su Zhaoen, Austin James, Peter Selednik, Stuart Anderson, Shunsuke Saito. 1. **[SegFormer](https://huggingface.co/docs/transformers/model_doc/segformer)** (from NVIDIA) released with the paper [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://huggingface.co/papers/2105.15203) by Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose M. Alvarez, Ping Luo. 1. **[Segment Anything](https://huggingface.co/docs/transformers/model_doc/sam)** (from Meta AI) released with the paper [Segment Anything](https://huggingface.co/papers/2304.02643v1.pdf) by Alexander Kirillov, Eric Mintun, Nikhila Ravi, Hanzi Mao, Chloe Rolland, Laura Gustafson, Tete Xiao, Spencer Whitehead, Alex Berg, Wan-Yen Lo, Piotr Dollar, Ross Girshick. +1. **[Segment Anything 2](https://huggingface.co/docs/transformers/model_doc/sam2)** (from Meta AI) released with the paper [SAM 2: Segment Anything in Images and Videos](https://huggingface.co/papers/2408.00714) by Nikhila Ravi, Valentin Gabeur, Yuan-Ting Hu, Ronghang Hu, Chaitanya Ryali, Tengyu Ma, Haitham Khedr, Roman Rädle, Chloe Rolland, Laura Gustafson, Eric Mintun, Junting Pan, Kalyan Vasudev Alwala, Nicolas Carion, Chao-Yuan Wu, Ross Girshick, Piotr Dollár, Christoph Feichtenhofer. 1. **[SigLIP](https://huggingface.co/docs/transformers/main/model_doc/siglip)** (from Google AI) released with the paper [Sigmoid Loss for Language Image Pre-Training](https://huggingface.co/papers/2303.15343) by Xiaohua Zhai, Basil Mustafa, Alexander Kolesnikov, Lucas Beyer. 1. **[SmolLM3](https://huggingface.co/docs/transformers/main/model_doc/smollm3) (from Hugging Face) released with the blog post [SmolLM3: smol, multilingual, long-context reasoner](https://huggingface.co/blog/smollm3) by the Hugging Face TB Research team. 1. **[SmolVLM](https://huggingface.co/docs/transformers/main/model_doc/smolvlm) (from Hugging Face) released with the blog posts [SmolVLM - small yet mighty Vision Language Model](https://huggingface.co/blog/smolvlm) and [SmolVLM Grows Smaller – Introducing the 250M & 500M Models!](https://huggingface.co/blog/smolervlm) by the Hugging Face TB Research team. diff --git a/docs/snippets/6_supported-models.snippet b/docs/snippets/6_supported-models.snippet index 728f39803..6af87653f 100644 --- a/docs/snippets/6_supported-models.snippet +++ b/docs/snippets/6_supported-models.snippet @@ -37,6 +37,7 @@ 1. **[DiT](https://huggingface.co/docs/transformers/model_doc/dit)** (from Microsoft Research) released with the paper [DiT: Self-supervised Pre-training for Document Image Transformer](https://huggingface.co/papers/2203.02378) by Junlong Li, Yiheng Xu, Tengchao Lv, Lei Cui, Cha Zhang, Furu Wei. 1. **[Donut](https://huggingface.co/docs/transformers/model_doc/donut)** (from NAVER), released together with the paper [OCR-free Document Understanding Transformer](https://huggingface.co/papers/2111.15664) by Geewook Kim, Teakgyu Hong, Moonbin Yim, Jeongyeon Nam, Jinyoung Park, Jinyeong Yim, Wonseok Hwang, Sangdoo Yun, Dongyoon Han, Seunghyun Park. 1. **[DPT](https://huggingface.co/docs/transformers/master/model_doc/dpt)** (from Intel Labs) released with the paper [Vision Transformers for Dense Prediction](https://huggingface.co/papers/2103.13413) by René Ranftl, Alexey Bochkovskiy, Vladlen Koltun. +1. **[EdgeTAM](https://huggingface.co/docs/transformers/model_doc/edgetam)** (from Facebook) released with the paper [EdgeTAM: On-Device Track Anything Model](https://huggingface.co/papers/2501.07256) by Chong Zhou, Chenchen Zhu, Yunyang Xiong, Saksham Suri, Fanyi Xiao, Lemeng Wu, Raghuraman Krishnamoorthi, Bo Dai, Chen Change Loy, Vikas Chandra, Bilge Soran. 1. **[EfficientNet](https://huggingface.co/docs/transformers/model_doc/efficientnet)** (from Google Brain) released with the paper [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://huggingface.co/papers/1905.11946) by Mingxing Tan, Quoc V. Le. 1. **[ELECTRA](https://huggingface.co/docs/transformers/model_doc/electra)** (from Google Research/Stanford University) released with the paper [ELECTRA: Pre-training text encoders as discriminators rather than generators](https://huggingface.co/papers/2003.10555) by Kevin Clark, Minh-Thang Luong, Quoc V. Le, Christopher D. Manning. 1. **ERNIE-4.5** (from Baidu ERNIE Team) released with the blog post [Announcing the Open Source Release of the ERNIE 4.5 Model Family](https://ernie.baidu.com/blog/posts/ernie4.5/) by the Baidu ERNIE Team. @@ -138,6 +139,7 @@ 1. **Sapiens** (from Meta AI) released with the paper [Sapiens: Foundation for Human Vision Models](https://huggingface.co/papers/2408.12569) by Rawal Khirodkar, Timur Bagautdinov, Julieta Martinez, Su Zhaoen, Austin James, Peter Selednik, Stuart Anderson, Shunsuke Saito. 1. **[SegFormer](https://huggingface.co/docs/transformers/model_doc/segformer)** (from NVIDIA) released with the paper [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://huggingface.co/papers/2105.15203) by Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose M. Alvarez, Ping Luo. 1. **[Segment Anything](https://huggingface.co/docs/transformers/model_doc/sam)** (from Meta AI) released with the paper [Segment Anything](https://huggingface.co/papers/2304.02643v1.pdf) by Alexander Kirillov, Eric Mintun, Nikhila Ravi, Hanzi Mao, Chloe Rolland, Laura Gustafson, Tete Xiao, Spencer Whitehead, Alex Berg, Wan-Yen Lo, Piotr Dollar, Ross Girshick. +1. **[Segment Anything 2](https://huggingface.co/docs/transformers/model_doc/sam2)** (from Meta AI) released with the paper [SAM 2: Segment Anything in Images and Videos](https://huggingface.co/papers/2408.00714) by Nikhila Ravi, Valentin Gabeur, Yuan-Ting Hu, Ronghang Hu, Chaitanya Ryali, Tengyu Ma, Haitham Khedr, Roman Rädle, Chloe Rolland, Laura Gustafson, Eric Mintun, Junting Pan, Kalyan Vasudev Alwala, Nicolas Carion, Chao-Yuan Wu, Ross Girshick, Piotr Dollár, Christoph Feichtenhofer. 1. **[SigLIP](https://huggingface.co/docs/transformers/main/model_doc/siglip)** (from Google AI) released with the paper [Sigmoid Loss for Language Image Pre-Training](https://huggingface.co/papers/2303.15343) by Xiaohua Zhai, Basil Mustafa, Alexander Kolesnikov, Lucas Beyer. 1. **[SmolLM3](https://huggingface.co/docs/transformers/main/model_doc/smollm3) (from Hugging Face) released with the blog post [SmolLM3: smol, multilingual, long-context reasoner](https://huggingface.co/blog/smollm3) by the Hugging Face TB Research team. 1. **[SmolVLM](https://huggingface.co/docs/transformers/main/model_doc/smolvlm) (from Hugging Face) released with the blog posts [SmolVLM - small yet mighty Vision Language Model](https://huggingface.co/blog/smolvlm) and [SmolVLM Grows Smaller – Introducing the 250M & 500M Models!](https://huggingface.co/blog/smolervlm) by the Hugging Face TB Research team. From f692523a0082704fe2f28dbfe2c0ad7130723f7a Mon Sep 17 00:00:00 2001 From: Joshua Lochner <26504141+xenova@users.noreply.github.com> Date: Tue, 18 Nov 2025 19:33:28 -0500 Subject: [PATCH 3/5] Add support for SAM3 Tracker --- src/models.js | 2 ++ src/models/image_processors.js | 3 ++- src/models/sam2/processing_sam2.js | 3 ++- src/models/sam3/image_processing_sam3.js | 2 ++ 4 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 src/models/sam3/image_processing_sam3.js diff --git a/src/models.js b/src/models.js index 0fbb5278d..9d8322aca 100644 --- a/src/models.js +++ b/src/models.js @@ -6155,6 +6155,7 @@ export class Sam2Model extends Sam2PreTrainedModel { } } export class EdgeTamModel extends Sam2Model { } // NOTE: extends Sam2Model +export class Sam3TrackerModel extends Sam2Model { } // NOTE: extends Sam2Model ////////////////////////////////////////////////// @@ -8240,6 +8241,7 @@ const MODEL_FOR_MASK_GENERATION_MAPPING_NAMES = new Map([ ['sam', ['SamModel', SamModel]], ['sam2', ['Sam2Model', Sam2Model]], ['edgetam', ['EdgeTamModel', EdgeTamModel]], + ['sam3_tracker', ['Sam3TrackerModel', Sam3TrackerModel]], ]); const MODEL_FOR_CTC_MAPPING_NAMES = new Map([ diff --git a/src/models/image_processors.js b/src/models/image_processors.js index 3c9c494f4..ff5f1df34 100644 --- a/src/models/image_processors.js +++ b/src/models/image_processors.js @@ -31,7 +31,8 @@ export * from './pvt/image_processing_pvt.js' export * from './qwen2_vl/image_processing_qwen2_vl.js' export * from './rt_detr/image_processing_rt_detr.js' export * from './sam/image_processing_sam.js' -export * from './sam2/image_processing_sam2.js'; +export * from './sam2/image_processing_sam2.js' +export * from './sam3/image_processing_sam3.js' export * from './segformer/image_processing_segformer.js' export * from './siglip/image_processing_siglip.js' export * from './smolvlm/image_processing_smolvlm.js' diff --git a/src/models/sam2/processing_sam2.js b/src/models/sam2/processing_sam2.js index 82bb488c9..7416fc9fb 100644 --- a/src/models/sam2/processing_sam2.js +++ b/src/models/sam2/processing_sam2.js @@ -1,3 +1,4 @@ import { SamProcessor } from "../sam/processing_sam.js"; -export class Sam2VideoProcessor extends SamProcessor { } +export class Sam2Processor extends SamProcessor { } +export class Sam2VideoProcessor extends Sam2Processor { } diff --git a/src/models/sam3/image_processing_sam3.js b/src/models/sam3/image_processing_sam3.js new file mode 100644 index 000000000..22a8e2914 --- /dev/null +++ b/src/models/sam3/image_processing_sam3.js @@ -0,0 +1,2 @@ + +export { Sam2ImageProcessor as Sam3ImageProcessor } from '../sam2/image_processing_sam2.js'; From 4c1ce9849406421f39016818f615317dfaa5a641 Mon Sep 17 00:00:00 2001 From: Joshua Lochner <26504141+xenova@users.noreply.github.com> Date: Wed, 19 Nov 2025 10:32:29 -0500 Subject: [PATCH 4/5] Update list of supported models --- README.md | 1 + docs/snippets/6_supported-models.snippet | 1 + 2 files changed, 2 insertions(+) diff --git a/README.md b/README.md index b1685b09a..bc9f817e1 100644 --- a/README.md +++ b/README.md @@ -426,6 +426,7 @@ You can refine your search by selecting the task you're interested in (e.g., [te 1. **[SegFormer](https://huggingface.co/docs/transformers/model_doc/segformer)** (from NVIDIA) released with the paper [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://huggingface.co/papers/2105.15203) by Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose M. Alvarez, Ping Luo. 1. **[Segment Anything](https://huggingface.co/docs/transformers/model_doc/sam)** (from Meta AI) released with the paper [Segment Anything](https://huggingface.co/papers/2304.02643v1.pdf) by Alexander Kirillov, Eric Mintun, Nikhila Ravi, Hanzi Mao, Chloe Rolland, Laura Gustafson, Tete Xiao, Spencer Whitehead, Alex Berg, Wan-Yen Lo, Piotr Dollar, Ross Girshick. 1. **[Segment Anything 2](https://huggingface.co/docs/transformers/model_doc/sam2)** (from Meta AI) released with the paper [SAM 2: Segment Anything in Images and Videos](https://huggingface.co/papers/2408.00714) by Nikhila Ravi, Valentin Gabeur, Yuan-Ting Hu, Ronghang Hu, Chaitanya Ryali, Tengyu Ma, Haitham Khedr, Roman Rädle, Chloe Rolland, Laura Gustafson, Eric Mintun, Junting Pan, Kalyan Vasudev Alwala, Nicolas Carion, Chao-Yuan Wu, Ross Girshick, Piotr Dollár, Christoph Feichtenhofer. +1. **[Segment Anything 3](https://huggingface.co/docs/transformers/model_doc/sam3)** (from Meta AI) released with the paper [SAM 3: Segment Anything with Concepts](https://ai.meta.com/research/publications/sam-3-segment-anything-with-concepts/) by the Meta AI SAM team. 1. **[SigLIP](https://huggingface.co/docs/transformers/main/model_doc/siglip)** (from Google AI) released with the paper [Sigmoid Loss for Language Image Pre-Training](https://huggingface.co/papers/2303.15343) by Xiaohua Zhai, Basil Mustafa, Alexander Kolesnikov, Lucas Beyer. 1. **[SmolLM3](https://huggingface.co/docs/transformers/main/model_doc/smollm3) (from Hugging Face) released with the blog post [SmolLM3: smol, multilingual, long-context reasoner](https://huggingface.co/blog/smollm3) by the Hugging Face TB Research team. 1. **[SmolVLM](https://huggingface.co/docs/transformers/main/model_doc/smolvlm) (from Hugging Face) released with the blog posts [SmolVLM - small yet mighty Vision Language Model](https://huggingface.co/blog/smolvlm) and [SmolVLM Grows Smaller – Introducing the 250M & 500M Models!](https://huggingface.co/blog/smolervlm) by the Hugging Face TB Research team. diff --git a/docs/snippets/6_supported-models.snippet b/docs/snippets/6_supported-models.snippet index 6af87653f..fac65c0d1 100644 --- a/docs/snippets/6_supported-models.snippet +++ b/docs/snippets/6_supported-models.snippet @@ -140,6 +140,7 @@ 1. **[SegFormer](https://huggingface.co/docs/transformers/model_doc/segformer)** (from NVIDIA) released with the paper [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://huggingface.co/papers/2105.15203) by Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose M. Alvarez, Ping Luo. 1. **[Segment Anything](https://huggingface.co/docs/transformers/model_doc/sam)** (from Meta AI) released with the paper [Segment Anything](https://huggingface.co/papers/2304.02643v1.pdf) by Alexander Kirillov, Eric Mintun, Nikhila Ravi, Hanzi Mao, Chloe Rolland, Laura Gustafson, Tete Xiao, Spencer Whitehead, Alex Berg, Wan-Yen Lo, Piotr Dollar, Ross Girshick. 1. **[Segment Anything 2](https://huggingface.co/docs/transformers/model_doc/sam2)** (from Meta AI) released with the paper [SAM 2: Segment Anything in Images and Videos](https://huggingface.co/papers/2408.00714) by Nikhila Ravi, Valentin Gabeur, Yuan-Ting Hu, Ronghang Hu, Chaitanya Ryali, Tengyu Ma, Haitham Khedr, Roman Rädle, Chloe Rolland, Laura Gustafson, Eric Mintun, Junting Pan, Kalyan Vasudev Alwala, Nicolas Carion, Chao-Yuan Wu, Ross Girshick, Piotr Dollár, Christoph Feichtenhofer. +1. **[Segment Anything 3](https://huggingface.co/docs/transformers/model_doc/sam3)** (from Meta AI) released with the paper [SAM 3: Segment Anything with Concepts](https://ai.meta.com/research/publications/sam-3-segment-anything-with-concepts/) by the Meta AI SAM team. 1. **[SigLIP](https://huggingface.co/docs/transformers/main/model_doc/siglip)** (from Google AI) released with the paper [Sigmoid Loss for Language Image Pre-Training](https://huggingface.co/papers/2303.15343) by Xiaohua Zhai, Basil Mustafa, Alexander Kolesnikov, Lucas Beyer. 1. **[SmolLM3](https://huggingface.co/docs/transformers/main/model_doc/smollm3) (from Hugging Face) released with the blog post [SmolLM3: smol, multilingual, long-context reasoner](https://huggingface.co/blog/smollm3) by the Hugging Face TB Research team. 1. **[SmolVLM](https://huggingface.co/docs/transformers/main/model_doc/smolvlm) (from Hugging Face) released with the blog posts [SmolVLM - small yet mighty Vision Language Model](https://huggingface.co/blog/smolvlm) and [SmolVLM Grows Smaller – Introducing the 250M & 500M Models!](https://huggingface.co/blog/smolervlm) by the Hugging Face TB Research team. From a327f2ef5cedeefa1759b5d777752956bd40f0cf Mon Sep 17 00:00:00 2001 From: Joshua Lochner <26504141+xenova@users.noreply.github.com> Date: Wed, 19 Nov 2025 11:11:57 -0500 Subject: [PATCH 5/5] Update SAM3 author list --- README.md | 2 +- docs/snippets/6_supported-models.snippet | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index bc9f817e1..db79b1248 100644 --- a/README.md +++ b/README.md @@ -426,7 +426,7 @@ You can refine your search by selecting the task you're interested in (e.g., [te 1. **[SegFormer](https://huggingface.co/docs/transformers/model_doc/segformer)** (from NVIDIA) released with the paper [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://huggingface.co/papers/2105.15203) by Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose M. Alvarez, Ping Luo. 1. **[Segment Anything](https://huggingface.co/docs/transformers/model_doc/sam)** (from Meta AI) released with the paper [Segment Anything](https://huggingface.co/papers/2304.02643v1.pdf) by Alexander Kirillov, Eric Mintun, Nikhila Ravi, Hanzi Mao, Chloe Rolland, Laura Gustafson, Tete Xiao, Spencer Whitehead, Alex Berg, Wan-Yen Lo, Piotr Dollar, Ross Girshick. 1. **[Segment Anything 2](https://huggingface.co/docs/transformers/model_doc/sam2)** (from Meta AI) released with the paper [SAM 2: Segment Anything in Images and Videos](https://huggingface.co/papers/2408.00714) by Nikhila Ravi, Valentin Gabeur, Yuan-Ting Hu, Ronghang Hu, Chaitanya Ryali, Tengyu Ma, Haitham Khedr, Roman Rädle, Chloe Rolland, Laura Gustafson, Eric Mintun, Junting Pan, Kalyan Vasudev Alwala, Nicolas Carion, Chao-Yuan Wu, Ross Girshick, Piotr Dollár, Christoph Feichtenhofer. -1. **[Segment Anything 3](https://huggingface.co/docs/transformers/model_doc/sam3)** (from Meta AI) released with the paper [SAM 3: Segment Anything with Concepts](https://ai.meta.com/research/publications/sam-3-segment-anything-with-concepts/) by the Meta AI SAM team. +1. **[Segment Anything 3](https://huggingface.co/docs/transformers/model_doc/sam3)** (from Meta Superintelligence Labs) released with the paper [SAM 3: Segment Anything with Concepts](https://ai.meta.com/research/publications/sam-3-segment-anything-with-concepts/) by SAM 3D Team, Xingyu Chen, Fu-Jen Chu, Pierre Gleize, Kevin J Liang, Alexander Sax, Hao Tang, Weiyao Wang, Michelle Guo, Thibaut Hardin, Xiang Li, Aohan Lin, Jiawei Liu, Ziqi Ma, Anushka Sagar, Bowen Song, Xiaodong Wang, Jianing Yang, Bowen Zhang, Piotr Dollar, Georgia Gkioxari, Matt Feiszli, Jitendra Malik, Nicolas Carion, Laura Gustafson, Yuan-Ting Hu, Shoubhik Debnath, Ronghang Hu, Didac Suris Coll-Vinent, Chaitanya Ryali, Kalyan Vasudev Alwala, Haitham Khedr, Andrew Huang, Jie Lei, Tengyu Ma, Baishan Guo, Arpit Kalla, Markus Marks, Joseph Greer, Meng Wang, Peize Sun, Roman Rädle, Triantafyllos Afouras, Effrosyni Mavroudi, Katherine Xu, Tsung-Han Wu, Yu Zhou, Liliane Momeni, Rishi Hazra, Shuangrui Ding, Sagar Vaze, Francois Porcher, Feng Li, Siyuan Li, Aishwarya Kamath, Ho Kei Cheng, Piotr Dollar, Nikhila Ravi, Kate Saenko, Pengchuan Zhang, Christoph Feichtenhofer. 1. **[SigLIP](https://huggingface.co/docs/transformers/main/model_doc/siglip)** (from Google AI) released with the paper [Sigmoid Loss for Language Image Pre-Training](https://huggingface.co/papers/2303.15343) by Xiaohua Zhai, Basil Mustafa, Alexander Kolesnikov, Lucas Beyer. 1. **[SmolLM3](https://huggingface.co/docs/transformers/main/model_doc/smollm3) (from Hugging Face) released with the blog post [SmolLM3: smol, multilingual, long-context reasoner](https://huggingface.co/blog/smollm3) by the Hugging Face TB Research team. 1. **[SmolVLM](https://huggingface.co/docs/transformers/main/model_doc/smolvlm) (from Hugging Face) released with the blog posts [SmolVLM - small yet mighty Vision Language Model](https://huggingface.co/blog/smolvlm) and [SmolVLM Grows Smaller – Introducing the 250M & 500M Models!](https://huggingface.co/blog/smolervlm) by the Hugging Face TB Research team. diff --git a/docs/snippets/6_supported-models.snippet b/docs/snippets/6_supported-models.snippet index fac65c0d1..edf4e3fb9 100644 --- a/docs/snippets/6_supported-models.snippet +++ b/docs/snippets/6_supported-models.snippet @@ -140,7 +140,7 @@ 1. **[SegFormer](https://huggingface.co/docs/transformers/model_doc/segformer)** (from NVIDIA) released with the paper [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://huggingface.co/papers/2105.15203) by Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose M. Alvarez, Ping Luo. 1. **[Segment Anything](https://huggingface.co/docs/transformers/model_doc/sam)** (from Meta AI) released with the paper [Segment Anything](https://huggingface.co/papers/2304.02643v1.pdf) by Alexander Kirillov, Eric Mintun, Nikhila Ravi, Hanzi Mao, Chloe Rolland, Laura Gustafson, Tete Xiao, Spencer Whitehead, Alex Berg, Wan-Yen Lo, Piotr Dollar, Ross Girshick. 1. **[Segment Anything 2](https://huggingface.co/docs/transformers/model_doc/sam2)** (from Meta AI) released with the paper [SAM 2: Segment Anything in Images and Videos](https://huggingface.co/papers/2408.00714) by Nikhila Ravi, Valentin Gabeur, Yuan-Ting Hu, Ronghang Hu, Chaitanya Ryali, Tengyu Ma, Haitham Khedr, Roman Rädle, Chloe Rolland, Laura Gustafson, Eric Mintun, Junting Pan, Kalyan Vasudev Alwala, Nicolas Carion, Chao-Yuan Wu, Ross Girshick, Piotr Dollár, Christoph Feichtenhofer. -1. **[Segment Anything 3](https://huggingface.co/docs/transformers/model_doc/sam3)** (from Meta AI) released with the paper [SAM 3: Segment Anything with Concepts](https://ai.meta.com/research/publications/sam-3-segment-anything-with-concepts/) by the Meta AI SAM team. +1. **[Segment Anything 3](https://huggingface.co/docs/transformers/model_doc/sam3)** (from Meta Superintelligence Labs) released with the paper [SAM 3: Segment Anything with Concepts](https://ai.meta.com/research/publications/sam-3-segment-anything-with-concepts/) by SAM 3D Team, Xingyu Chen, Fu-Jen Chu, Pierre Gleize, Kevin J Liang, Alexander Sax, Hao Tang, Weiyao Wang, Michelle Guo, Thibaut Hardin, Xiang Li, Aohan Lin, Jiawei Liu, Ziqi Ma, Anushka Sagar, Bowen Song, Xiaodong Wang, Jianing Yang, Bowen Zhang, Piotr Dollar, Georgia Gkioxari, Matt Feiszli, Jitendra Malik, Nicolas Carion, Laura Gustafson, Yuan-Ting Hu, Shoubhik Debnath, Ronghang Hu, Didac Suris Coll-Vinent, Chaitanya Ryali, Kalyan Vasudev Alwala, Haitham Khedr, Andrew Huang, Jie Lei, Tengyu Ma, Baishan Guo, Arpit Kalla, Markus Marks, Joseph Greer, Meng Wang, Peize Sun, Roman Rädle, Triantafyllos Afouras, Effrosyni Mavroudi, Katherine Xu, Tsung-Han Wu, Yu Zhou, Liliane Momeni, Rishi Hazra, Shuangrui Ding, Sagar Vaze, Francois Porcher, Feng Li, Siyuan Li, Aishwarya Kamath, Ho Kei Cheng, Piotr Dollar, Nikhila Ravi, Kate Saenko, Pengchuan Zhang, Christoph Feichtenhofer. 1. **[SigLIP](https://huggingface.co/docs/transformers/main/model_doc/siglip)** (from Google AI) released with the paper [Sigmoid Loss for Language Image Pre-Training](https://huggingface.co/papers/2303.15343) by Xiaohua Zhai, Basil Mustafa, Alexander Kolesnikov, Lucas Beyer. 1. **[SmolLM3](https://huggingface.co/docs/transformers/main/model_doc/smollm3) (from Hugging Face) released with the blog post [SmolLM3: smol, multilingual, long-context reasoner](https://huggingface.co/blog/smollm3) by the Hugging Face TB Research team. 1. **[SmolVLM](https://huggingface.co/docs/transformers/main/model_doc/smolvlm) (from Hugging Face) released with the blog posts [SmolVLM - small yet mighty Vision Language Model](https://huggingface.co/blog/smolvlm) and [SmolVLM Grows Smaller – Introducing the 250M & 500M Models!](https://huggingface.co/blog/smolervlm) by the Hugging Face TB Research team.