In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell runs and local edits take effect without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [3]:
import pandas as pd

from fastembed import (
    SparseTextEmbedding,
    TextEmbedding,
    LateInteractionTextEmbedding,
    ImageEmbedding,
)
from fastembed.rerank.cross_encoder import TextCrossEncoder

  from .autonotebook import tqdm as notebook_tqdm


## Supported Text Embedding Models

In [5]:
# Table of the dense text embedding models reported by
# TextEmbedding.list_supported_models(). The "sources", "model_file" and
# "additional_files" columns are removed, and the remaining rows are ordered
# by ascending "size_in_GB" with a fresh 0..n-1 index.
supported_models = (
    pd.DataFrame(TextEmbedding.list_supported_models())
    .drop(["sources", "model_file", "additional_files"], axis="columns")
    .sort_values(by="size_in_GB", ignore_index=True)
)
supported_models

Unnamed: 0,model,dim,description,license,size_in_GB,tasks
0,BAAI/bge-small-en-v1.5,384,"Text embeddings, Unimodal (text), English, 512...",mit,0.067,
1,BAAI/bge-small-zh-v1.5,512,"Text embeddings, Unimodal (text), Chinese, 512...",mit,0.09,
2,sentence-transformers/all-MiniLM-L6-v2,384,"Text embeddings, Unimodal (text), English, 256...",apache-2.0,0.09,
3,snowflake/snowflake-arctic-embed-xs,384,"Text embeddings, Unimodal (text), English, 512...",apache-2.0,0.09,
4,jinaai/jina-embeddings-v2-small-en,512,"Text embeddings, Unimodal (text), English, 819...",apache-2.0,0.12,
5,BAAI/bge-small-en,384,"Text embeddings, Unimodal (text), English, 512...",mit,0.13,
6,nomic-ai/nomic-embed-text-v1.5-Q,768,"Text embeddings, Multimodal (text, image), Eng...",apache-2.0,0.13,
7,snowflake/snowflake-arctic-embed-s,384,"Text embeddings, Unimodal (text), English, 512...",apache-2.0,0.13,
8,BAAI/bge-base-en-v1.5,768,"Text embeddings, Unimodal (text), English, 512...",mit,0.21,
9,sentence-transformers/paraphrase-multilingual-...,384,"Text embeddings, Unimodal (text), Multilingual...",apache-2.0,0.22,


## Supported Sparse Text Embedding Models

In [13]:
# Table of the sparse text embedding models reported by
# SparseTextEmbedding.list_supported_models(), without the "sources",
# "model_file" and "additional_files" columns, ordered by ascending
# "size_in_GB" and re-indexed from zero.
(
    pd.DataFrame(SparseTextEmbedding.list_supported_models())
    .drop(["sources", "model_file", "additional_files"], axis="columns")
    .sort_values(by="size_in_GB", ignore_index=True)
)

Unnamed: 0,model,vocab_size,description,license,size_in_GB,requires_idf
0,Qdrant/bm25,,BM25 as sparse embeddings meant to be used wit...,apache-2.0,0.01,True
1,Qdrant/bm42-all-minilm-l6-v2-attentions,30522.0,"Light sparse embedding model, which assigns an...",apache-2.0,0.09,True
2,prithivida/Splade_PP_en_v1,30522.0,Independent Implementation of SPLADE++ Model f...,apache-2.0,0.532,
3,prithvida/Splade_PP_en_v1,30522.0,Independent Implementation of SPLADE++ Model f...,apache-2.0,0.532,


## Supported Late Interaction Text Embedding Models

In [14]:
# Table of the late-interaction text embedding models reported by
# LateInteractionTextEmbedding.list_supported_models(), without the "sources"
# and "model_file" columns, ordered by ascending "size_in_GB" and re-indexed
# from zero.
(
    pd.DataFrame(LateInteractionTextEmbedding.list_supported_models())
    .drop(["sources", "model_file"], axis="columns")
    .sort_values(by="size_in_GB", ignore_index=True)
)

Unnamed: 0,model,dim,description,license,size_in_GB,additional_files
0,answerdotai/answerai-colbert-small-v1,96,"Text embeddings, Unimodal (text), Multilingual...",apache-2.0,0.13,
1,colbert-ir/colbertv2.0,128,Late interaction model,mit,0.44,
2,jinaai/jina-colbert-v2,128,New model that expands capabilities of colbert...,cc-by-nc-4.0,2.24,[onnx/model.onnx_data]


## Supported Image Embedding Models

In [15]:
# Table of the image embedding models reported by
# ImageEmbedding.list_supported_models(), without the "sources" and
# "model_file" columns, ordered by ascending "size_in_GB" and re-indexed
# from zero.
(
    pd.DataFrame(ImageEmbedding.list_supported_models())
    .drop(["sources", "model_file"], axis="columns")
    .sort_values(by="size_in_GB", ignore_index=True)
)

Unnamed: 0,model,dim,description,license,size_in_GB
0,Qdrant/resnet50-onnx,2048,"Image embeddings, Unimodal (image), 2016 year",apache-2.0,0.1
1,Qdrant/clip-ViT-B-32-vision,512,"Image embeddings, Multimodal (text&image), 202...",mit,0.34
2,Qdrant/Unicom-ViT-B-32,512,"Image embeddings, Multimodal (text&image), 202...",apache-2.0,0.48
3,Qdrant/Unicom-ViT-B-16,768,Image embeddings (more detailed than Unicom-Vi...,apache-2.0,0.82


## Supported Rerank Cross Encoder Models

In [16]:
# Table of the cross-encoder reranking models reported by
# TextCrossEncoder.list_supported_models(), without the "sources" and
# "model_file" columns, ordered by ascending "size_in_GB" and re-indexed
# from zero.
(
    pd.DataFrame(TextCrossEncoder.list_supported_models())
    .drop(["sources", "model_file"], axis="columns")
    .sort_values(by="size_in_GB", ignore_index=True)
)

Unnamed: 0,model,size_in_GB,description,license
0,Xenova/ms-marco-MiniLM-L-6-v2,0.08,MiniLM-L-6-v2 model optimized for re-ranking t...,apache-2.0
1,Xenova/ms-marco-MiniLM-L-12-v2,0.12,MiniLM-L-12-v2 model optimized for re-ranking ...,apache-2.0
2,jinaai/jina-reranker-v1-tiny-en,0.13,Designed for blazing-fast re-ranking with 8K c...,apache-2.0
3,jinaai/jina-reranker-v1-turbo-en,0.15,Designed for blazing-fast re-ranking with 8K c...,apache-2.0
4,BAAI/bge-reranker-base,1.04,BGE reranker base model for cross-encoder re-r...,mit
5,jinaai/jina-reranker-v2-base-multilingual,1.11,A multi-lingual reranker model for cross-encod...,cc-by-nc-4.0
