## Fast Embeded

In [1]:
from typing import List
import numpy as np

### Load default model (BAAI/bge-small-en-v1.5)

In [2]:
from fastembed import TextEmbedding

In [3]:
embedding_model = TextEmbedding()
print("The model BAAI/bge-small-en-v1.5 is ready to use.")

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

config.json:   0%|          | 0.00/706 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

model_optimized.onnx:   0%|          | 0.00/66.5M [00:00<?, ?B/s]

The model BAAI/bge-small-en-v1.5 is ready to use.


### Sample data

In [4]:
documents: List[str] = [
    "FastEmbed is lighter than Transformers & Sentence-Transformers.",
    "FastEmbed is supported by and maintained by Qdrant.",
]

### Embed data

In [5]:
embeddings_generator = embedding_model.embed(documents)
embeddings_list = list(embeddings_generator)
len(embeddings_list[0])  

384

In [6]:
print("Embeddings:\n", embeddings_list)

Embeddings:
 [array([-9.47402492e-02,  1.00907953e-02, -3.08784992e-02,  2.37383470e-02,
        2.35118857e-03,  6.27760717e-04, -8.24847594e-02,  8.47143936e-04,
        3.71927395e-02,  1.43602341e-02, -2.38315947e-02,  8.75035126e-04,
        3.01857982e-02,  4.09759544e-02,  4.37467620e-02,  8.65210779e-03,
        5.06204925e-02, -1.26484623e-02, -3.13580632e-02,  2.06078682e-02,
        8.13657790e-02, -5.41905686e-02,  1.02106864e-02, -4.58781496e-02,
       -7.70630362e-03, -3.52212042e-02, -2.35518496e-02, -2.68954672e-02,
       -2.73883492e-02, -1.57296523e-01,  1.25152506e-02, -3.55941989e-02,
        6.44212365e-02, -1.33711367e-03, -3.40489410e-02,  9.80438944e-03,
       -3.25036831e-02,  1.50529351e-02, -6.96964264e-02, -1.67580470e-02,
       -2.34719217e-02,  2.00483780e-02, -1.56124253e-02,  1.41620811e-03,
       -1.32012917e-02, -2.17135269e-02,  6.86307326e-02, -2.02348754e-02,
       -9.11168847e-03, -1.49064027e-02,  6.37818202e-02,  1.36075877e-02,
        7.0

## Fast Embeded & Qdrant

### Inital the client
Qdrant Client has a simple in-memory mode that lets you try semantic search locally.



In [7]:
from qdrant_client import QdrantClient

client = QdrantClient(":memory:")  # Qdrant is running from RAM.

### Sample data

In [8]:
docs = ["Qdrant has a LangChain integration for chatbots.", "Qdrant has a LlamaIndex integration for agents."]
metadata = [
    {"source": "langchain-docs"},
    {"source": "llamaindex-docs"},
]
ids = [42, 2]

### Load data to a collection

In [9]:
# Create a test collection and upsert your two documents to it.
client.add(
    collection_name="test_collection",
    documents=docs,
    metadata=metadata,
    ids=ids
)

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/701 [00:00<?, ?B/s]

model_optimized.onnx:   0%|          | 0.00/133M [00:00<?, ?B/s]

[42, 2]

### Vector Search

In [10]:
search_result = client.query(
    collection_name="test_collection",
    query_text="Which integration is best for agents?"
)
print(search_result)

[QueryResponse(id=2, embedding=None, sparse_embedding=None, metadata={'document': 'Qdrant has a LlamaIndex integration for agents.', 'source': 'llamaindex-docs'}, document='Qdrant has a LlamaIndex integration for agents.', score=0.87491801319731), QueryResponse(id=42, embedding=None, sparse_embedding=None, metadata={'document': 'Qdrant has a LangChain integration for chatbots.', 'source': 'langchain-docs'}, document='Qdrant has a LangChain integration for chatbots.', score=0.8351846627714035)]
