In [13]:
from langchain_community.document_loaders import TextLoader
from langchain_qdrant import QdrantVectorStore, FastEmbedSparse, RetrievalMode
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings

In [5]:
loader = TextLoader("python.txt")
documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=200)
docs = text_splitter.split_documents(documents)

In [7]:
embedding_model = HuggingFaceEmbeddings()

  from tqdm.autonotebook import tqdm, trange


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [8]:
docker_url = "http://localhost:6333/"
qdrant_docker = QdrantVectorStore.from_documents(
  docs,
  embedding_model,
  collection_name = "test_collection"
)

In [9]:
qdrant_docker.similarity_search("what is python?")

[Document(metadata={'source': 'python.txt', '_id': 'cb6c2ff0-17d8-4ecf-ac18-7b9e426cea33', '_collection_name': 'test_collection'}, page_content='Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code readability with the use of significant indentation.[32]'),
 Document(metadata={'source': 'python.txt', '_id': 'e9b78485-6947-454e-b73a-5230e89a8245', '_collection_name': 'test_collection'}, page_content='Python is a multi-paradigm programming language. Object-oriented programming and structured programming are fully supported, and many of their features support functional programming and'),
 Document(metadata={'source': 'python.txt', '_id': '89de10c1-5a93-4beb-a0eb-6792967311bd', '_collection_name': 'test_collection'}, page_content='Python is dynamically typed and garbage-collected. It supports multiple programming paradigms, including structured (particularly procedural), object-oriented and functional programming. It is often'),
 Document(met

# Hybrid Search

In [21]:
sparse_model = FastEmbedSparse(model_name="Qdrant/bm42-all-minilm-l6-v2-attentions",batch_size=4)

Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

In [22]:
qdrant_hybrid = QdrantVectorStore.from_documents(
  docs,
  embedding_model,
  url=docker_url,
  collection_name = "test_hybrid",
  sparse_embedding = sparse_model,
  retrieval_mode=RetrievalMode.HYBRID
)

In [23]:
qdrant_hybrid.similarity_search("what is python?")

[Document(metadata={'source': 'python.txt', '_id': 'a420861c-7f6d-4a0a-b8fc-1690fb083860', '_collection_name': 'test_hybrid'}, page_content='Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code readability with the use of significant indentation.[32]'),
 Document(metadata={'source': 'python.txt', '_id': '27ebc135-9219-41f4-a947-dfab70f5851a', '_collection_name': 'test_hybrid'}, page_content='Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code readability with the use of significant indentation.[32]'),
 Document(metadata={'source': 'python.txt', '_id': '154f4c5f-4762-42b3-b650-49a45f096d7b', '_collection_name': 'test_hybrid'}, page_content='Python is a multi-paradigm programming language. Object-oriented programming and structured programming are fully supported, and many of their features support functional programming and'),
 Document(metadata={'source': 'python.txt', '_id': 'a2f68f3d-69c2-4

# Client

In [24]:
from qdrant_client import QdrantClient

In [25]:
client = QdrantClient(url=docker_url)

In [27]:
hybrid_search = QdrantVectorStore(
  client=client,
  collection_name = "test_hybrid",
  embedding=embedding_model,
  sparse_embedding = sparse_model,
  retrieval_mode=RetrievalMode.HYBRID
  
)

In [28]:
hybrid_search.similarity_search("what is indentation in pythohn")

[Document(metadata={'source': 'python.txt', '_id': '5ac2ab93-b709-4d42-a344-1a4c3c210eab', '_collection_name': 'test_hybrid'}, page_content="in indentation signifies the end of the current block.[92] Thus, the program's visual structure accurately represents its semantic structure.[93] This feature is sometimes termed the off-side rule."),
 Document(metadata={'source': 'python.txt', '_id': '227ee372-4c39-4022-99f2-1a7c2e7559a4', '_collection_name': 'test_hybrid'}, page_content='Indentation\nMain article: Python syntax and semantics ยง Indentation'),
 Document(metadata={'source': 'python.txt', '_id': '9bdcb8a7-1e48-41b4-8ec7-6555929b5440', '_collection_name': 'test_hybrid'}, page_content="indentation signifies the end of the current block.[92] Thus, the program's visual structure accurately represents its semantic structure.[93] This feature is sometimes termed the off-side rule. Some"),
 Document(metadata={'source': 'python.txt', '_id': 'fb0d2ec6-a29b-4fd8-9f54-2d3cfa9900d2', '_collect