### Load the Hugging Face API token

In [1]:
import os
from dotenv import load_dotenv

# Read variables from a local .env file (if one exists) into the process
# environment. Previously `load_dotenv` was imported but never called, so
# HF_TOKEN was only visible when it had already been exported in the shell.
load_dotenv()

hf_token = os.getenv("HF_TOKEN")
if hf_token is None:
    # Assigning None into os.environ raises an opaque TypeError; fail early
    # with a message that names the missing variable instead.
    raise RuntimeError(
        "HF_TOKEN is not set. Export it in the shell or add it to a .env file "
        "before running this notebook."
    )

# Mirror the token under the HUGGINGFACEHUB_API_TOKEN variable name.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token

### Specify a location where HuggingFace models will be downloaded

In [2]:

# Directory where downloaded Hugging Face models are stored.
# NOTE: this is an absolute, machine-specific path.
hf_home_dir = "/home/abhishek/ad-workspace/huggingface"
os.environ["HF_HOME"] = hf_home_dir

### Load HuggingFace all-MiniLM-L6-v2 embedding model

In [3]:
from langchain_huggingface import HuggingFaceEmbeddings

# Identifier of the sentence-transformers checkpoint used as the embedding model.
embedding_model_id = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model=embedding_model_id)

### Documents

In [4]:
# Toy corpus of short facts about India, grouped by theme and then
# concatenated in the original order.
state_capital_facts = [
    "Kolkata is the capital of West Bengal",
    "Dispur is the capital of Assam",
    "Mumbai is the capital of Maharashtra",
    "Chennai is the capital of Tamil Nadu",
    "Kohima is the capital of Nagaland",
]
general_facts = [
    "Bengaluru is known as the Silicon Valley of India",
    "Hyderabad is famous for its IT industry and biryani",
    "The Ganges is one of the longest rivers in India",
    "The Himalayas are the highest mountain range in the world",
]
national_capital_fact = "New Delhi is the capital of India"

documents = [*state_capital_facts, *general_facts, national_capital_fact]

In [5]:
from langchain_core.documents import Document

# Wrap each raw sentence in a Document; only page_content is supplied.
docs = list(map(lambda sentence: Document(page_content=sentence), documents))

In [6]:
# Display the Document objects built from the raw sentences.
docs

[Document(metadata={}, page_content='Kolkata is the capital of West Bengal'),
 Document(metadata={}, page_content='Dispur is the capital of Assam'),
 Document(metadata={}, page_content='Mumbai is the capital of Maharashtra'),
 Document(metadata={}, page_content='Chennai is the capital of Tamil Nadu'),
 Document(metadata={}, page_content='Kohima is the capital of Nagaland'),
 Document(metadata={}, page_content='Bengaluru is known as the Silicon Valley of India'),
 Document(metadata={}, page_content='Hyderabad is famous for its IT industry and biryani'),
 Document(metadata={}, page_content='The Ganges is one of the longest rivers in India'),
 Document(metadata={}, page_content='The Himalayas are the highest mountain range in the world'),
 Document(metadata={}, page_content='New Delhi is the capital of India')]

In [7]:
from uuid import uuid4

# One freshly generated random UUID string per document; these are later
# passed as the ids when the documents are added to the vector store.
uuids = [str(uuid4()) for _ in docs]

In [8]:
# Display the generated ids (they differ on every run because uuid4 is random).
uuids

['786df122-3cea-4d20-b599-7956693c49c1',
 '66ffa341-fe40-4716-af25-d9ad52c46b0c',
 'b3d3c103-ec95-4b0e-ad74-a70da614e9d8',
 '0a9e5d3c-9e4a-4da8-835d-43d7f19c36b7',
 '4f6b8c6c-f5ac-4bdc-8e1b-8c91ae525658',
 '33deefcb-c5a6-4deb-b5bf-5942a9f2bb60',
 '67e3a3a7-5080-4325-bec2-11afd444ca78',
 '1f10358a-ff99-4529-8a8c-33dea2226b52',
 '38928b50-bb5b-4e2b-8ca1-224380b02948',
 '212cb8b1-3ce9-45bd-86f5-3308194afd9d']

### Compute text embeddings of the documents

In [9]:
# Embed the raw sentences in a single call.
vector_embeddings = embeddings.embed_documents(texts=documents)

In [10]:
# Report how many vectors were produced and the length of the first one.
embedding_count = len(vector_embeddings)
embedding_dim = len(vector_embeddings[0])

print("No. of embedding vectors : ", embedding_count)
print("Dimension of each embedding vector : ", embedding_dim)

No. of embedding vectors :  10
Dimension of each embedding vector :  384


## Qdrant vector store

### Initialization

In [11]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

# Name shared by the collection and the vector store wrapper.
COLLECTION_NAME = "india_records_collection"
# Must match the embedding dimension printed earlier in this notebook (384).
VECTOR_SIZE = 384

# Qdrant client created with the ":memory:" location.
client = QdrantClient(":memory:")

# Create the collection with cosine distance before wrapping it.
vector_params = VectorParams(size=VECTOR_SIZE, distance=Distance.COSINE)
client.create_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=vector_params,
)

# LangChain wrapper that uses `embeddings` for this collection.
vector_store = QdrantVectorStore(
    client=client,
    collection_name=COLLECTION_NAME,
    embedding=embeddings,
)

### Add documents

In [12]:
# Add the documents under the pre-generated ids and display the returned ids.
inserted_ids = vector_store.add_documents(documents=docs, ids=uuids)
inserted_ids

['786df122-3cea-4d20-b599-7956693c49c1',
 '66ffa341-fe40-4716-af25-d9ad52c46b0c',
 'b3d3c103-ec95-4b0e-ad74-a70da614e9d8',
 '0a9e5d3c-9e4a-4da8-835d-43d7f19c36b7',
 '4f6b8c6c-f5ac-4bdc-8e1b-8c91ae525658',
 '33deefcb-c5a6-4deb-b5bf-5942a9f2bb60',
 '67e3a3a7-5080-4325-bec2-11afd444ca78',
 '1f10358a-ff99-4529-8a8c-33dea2226b52',
 '38928b50-bb5b-4e2b-8ca1-224380b02948',
 '212cb8b1-3ce9-45bd-86f5-3308194afd9d']

### Query

In [13]:
# Natural-language question used for the similarity search below.
# NOTE(review): the phrasing is ungrammatical ("What is Hyderabad famous for?"
# would be correct); left unchanged because the exact text determines the
# query embedding and therefore the recorded results.
query = "Why is Hyderabad famous for?"

### Similarity Search

In [14]:
# Retrieve the three stored documents most similar to the query.
similar_docs = vector_store.similarity_search(query=query, k=3)

In [15]:
# Display the retrieved documents.
similar_docs

[Document(metadata={'_id': '67e3a3a7-5080-4325-bec2-11afd444ca78', '_collection_name': 'india_records_collection'}, page_content='Hyderabad is famous for its IT industry and biryani'),
 Document(metadata={'_id': 'b3d3c103-ec95-4b0e-ad74-a70da614e9d8', '_collection_name': 'india_records_collection'}, page_content='Mumbai is the capital of Maharashtra'),
 Document(metadata={'_id': '786df122-3cea-4d20-b599-7956693c49c1', '_collection_name': 'india_records_collection'}, page_content='Kolkata is the capital of West Bengal')]

In [16]:
# Text of the first (top-ranked) result.
similar_docs[0].page_content

'Hyderabad is famous for its IT industry and biryani'