In [6]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import Qdrant
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
import google.generativeai as genai
import os
from dotenv import load_dotenv

In [7]:
#load gemini api key from .env file
load_dotenv()
gemini_api_key = os.getenv("GEMINI_API_KEY")
qdrant_api_key = os.getenv("QDRANT_API_KEY")
if not gemini_api_key:
    raise ValueError("GEMINI_API_KEY is not set in the environment variables.")
if not qdrant_api_key:
    raise ValueError("QDRANT_API_KEY is not set in the environment variables.")


In [8]:
from qdrant_client.models import VectorParams, Distance
from qdrant_client import QdrantClient
qdrant_client = QdrantClient(
    url="https://d5a5f5ce-ffe6-4b64-b58e-361b6ec60509.us-west-2-0.aws.cloud.qdrant.io",
    api_key=qdrant_api_key,
)
collection_name = "gemini_embeddings"
try:
    collection_info = qdrant_client.get_collection(collection_name)
    print(f"Collection '{collection_name}' already exists.")
except Exception as e:
    try:
        qdrant_client.create_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(size=768, distance=Distance.COSINE)
        )
        print(f"Collection '{collection_name}' created successfully.")
    except Exception as create_error:
        print(f"Failed to create collection '{collection_name}': {create_error}")

Collection 'gemini_embeddings' already exists.


In [11]:
pdf_file_path = "Docker.pdf"
loader = PyPDFLoader(pdf_file_path)
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=768,
    chunk_overlap=200
)
split_documents = text_splitter.split_documents(documents)
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=gemini_api_key,
    max_retries=3,
    timeout=60,
)

# Create the vector store instance with the existing client
vector_store = Qdrant(
    client=qdrant_client,
    collection_name=collection_name,
    embeddings=embeddings,
)

# Add documents to the vector store
vector_store.add_documents(split_documents)

  vector_store = Qdrant(


['f5ccda36d58f490b9adc1312a7ea58ed',
 '729cc79e50b14e1dba08817e5b7470c9',
 'f9d95e74051d439489422d2c0be94a36',
 '87b509332cce42efa34826980f7c6d1f',
 'b5b5364789a4420a91fd4836af946ae9']

In [12]:
# Test the vector store with a similarity search
query = "What is Docker?"
similar_docs = vector_store.similarity_search(query, k=3)
print(f"Found {len(similar_docs)} similar documents for query: '{query}'")
print("\nFirst result:")
print(similar_docs[0].page_content[:200] + "..." if len(similar_docs[0].page_content) > 200 else similar_docs[0].page_content)

Found 3 similar documents for query: 'What is Docker?'

First result:
Docker 
• Docker is an open-source platform that automates the deployment, scaling, and 
management of applications inside containers. 
• Containers package software with all its dependencies, ensurin...
