In [3]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import Qdrant
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
import os
from dotenv import load_dotenv

In [4]:
# Populate the process environment from a local .env file, then read the
# two API keys this notebook depends on. Each key is validated right after
# it is read so a missing secret fails fast with a clear message.
load_dotenv()

gemini_api_key = os.environ.get("GEMINI_API_KEY")
if gemini_api_key is None or gemini_api_key == "":
    raise ValueError("GEMINI_API_KEY is not set in the environment variables.")

qdrant_api_key = os.environ.get("QDRANT_API_KEY")
if qdrant_api_key is None or qdrant_api_key == "":
    raise ValueError("QDRANT_API_KEY is not set in the environment variables.")


In [8]:
from qdrant_client import QdrantClient

# Connect to the Qdrant cluster. The endpoint can be overridden with the
# QDRANT_URL environment variable; when it is unset or empty, the cluster
# this notebook was originally written against is used.
qdrant_client = QdrantClient(
    url=os.getenv("QDRANT_URL")
    or "https://d5a5f5ce-ffe6-4b64-b58e-361b6ec60509.us-west-2-0.aws.cloud.qdrant.io",
    api_key=qdrant_api_key,
)

collection_name = "gemini_embeddings"

# Ask the server whether the collection exists instead of treating *any*
# exception from get_collection() as "missing": authentication or network
# failures now surface as errors rather than triggering a create attempt.
if qdrant_client.collection_exists(collection_name):
    print(f"Collection '{collection_name}' already exists.")
else:
    try:
        qdrant_client.create_collection(
            collection_name=collection_name,
            # A single named vector called "vector"; anything that writes to
            # this collection must address the vector by that name.
            vectors_config={
                "vector": {
                    "size": 768,  # must equal the embedding model's output dimension
                    "distance": "Cosine",
                }
            },
        )
    except Exception as create_error:
        # Fail loudly: later cells cannot work without the collection.
        raise RuntimeError(
            f"Failed to create collection '{collection_name}': {create_error}"
        ) from create_error
    print(f"Collection '{collection_name}' created successfully.")

Collection 'gemini_embeddings' created successfully.


In [None]:

# Load the PDF file into LangChain documents.
pdf_file_path = "sample.pdf"
loader = PyPDFLoader(pdf_file_path)
documents = loader.load()

# Split the documents into overlapping chunks before embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=768,  # Adjust chunk size as needed
    chunk_overlap=200,  # Adjust overlap as needed
)
split_documents = text_splitter.split_documents(documents)

# Create the Gemini embedding client.
# The constructor's fields are `model` and `google_api_key` (not
# `model_name` / `api_key`), and the request timeout is passed through
# `request_options`.
# NOTE(review): `max_retries` was dropped because it does not appear to be a
# documented field of GoogleGenerativeAIEmbeddings - confirm against the
# installed langchain-google-genai version and re-add if supported.
# NOTE(review): the target collection was created with 768-dimensional
# vectors; confirm this model returns 768-dimensional embeddings, otherwise
# the upsert below will be rejected by Qdrant.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-exp-03-07",
    google_api_key=gemini_api_key,
    request_options={"timeout": 60},
)

# Attach to the existing collection through the already-authenticated client.
# `Qdrant.from_documents` does not take a `client` argument (it builds its own
# client from url/api_key), so the store is constructed directly instead.
# `vector_name` must match the named vector the collection was created with.
vector_store = Qdrant(
    client=qdrant_client,
    collection_name=collection_name,
    embeddings=embeddings,
    vector_name="vector",
)
vector_store.add_documents(split_documents)
