Initialize the Qdrant client and create the collection

In [None]:
%pip install qdrant-client
%pip install dotenv
%pip install PyPDF2
%pip install langchain
%pip install google-genai


In [None]:
from qdrant_client import QdrantClient
from qdrant_client.http import models
import os
from dotenv import load_dotenv

# Load variables from a .env file (if one is present) so os.getenv can see them.
load_dotenv()

QDRANT_HOST_URL = os.getenv('QDRANT_HOST_URL')
COLLECTION_NAME = "documents-collection"
EMBEDDING_MODEL = "text-embedding-004"
# Length of every stored vector. The embedding cells request
# output_dimensionality=768, and the collection must be created with the
# same size, so keep these values in sync.
EMBEDDING_DIMENSIONS = 768

qdrant_client = QdrantClient(
  url=QDRANT_HOST_URL
)

# Only create the collection when it is missing, so re-running this cell
# does not attempt to create it a second time.
if not qdrant_client.collection_exists(COLLECTION_NAME):
  qdrant_client.create_collection(
      collection_name=COLLECTION_NAME,
      vectors_config=models.VectorParams(
        size=EMBEDDING_DIMENSIONS,
        distance=models.Distance.COSINE
      )
  )

Read a PDF file and split its text into chunks

In [None]:
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter

pdf = "assets/StockMarketAnalysis.pdf"

with open(pdf, 'rb') as file:
    pdf_reader = PdfReader(file)
    # Join pages with a newline so the last line of one page does not fuse
    # with the first line of the next; "\n" is also the separator the
    # splitter below breaks on. `or ""` keeps a page that yields no text
    # from breaking the join.
    text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)

# Split on line breaks into pieces of up to ~1000 characters, repeating up
# to 200 characters between neighbours so context spanning a boundary is
# not lost.
text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
      )
chunks = text_splitter.split_text(text)

Convert the chunks into vector points by embedding them with Google Gen AI, then store them in the Qdrant collection

In [None]:
# Convert the chunks into points (the central entity in Qdrant),
# ready to be stored in the vector store.

import uuid
from google import genai
from google.genai import types
from qdrant_client.http.models import PointStruct
import os
from dotenv import load_dotenv

load_dotenv()

points = []
# NOTE(review): presumably the client picks up its API key from the
# environment populated by load_dotenv() above -- confirm the variable name.
google_ai_client = genai.Client()

print(len(chunks), "chunks to embed")

for idx, chunk in enumerate(chunks):
  print("Embedding chunk", idx+1, "/", len(chunks))
  response = google_ai_client.models.embed_content(
    contents=chunk,
    model=EMBEDDING_MODEL,
    config=types.EmbedContentConfig(output_dimensionality=768)
  )
  embeddings = response.embeddings[0].values
  # Derive the id from the chunk text instead of using a random uuid4:
  # re-running the notebook then overwrites the same points on upsert
  # rather than adding a duplicate copy of every chunk. Chunks with
  # identical text share one id and are therefore stored once.
  point_id = str(uuid.uuid5(uuid.NAMESPACE_OID, chunk))
  points.append(PointStruct(
    id=point_id,
    payload={"text": chunk},
    vector=embeddings
    )
  )


In [None]:
# Store the embedded chunks in the collection. With no points (for example
# when the PDF produced no text) there is nothing to send, so skip the
# request and say so instead.
if points:
  print("Upserting", len(points), "points")
  upsert_result = qdrant_client.upsert(
    collection_name=COLLECTION_NAME,
    wait=True,
    points=points
  )
  # Show the outcome that was previously displayed as the cell's value.
  print(upsert_result)
else:
  print("No points to upsert")

Embed a query with Google Gen AI, search Qdrant for the closest chunks, and ask Gemini to answer using them

In [None]:
query = "What is a candlestick chart?"

# Embed the question with the same model and dimensionality used for the
# chunks. Separate names keep this cell from overwriting the `response` /
# `embeddings` variables left behind by the indexing loop.
query_response = google_ai_client.models.embed_content(
  contents=query,
  model=EMBEDDING_MODEL,
  config=types.EmbedContentConfig(output_dimensionality=768)
)
query_vector = query_response.embeddings[0].values

# Fetch the three stored chunks closest to the question.
search_result = qdrant_client.query_points(
    collection_name=COLLECTION_NAME,
    query=query_vector,
    limit=3
).points

# Separate the chunks with blank lines so neighbouring passages do not run
# together, and label the context and the question so the model can tell
# them apart.
context = "\n\n".join(hit.payload['text'] for hit in search_result)
prompt = (
    "Answer the question using the context below.\n\n"
    f"Context:\n{context}\n\n"
    f"Question: {query}"
)

completion = google_ai_client.models.generate_content(
  model='gemini-2.0-flash',
  contents=prompt
)

print(completion.text)