### Import dependencies

In [2]:
import os
from dotenv import load_dotenv

from openai import OpenAI

from langchain.agents import AgentExecutor
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.agents import create_tool_calling_agent
from langchain_core.prompts import PromptTemplate
from langchain_community.vectorstores import SupabaseVectorStore
from langchain_openai import OpenAIEmbeddings
from langchain import hub
from langchain_core.tools import tool
from langchain_community.chat_models import ChatOllama

from qdrant_client import QdrantClient
from qdrant_client.http.models import Filter, SparseVector
from fastembed import TextEmbedding, SparseTextEmbedding
from langchain.vectorstores import Qdrant

### Load environment variables

In [3]:
# Load variables via python-dotenv, then read the Qdrant settings from the
# process environment. Each value is None when its variable is not set.
load_dotenv()
QDRANT_URL = os.environ.get("QDRANT_URL")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
COLLECTION_NAME = os.environ.get("QDRANT_COLLECTION_NAME")

In [4]:
# Embedding models used for hybrid search: one dense, one sparse.
DENSE_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
SPARSE_MODEL_NAME = "Qdrant/bm42-all-minilm-l6-v2-attentions"

dense_model = TextEmbedding(model_name=DENSE_MODEL_NAME)
sparse_model = SparseTextEmbedding(model_name=SPARSE_MODEL_NAME)

In [5]:
# Connect to Qdrant with the URL and API key read from the environment.
qdrant_client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)


  qdrant_client = QdrantClient(


In [6]:
# OpenAI chat model with temperature=0.
llm = ChatOpenAI(temperature=0)

# Ollama chat model reached through OLLAMA_URL, also with temperature=0.
# Model options noted by the author: llama3.1:latest, dolphin3.1:latest, llava-llama3.1:latest
ollama_settings = {
    "model": "dolphin3:latest",
    "base_url": os.getenv("OLLAMA_URL"),
    "temperature": 0,
}
llm_ollama = ChatOllama(**ollama_settings)


  llm_ollama = ChatOllama(


In [7]:
# ReAct-style prompt: lists the available tools, prescribes the
# Thought / Action / Action Input / Observation loop, and ends with the
# user's question followed by the agent's running scratchpad.
# PromptTemplate is already imported from langchain_core.prompts in the
# imports cell, so it is not re-imported here through the legacy path.
template = """Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original question

Begin!

Question: {input}
{agent_scratchpad}
"""

# from_template() derives the input variables from the {placeholders} above.
prompt = PromptTemplate.from_template(template)



In [8]:
# `models` is not part of the imports cell, so it is imported here.
# QdrantClient and `tool` are already imported there and are not re-imported.
from qdrant_client import models


def _format_point(pt) -> str:
    """Render one scored point as a plain-text block, one field per line."""
    # Tolerate a missing payload or missing keys instead of raising KeyError
    # in the middle of an agent run.
    payload = pt.payload or {}
    return "\n".join(
        (
            f"ID: {pt.id}",
            f"Score: {pt.score}",
            f"Text: {payload.get('text', '')}",
            f"Source: {payload.get('file_name', 'unknown')}",
            f"Page: {payload.get('page_number', 'n/a')}",
        )
    )


@tool(response_format="content_and_artifact")
def retrieve(query: str):
    """Perform hybrid (dense + sparse) search using Qdrant Query API."""
    # NOTE: the docstring above is what LangChain's @tool uses as the tool
    # description, so it is intentionally left short and unchanged.
    #
    # Args:
    #     query: free-text search string.
    # Returns:
    #     (serialized, context): the formatted text for the model and the raw
    #     query_points() response, matching response_format="content_and_artifact".

    # embed() is given a one-item list, so the first result is the only one.
    # Both vectors are converted to plain Python lists before being sent.
    dense_emb = next(iter(dense_model.embed([query]))).tolist()
    sparse_emb = next(iter(sparse_model.embed([query])))
    sparse_vector = models.SparseVector(
        indices=sparse_emb.indices.tolist(),
        values=sparse_emb.values.tolist(),
    )

    # Fusion query: 20 candidates from each branch, fused with RRF,
    # at most 5 points returned.
    context = qdrant_client.query_points(
        collection_name=COLLECTION_NAME,
        prefetch=[
            models.Prefetch(query=sparse_vector, using="sparse", limit=20),
            models.Prefetch(query=dense_emb, using="dense", limit=20),
        ],
        query=models.FusionQuery(fusion=models.Fusion.RRF),
        with_payload=True,
        limit=5,
    )

    # One block per point, separated by a blank line. Building each block from
    # separate lines keeps source indentation out of the text sent to the LLM.
    serialized = "\n\n".join(_format_point(pt) for pt in context.points)

    return serialized, context


In [9]:
from langchain.agents import AgentExecutor, create_react_agent

# The agent's toolbox: only the hybrid-search tool.
tools = [retrieve]

# ReAct agent built from the Ollama model, the tool list and the prompt.
agent = create_react_agent(prompt=prompt, tools=tools, llm=llm_ollama)

# Executor that runs the agent loop; verbose output is on and parsing
# errors are handled rather than raised.
executor_options = {"verbose": True, "handle_parsing_errors": True}
agent_executor = AgentExecutor(agent=agent, tools=tools, **executor_options)

In [10]:
%%time
# Ask the agent a question; the result dict is stored in `response`.
question = "which are the services offered by the company?"
response = agent_executor.invoke({"input": question})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find information about the services provided by the company.

Action: retrieve
Action Input: "services"[0m[36;1m[1;3mID: 168        
Score: 0.5        
Text: Tools and
resources        
Source: benefits-guide-2025.pdf        
Page: 29

ID: 12        
Score: 0.5        
Text: In a Full-Service relationship with a dedicated ﬁnancial professional, you have access to a full suite of TIS brokerage and TAS 
investment advisory products and services. Minimum asset balance requirements apply. You have a direct relationship with
your ﬁnancial professional for investment strategies, recommendations, and guidance speciﬁc to your needs and goals. Full- 
Service brokerage accounts are eligible to invest in equities, ﬁxed income, ETFs, mutual funds, annuities, structured products,
alternative investments, and other securities on our Platform which TIS makes available to such accounts. 
In a Full-Service relationship w

In [11]:
# Print only the 'output' entry of the agent's result.
print(response["output"])

The company offers various services such as Full-Service brokerage accounts, Self-Directed brokerage services, mutual funds, annuities, structured products, alternative investments, and other securities. These services are more expensive than Self-Directed brokerage services and charge higher commission rates on securities transactions.


In [17]:
%%time
# Ask the agent a second question; `response` is overwritten with the new result.
question = "which are the main financial and health risk factors highlighted in the documents?"
response = agent_executor.invoke({"input": question})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find information about financial and health risk factors from the documents.

Action: retrieve
Action Input: query for financial and health risk factors[0m[36;1m[1;3mID: 125        
Score: 0.5714286        
Text: Support
my health        
Source: benefits-guide-2025.pdf        
Page: 5

ID: 131        
Score: 0.5        
Text: ; Oregon; and Washington
You can earn medical credits to offset your medical premium cost by participating in LifeForce. 
See the 2025 medical premiums on Benefits.Truist.com.
Support my health
$4,000 HDHP
•	 You pay out‑of‑pocket for medical and 
prescription expenses until you reach your      
annual deductible.
•	 Once you meet the annual deductible, you  
pay 20% coinsurance for in‑network and       
50% out‑of‑network. 
•	 The deductible and out‑of‑pocket  
maximum are embedded.
•	 You can enroll in the HSA and receive a 
contribution: $500 for individual and $1,000 
for a fam

In [13]:
# Print only the 'output' entry of the agent's result.
print(response["output"])

The main financial risk factor highlighted is the high-deductible health plan (HDHP) with different coverage amounts of $2,500 and $4,000. The HDHP offers a Health Savings Account (HSA), which provides advantages and flexibility in managing healthcare expenses.

The main health risk factors mentioned include triglycerides, HDL ("good cholesterol"), blood pressure, fasting glucose, waist circumference, and metabolic syndrome risk. Metabolic syndrome is defined as having any three of these five risk factors and is associated with increased risk of heart disease, stroke, and type 2 diabetes.


# *** ONLY RUN THE NOTEBOOK UP TO THIS POINT ***

From this point on, the notebook walks through the most critical parts of the code step by step. These cells are for explanation only and are not part of the normal run.

In [None]:
# `query` is otherwise only assigned in the last cell of this section, so it is
# defined here to let the walkthrough run top to bottom on a fresh kernel.
query = 'what are the services offered'

# Call the tool through .invoke(); calling the tool object directly is deprecated.
# With a plain string input this returns the formatted text content.
serialized = retrieve.invoke(query)

In [None]:
# Print the text returned by the retrieve call.
print(serialized)

In [None]:
# Embed the query with the dense model. embed() receives a one-item list,
# so the first vector it yields is the only one.
dense_emb = next(iter(dense_model.embed([query])))
# Length of the dense vector.
len(dense_emb)

In [None]:
# Show the first ten components of the dense embedding.
dense_emb[:10]

In [None]:
# Embed the query with the sparse model and keep the first (only) result.
sparse_embedding = next(iter(sparse_model.embed([query])))

In [None]:
# Inspect the sparse embedding object; its `indices` and `values` are used to build the sparse vector.
sparse_embedding

In [None]:
# Wrap the sparse embedding's indices and values (converted to lists) in a SparseVector.
sparse_indices = sparse_embedding.indices.tolist()
sparse_values = sparse_embedding.values.tolist()
sparse_vector = SparseVector(indices=sparse_indices, values=sparse_values)

In [None]:
# Inspect the SparseVector that will be sent as the sparse prefetch query.
sparse_vector

In [None]:
# Hybrid search: a sparse and a dense prefetch (20 candidates each) are
# fused with RRF, and at most 5 points are returned with their payloads.
prefetch_branches = [
    models.Prefetch(query=sparse_vector, using="sparse", limit=20),
    models.Prefetch(query=dense_emb, using="dense", limit=20),
]
response = qdrant_client.query_points(
    collection_name=COLLECTION_NAME,
    prefetch=prefetch_branches,
    query=models.FusionQuery(fusion=models.Fusion.RRF),
    limit=5,
    with_payload=True,
)

In [None]:
# Display the raw response object returned by query_points().
response

In [None]:
# Number of points returned by the fusion query.
len(response.points)

In [None]:
# Score of the point at index 4, i.e. the fifth result (the fusion query asks for limit=5).
# NOTE(review): presumably raises IndexError if fewer than five points came back — confirm.
response.points[4].score

In [None]:
# Format result: one block per point (ID, score, text, source file), with a
# blank line between points. Each field is its own f-string, so no backslash
# sits inside a replacement field (a SyntaxError before Python 3.12) and no
# source indentation leaks into the output text.
serialized_1 = "\n\n".join(
    "\n".join(
        (
            f"ID: {pt.id}",
            f"Score: {pt.score}",
            f"Text: {pt.payload['text']}",
            f"Source: {pt.payload['file_name']}",
        )
    )
    for pt in response.points
)

print(serialized_1)

In [None]:
# Alternative, shorter format: source file and content only, with a blank line
# between points. Written as a single f-string so the source indentation of a
# continued line does not end up after the file name.
serialized_2 = "\n\n".join(
    f"Source: {pt.payload['file_name']}\nContent: {pt.payload['text']}"
    for pt in response.points
)

print(serialized_2)

In [None]:
# Example question used by the explanatory cells in this section.
# Cells earlier in the section read `query`, so make sure it is defined before running them.
query = "what are the services offered"

### Appendix

Qdrant Query API reference (`query_points`): https://api.qdrant.tech/api-reference/search/query-points

In [None]:
# Reference snippet showing different request shapes accepted by query_points().
# Illustrative only: "{collection_name}" and the `...` entries inside the vectors
# appear to be placeholders that must be replaced with real values before running.
# It uses its own `client` (pointing at http://localhost:6333), separate from the
# `qdrant_client` used in the rest of the notebook.
from qdrant_client import QdrantClient, models

client = QdrantClient(url="http://localhost:6333")

# Query nearest by ID
nearest = client.query_points(
    collection_name="{collection_name}",
    query="43cf51e2-8777-4f52-bc74-c2cbde0c8b04",
)

# Recommend on the average of these vectors
recommended = client.query_points(
    collection_name="{collection_name}",
    query=models.RecommendQuery(recommend=models.RecommendInput(
        positive=["43cf51e2-8777-4f52-bc74-c2cbde0c8b04", [0.11, 0.35, 0.6, ...]],
        negative=[[0.01, 0.45, 0.67, ...]]
    ))
)

# Fusion query
hybrid = client.query_points(
    collection_name="{collection_name}",
    prefetch=[
        models.Prefetch(
            query=models.SparseVector(indices=[1, 42], values=[0.22, 0.8]),
            using="sparse",
            limit=20,
        ),
        models.Prefetch(
            query=[0.01, 0.45, 0.67, ...],  # <-- dense vector
            using="dense",
            limit=20,
        ),
    ],
    query=models.FusionQuery(fusion=models.Fusion.RRF),
)

# 2-stage query
refined = client.query_points(
    collection_name="{collection_name}",
    prefetch=models.Prefetch(
        query=[0.01, 0.45, 0.67, ...],  # <-- dense vector
        limit=100,
    ),
    query=[
        [0.1, 0.2, ...],  # <─┐
        [0.2, 0.1, ...],  # < ├─ multi-vector
        [0.8, 0.9, ...],  # < ┘
    ],
    using="colbert",
    limit=10,
)

# Random sampling (as of 1.11.0)
sampled = client.query_points(
    collection_name="{collection_name}",
    query=models.SampleQuery(sample=models.Sample.RANDOM)
)

# Score boost depending on payload conditions (as of 1.14.0)
tag_boosted = client.query_points(
    collection_name="{collection_name}",
    prefetch=models.Prefetch(
        query=[0.2, 0.8, ...],  # <-- dense vector
        limit=50
    ),
    query=models.FormulaQuery(
        formula=models.SumExpression(sum=[
            "$score",
            models.MultExpression(mult=[0.5, models.FieldCondition(key="tag", match=models.MatchAny(any=["h1", "h2", "h3", "h4"]))]),
            models.MultExpression(mult=[0.25, models.FieldCondition(key="tag", match=models.MatchAny(any=["p", "li"]))])
        ]
    ))
)

# Score boost geographically closer points (as of 1.14.0)
geo_boosted = client.query_points(
    collection_name="{collection_name}",
    prefetch=models.Prefetch(
        query=[0.2, 0.8, ...],  # <-- dense vector
        limit=50
    ),
    query=models.FormulaQuery(
        formula=models.SumExpression(sum=[
            "$score",
            models.GaussDecayExpression(
                gauss_decay=models.DecayParamsExpression(
                    x=models.GeoDistance(
                        geo_distance=models.GeoDistanceParams(
                            origin=models.GeoPoint(
                                lat=52.504043,
                                lon=13.393236
                            ),  # Berlin
                            to="geo.location"
                        )
                    ),
                    scale=5000  # 5km
                )
            )
        ]),
        defaults={"geo.location": models.GeoPoint(lat=48.137154, lon=11.576124)}  # Munich
    )
)
