### Import dependencies

### Import dependencies

In [None]:
# pip install spacy datasets span-marker scikit-learn

In [None]:
# %pip install numpy==2.0.0

In [None]:
# %pip install pandas==2.2.2

In [119]:
import os
from dotenv import load_dotenv

from openai import OpenAI

from langchain.agents import AgentExecutor
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.agents import create_tool_calling_agent
from langchain_core.prompts import PromptTemplate
from langchain_community.vectorstores import SupabaseVectorStore
from langchain_openai import OpenAIEmbeddings
from langchain import hub
from langchain_core.tools import tool

from qdrant_client import QdrantClient
from qdrant_client.http.models import Filter, SparseVector
from fastembed import TextEmbedding, SparseTextEmbedding
from langchain.vectorstores import Qdrant

In [120]:
import pandas as pd
import numpy as np

In [121]:
# attach to the same event-loop
import nest_asyncio

nest_asyncio.apply()

In [122]:
from langchain.evaluation import (
    EvaluatorType,
    load_evaluator,
    CriteriaEvalChain,
    LabeledCriteriaEvalChain
)
from langchain.schema import Document
import asyncio

In [123]:
import json
from pathlib import Path

### Load environment variables

In [None]:
# Pull settings from a local .env file (if present) into the process
# environment, then read the values this notebook needs. Any variable that
# is not set comes back as None.
load_dotenv()

QDRANT_URL = os.environ.get("QDRANT_URL")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
COLLECTION_NAME = os.environ.get("QDRANT_COLLECTION_NAME")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")


In [125]:
# Initialize dense and sparse embedding models
# dense_model feeds the "dense" prefetch branch of the hybrid query below;
# sparse_model feeds the "sparse" branch.
# NOTE(review): presumably these must be the same models that were used when
# the collection was indexed — confirm against the ingestion code.
dense_model = TextEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
sparse_model = SparseTextEmbedding(model_name="Qdrant/bm42-all-minilm-l6-v2-attentions")

In [126]:
# Initialize Qdrant client
# The URL and API key come from the environment variables read above; either
# one is None when the corresponding variable is not set.
qdrant_client = QdrantClient(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY,
)


  qdrant_client = QdrantClient(


In [127]:
# fetch the prompt from the prompt hub
# The pulled template expects the variables `input` and `agent_scratchpad`,
# with `chat_history` optional (see the fields printed in the next cell).
prompt = hub.pull("hwchase17/openai-functions-agent")



In [128]:
# Inspect the pulled prompt: iterating over it yields (field_name, value)
# pairs, which are printed one per line.
for prompt_field in prompt:
    print(prompt_field)

('name', None)
('input_variables', ['agent_scratchpad', 'input'])
('optional_variables', ['chat_history'])
('input_types', {'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages.chat.ChatMessageChunk, Tag(tag='ChatMessageChunk')], typing.Annotated[langchain_core.messages.system.SystemMessageChunk, T

In [129]:
from qdrant_client import QdrantClient, models
from langchain.agents import tool

@tool(response_format="content_and_artifact")
def retrieve(query: str):
    """Perform hybrid (dense + sparse) search using Qdrant Query API."""
    # NOTE: the docstring above is used by @tool as the tool description, so
    # it is left unchanged; further documentation lives in comments.
    #
    # Returns a (content, artifact) tuple, as required by
    # response_format="content_and_artifact": a text rendering of the top
    # hits, and the raw query_points response.

    # Embed the query once with each model (embed() is given a single text,
    # so the first item of each result is the embedding for `query`).
    dense_emb = list(dense_model.embed([query]))[0]
    sparse_emb = list(sparse_model.embed([query]))[0]
    sparse_vector = models.SparseVector(
        indices=sparse_emb.indices.tolist(),
        values=sparse_emb.values.tolist()
    )

    # Fusion query: 20 candidates from each branch, fused with RRF, top 5 kept.
    response = qdrant_client.query_points(
        collection_name=COLLECTION_NAME,
        prefetch=[
            models.Prefetch(
                query=sparse_vector,
                using="sparse",
                limit=20,
            ),
            models.Prefetch(
                query=dense_emb,
                using="dense",
                limit=20,
            ),
        ],
        query=models.FusionQuery(fusion=models.Fusion.RRF),
        with_payload=True,
        limit=5
    )

    def _format_point(pt):
        # Render one hit as five "Label: value" lines. Built from separate
        # lines so no source indentation leaks into the text, and tolerant of
        # points whose payload is missing or lacks a key.
        payload = pt.payload or {}
        return "\n".join([
            f"ID: {pt.id}",
            f"Score: {pt.score}",
            f"Text: {payload.get('text', '')}",
            f"Source: {payload.get('file_name', 'unknown')}",
            f"Page: {payload.get('page_number', 'unknown')}",
        ])

    # Hits are separated by a blank line.
    serialized = "\n\n".join(_format_point(pt) for pt in response.points)

    return serialized, response


def context(query: str):
    """Run the hybrid Qdrant search for ``query`` and return the raw response.

    The query text is embedded with both the dense and the sparse model. Each
    embedding drives a prefetch of 20 candidates, the two candidate lists are
    fused with ``models.Fusion.RRF``, and the top 5 points are requested
    together with their payloads.
    """
    dense_vec = list(dense_model.embed([query]))[0]
    sparse_raw = list(sparse_model.embed([query]))[0]

    sparse_branch = models.Prefetch(
        query=models.SparseVector(
            indices=sparse_raw.indices.tolist(),
            values=sparse_raw.values.tolist(),
        ),
        using="sparse",
        limit=20,
    )
    dense_branch = models.Prefetch(query=dense_vec, using="dense", limit=20)

    return qdrant_client.query_points(
        collection_name=COLLECTION_NAME,
        prefetch=[sparse_branch, dense_branch],
        query=models.FusionQuery(fusion=models.Fusion.RRF),
        with_payload=True,
        limit=5,
    )



In [130]:
# Ad-hoc run of the hybrid search at notebook level, so the raw hits can be
# inspected in the following cells.
# NOTE: the result is bound to `context`, which rebinds the name of the
# `context()` helper defined in the previous cell.
query = "which are the services offered by the company?"

# One dense and one sparse embedding for the single query string
dense_emb = list(dense_model.embed([query]))[0]
sparse_emb = list(sparse_model.embed([query]))[0]
sparse_vector = models.SparseVector(
    indices=sparse_emb.indices.tolist(),
    values=sparse_emb.values.tolist(),
)

# 20 candidates per branch, fused with RRF, top 5 returned with payloads
context = qdrant_client.query_points(
    collection_name=COLLECTION_NAME,
    prefetch=[
        models.Prefetch(query=sparse_vector, using="sparse", limit=20),
        models.Prefetch(query=dense_emb, using="dense", limit=20),
    ],
    query=models.FusionQuery(fusion=models.Fusion.RRF),
    with_payload=True,
    limit=5,
)

In [131]:
# Display the fused hits returned by the hybrid query above.
context.points

[ScoredPoint(id=1, version=0, score=0.5, payload={'text': '(“TIS”) oﬀers and conﬂicts of interest that arise through delivery of \nbrokerage services to you, along with how TIS aﬃliates, including Truist Advisory Services, Inc. and Truist Bank, can interact with \nyou and TIS to provide additional non-brokerage services to your account. Our goal is to help you understand the products and \nservices we oﬀer, fees involved, applicable compensation, and relevant conﬂicts of interest, so that you can make informed \ninvestment decisions. \nDeﬁned Terms \nThe information contained in this document is current as of the date above and is subject to change at our discretion. You \nshould review this document carefully, retain it with your records, and refer to it when you receive recommendations from us. \nFor purposes of this guide, the terms “TIS,” “we,” “us” and “our” refer to Truist Investment Services, Inc., a wholly owned \nsubsidiary of Truist Financial Corporation (“TFC”). TIS is a bro

In [132]:
# Convert every hit to a plain dict of its fields (id, version, score,
# payload, ...) so the hits can be handed to the evaluator as a list of nodes.
context_list = [dict(pt) for pt in context.points]
context_list

[{'id': 1,
  'version': 0,
  'score': 0.5,
  'payload': {'text': '(“TIS”) oﬀers and conﬂicts of interest that arise through delivery of \nbrokerage services to you, along with how TIS aﬃliates, including Truist Advisory Services, Inc. and Truist Bank, can interact with \nyou and TIS to provide additional non-brokerage services to your account. Our goal is to help you understand the products and \nservices we oﬀer, fees involved, applicable compensation, and relevant conﬂicts of interest, so that you can make informed \ninvestment decisions. \nDeﬁned Terms \nThe information contained in this document is current as of the date above and is subject to change at our discretion. You \nshould review this document carefully, retain it with your records, and refer to it when you receive recommendations from us. \nFor purposes of this guide, the terms “TIS,” “we,” “us” and “our” refer to Truist Investment Services, Inc., a wholly owned \nsubsidiary of Truist Financial Corporation (“TFC”). TIS i

In [133]:
# Look at the first hit as a plain dict.
dict(context.points[0])

{'id': 1,
 'version': 0,
 'score': 0.5,
 'payload': {'text': '(“TIS”) oﬀers and conﬂicts of interest that arise through delivery of \nbrokerage services to you, along with how TIS aﬃliates, including Truist Advisory Services, Inc. and Truist Bank, can interact with \nyou and TIS to provide additional non-brokerage services to your account. Our goal is to help you understand the products and \nservices we oﬀer, fees involved, applicable compensation, and relevant conﬂicts of interest, so that you can make informed \ninvestment decisions. \nDeﬁned Terms \nThe information contained in this document is current as of the date above and is subject to change at our discretion. You \nshould review this document carefully, retain it with your records, and refer to it when you receive recommendations from us. \nFor purposes of this guide, the terms “TIS,” “we,” “us” and “our” refer to Truist Investment Services, Inc., a wholly owned \nsubsidiary of Truist Financial Corporation (“TFC”). TIS is a 

In [134]:
# List the field names of a hit. enumerate() over a dict yields
# (position, field_name) pairs, so the "Key" printed here is the position and
# the "Value" is the field name, not the field's content.
for position, field_name in enumerate(dict(context.points[0])):
    print(f"Key: {position}, Value: {field_name}")

Key: 0, Value: id
Key: 1, Value: version
Key: 2, Value: score
Key: 3, Value: payload
Key: 4, Value: vector
Key: 5, Value: shard_key
Key: 6, Value: order_value


In [135]:
# The chunk text of the first hit is stored under payload['text'].
dict(context.points[0])['payload']['text']

'(“TIS”) oﬀers and conﬂicts of interest that arise through delivery of \nbrokerage services to you, along with how TIS aﬃliates, including Truist Advisory Services, Inc. and Truist Bank, can interact with \nyou and TIS to provide additional non-brokerage services to your account. Our goal is to help you understand the products and \nservices we oﬀer, fees involved, applicable compensation, and relevant conﬂicts of interest, so that you can make informed \ninvestment decisions. \nDeﬁned Terms \nThe information contained in this document is current as of the date above and is subject to change at our discretion. You \nshould review this document carefully, retain it with your records, and refer to it when you receive recommendations from us. \nFor purposes of this guide, the terms “TIS,” “we,” “us” and “our” refer to Truist Investment Services, Inc., a wholly owned \nsubsidiary of Truist Financial Corporation (“TFC”). TIS is a broker-dealer registered with the U.S. Securities and Exchang

In [136]:
class CustomRAGEvaluator:
    """Score a generated RAG answer with LangChain criteria evaluators.

    Four evaluators are built from a single LLM: coherence, relevance and
    helpfulness (reference-free), plus correctness, which is only run when a
    reference answer is supplied. The retrieved context is kept in
    ``self.nodes`` and is interpolated into the coherence and helpfulness
    prompts.
    """

    def __init__(self, llm_model=None, temperature=0):
        """Create the judge LLM and the evaluators.

        Args:
            llm_model: ``None`` to use ``ChatOpenAI`` with its default model,
                a model name string, or an already constructed LLM instance
                (used as is; ``temperature`` is then ignored).
            temperature: Sampling temperature used when the LLM is built here.
        """
        if llm_model is None:
            self.llm = ChatOpenAI(temperature=temperature)
        elif isinstance(llm_model, str):
            self.llm = ChatOpenAI(model=llm_model, temperature=temperature)
        else:
            # Assume it's already an LLM instance
            self.llm = llm_model
        self.nodes = []
        self._setup_evaluators()

    def _setup_evaluators(self):
        """Build one evaluator per criterion, all sharing ``self.llm``."""
        # Correctness compares against a reference, hence the labeled chain.
        self.correctness_evaluator = LabeledCriteriaEvalChain.from_llm(
            llm=self.llm, criteria="correctness"
        )
        self.relevance_evaluator = CriteriaEvalChain.from_llm(
            llm=self.llm, criteria="relevance"
        )
        self.coherence_evaluator = CriteriaEvalChain.from_llm(
            llm=self.llm, criteria="coherence"
        )
        self.helpfulness_evaluator = CriteriaEvalChain.from_llm(
            llm=self.llm, criteria="helpfulness"
        )

    def load_your_nodes(self, context_source):
        """Set the retrieved context used by :meth:`evaluate`.

        Args:
            context_source: Path to a UTF-8 JSON file (``str`` or any
                ``os.PathLike`` such as ``pathlib.Path``), or a list of nodes
                that is stored as is.

        Raises:
            ValueError: If ``context_source`` is neither a path nor a list.
        """
        if isinstance(context_source, (str, os.PathLike)):
            with open(context_source, 'r', encoding='utf-8') as f:
                self.nodes = json.load(f)
        elif isinstance(context_source, list):
            self.nodes = context_source
        else:
            raise ValueError("context_source must be file path or list of nodes")

        print(f"Loaded {len(self.nodes)} nodes")

    @staticmethod
    def _run_criterion(name, evaluator, **eval_kwargs):
        """Run one evaluator and return its ``<name>_score``/``<name>_reasoning`` pair.

        Evaluation is best-effort: if the evaluator raises, the score is
        ``None`` and the reasoning field carries the error message, so one
        failing criterion does not abort the others.
        """
        try:
            verdict = evaluator.evaluate_strings(**eval_kwargs)
            return {
                f"{name}_score": verdict.get("score"),
                f"{name}_reasoning": verdict.get("reasoning"),
            }
        except Exception as e:
            return {f"{name}_score": None, f"{name}_reasoning": str(e)}

    def evaluate(self, question: str, generated_answer: str, reference_answer: str = None):
        """Evaluate ``generated_answer`` for ``question``.

        Args:
            question: The user question that was asked.
            generated_answer: The answer to score.
            reference_answer: Optional ground-truth answer; correctness is
                only scored when this is truthy.

        Returns:
            A dict with the inputs (``question``, ``generated_answer``,
            ``reference_answer``, ``context``) followed by a ``*_score`` and
            ``*_reasoning`` entry for coherence, relevance and helpfulness,
            and for correctness when a reference answer was given.
        """
        context = self.nodes
        results = {
            "question": question,
            "generated_answer": generated_answer,
            "reference_answer": reference_answer,
            "context": context
        }

        # Coherence and helpfulness are judged with the retrieved context in
        # the prompt; relevance and correctness see the bare question.
        contextual_input = f"Context: {context}\nQuestion: {question}"

        results.update(self._run_criterion(
            "coherence", self.coherence_evaluator,
            prediction=generated_answer, input=contextual_input,
        ))
        results.update(self._run_criterion(
            "relevance", self.relevance_evaluator,
            prediction=generated_answer, input=question,
        ))
        results.update(self._run_criterion(
            "helpfulness", self.helpfulness_evaluator,
            prediction=generated_answer, input=contextual_input,
        ))

        # Correctness (optional)
        if reference_answer:
            results.update(self._run_criterion(
                "correctness", self.correctness_evaluator,
                prediction=generated_answer, input=question,
                reference=reference_answer,
            ))

        return results

In [137]:
# Build the evaluator with its defaults (default ChatOpenAI model, temperature 0).
evaluator = CustomRAGEvaluator()



In [138]:
# Smoke-test the retrieval tool on its own. `query` is the question string
# defined in the ad-hoc search cell above (the variable `question` is only
# assigned further down, so it is not available here on a fresh run).
# Tools are run through .invoke(); given a plain string it returns only the
# text content of the (content, artifact) pair.
retrieve.invoke(query)

'ID: 63        \nScore: 0.5        \nText: This compensation,\ndescribed below, can vary based on the type of annuity, the issuing insurance company, and the amount invested. \n• \nUnder an agreement with each insurance company, TIS is paid a commission for selling the company’s annuity products \nbased on the type of annuity and the amount of your annuity purchase payments. Your TIS financial professional receives a \nportion of this payment. \n• \nTIS also often receives ongoing payments, known as residual or trail commissions, on invested assets that are held in your \nvariable annuity or certain indexed annuities to compensate for ongoing servicing. The insurance company sets these\nongoing payments, and TIS generally pays a portion of these commissions to your TIS financial professional. \nAnnuity commissions are determined by the type of annuity product and the amount the corresponding insurance carrier\ndecides to offer. For TIS, such commission amounts range from 1% to 5%, and 

In [150]:
import json

evaluator = CustomRAGEvaluator()

# The agent answers with the help of the hybrid-search tool.
tools = [retrieve]

# Initialize LLM
llm = ChatOpenAI(temperature=1)

# Create the agent with the prompt and tools
agent = create_tool_calling_agent(llm, tools, prompt)

# create the agent executor
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=False)

question = "which are the services offered by the company?"


def get_context(query: str):
    """Run the hybrid (dense + sparse) Qdrant search and return the raw response.

    Same retrieval as the `retrieve` tool: 20 candidates from each of the
    sparse and dense branches, fused with RRF, top 5 returned with payloads.
    Defined here because the module-level name `context` is rebound to a
    query result by an earlier cell and can no longer be called.
    """
    dense_emb = list(dense_model.embed([query]))[0]
    sparse_emb = list(sparse_model.embed([query]))[0]
    sparse_vector = models.SparseVector(
        indices=sparse_emb.indices.tolist(),
        values=sparse_emb.values.tolist(),
    )
    return qdrant_client.query_points(
        collection_name=COLLECTION_NAME,
        prefetch=[
            models.Prefetch(query=sparse_vector, using="sparse", limit=20),
            models.Prefetch(query=dense_emb, using="dense", limit=20),
        ],
        query=models.FusionQuery(fusion=models.Fusion.RRF),
        with_payload=True,
        limit=5,
    )


# Load the retrieved context into the evaluator (list of plain dicts)
ctx_response = get_context(question)
context_list = [dict(pt) for pt in ctx_response.points]
evaluator.load_your_nodes(context_list)

# Get the generated answer from the agent
response = agent_executor.invoke({"input": question})
generated_answer = response['output']

# Evaluate. No reference answer is passed, so correctness is not scored.
result = evaluator.evaluate(question, generated_answer)

In [151]:
# Print every entry of the evaluation result: the field name followed by a
# colon, then its value on the next line.
for field, content in result.items():
    print(f"{field}:\n{content}")

question:
which are the services offered by the company?
generated_answer:
The company offers a range of services, including Full-Service brokerage with access to various share classes, mutual funds, equities, fixed income, ETFs, annuities, structured products, alternative investments, and other securities. They also provide investment advisory products and services, client advisory services, and tools and resources. Additionally, there are different levels of brokerage relationships with varying service offerings and associated costs.
reference_answer:
None
context:
[{'id': 1, 'version': 0, 'score': 0.5, 'payload': {'text': '(“TIS”) oﬀers and conﬂicts of interest that arise through delivery of \nbrokerage services to you, along with how TIS aﬃliates, including Truist Advisory Services, Inc. and Truist Bank, can interact with \nyou and TIS to provide additional non-brokerage services to your account. Our goal is to help you understand the products and \nservices we oﬀer, fees involved,

In [None]:
# One-row table of the evaluation result (one column per result field).
df = pd.DataFrame([result])
df

In [None]:

# Show the agent's final answer text.
print(response['output'])

In [None]:
# combine the tools and provide to the llm
# NOTE: this rebinds `tools`, `agent` and `agent_executor` from the evaluation
# cell above; the only difference is verbose=True on the executor.
tools = [retrieve]
agent = create_tool_calling_agent(llm, tools, prompt)

# create the agent executor
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [None]:
# invoke the agent
# The answer text is available under response['output'].
response = agent_executor.invoke({"input": "which are the services offered by the company?"})

In [None]:
# Show the agent's answer to the services question.
print(response['output'])

In [None]:
# invoke the agent
# Second sample question; `response` is overwritten with the new result.
response = agent_executor.invoke({"input": "which are the main risk factors highlighted in the document?"})

In [None]:
# Show the agent's answer to the risk-factors question.
print(response['output'])

### Appendix

Qdrant documentation at https://api.qdrant.tech/api-reference/search/query-points

In [None]:
from qdrant_client import QdrantClient, models

# Reference snippets for `query_points` (see the Qdrant documentation linked above).
# NOTE(review): these look like documentation examples rather than code meant
# to be run as is — "{collection_name}" and the `...` entries inside vectors
# are placeholders, and the client targets a server at http://localhost:6333
# instead of the QDRANT_URL used in the rest of the notebook.
client = QdrantClient(url="http://localhost:6333")

# Query nearest by ID
nearest = client.query_points(
    collection_name="{collection_name}",
    query="43cf51e2-8777-4f52-bc74-c2cbde0c8b04",
)

# Recommend on the average of these vectors
recommended = client.query_points(
    collection_name="{collection_name}",
    query=models.RecommendQuery(recommend=models.RecommendInput(
        positive=["43cf51e2-8777-4f52-bc74-c2cbde0c8b04", [0.11, 0.35, 0.6, ...]],
        negative=[[0.01, 0.45, 0.67, ...]]
    ))
)

# Fusion query
# (same shape as the hybrid query used by the `retrieve` tool in this notebook)
hybrid = client.query_points(
    collection_name="{collection_name}",
    prefetch=[
        models.Prefetch(
            query=models.SparseVector(indices=[1, 42], values=[0.22, 0.8]),
            using="sparse",
            limit=20,
        ),
        models.Prefetch(
            query=[0.01, 0.45, 0.67, ...],  # <-- dense vector
            using="dense",
            limit=20,
        ),
    ],
    query=models.FusionQuery(fusion=models.Fusion.RRF),
)

# 2-stage query
refined = client.query_points(
    collection_name="{collection_name}",
    prefetch=models.Prefetch(
        query=[0.01, 0.45, 0.67, ...],  # <-- dense vector
        limit=100,
    ),
    query=[
        [0.1, 0.2, ...],  # <─┐
        [0.2, 0.1, ...],  # < ├─ multi-vector
        [0.8, 0.9, ...],  # < ┘
    ],
    using="colbert",
    limit=10,
)

# Random sampling (as of 1.11.0)
sampled = client.query_points(
    collection_name="{collection_name}",
    query=models.SampleQuery(sample=models.Sample.RANDOM)
)

# Score boost depending on payload conditions (as of 1.14.0)
tag_boosted = client.query_points(
    collection_name="{collection_name}",
    prefetch=models.Prefetch(
        query=[0.2, 0.8, ...],  # <-- dense vector
        limit=50
    ),
    query=models.FormulaQuery(
        formula=models.SumExpression(sum=[
            "$score",
            models.MultExpression(mult=[0.5, models.FieldCondition(key="tag", match=models.MatchAny(any=["h1", "h2", "h3", "h4"]))]),
            models.MultExpression(mult=[0.25, models.FieldCondition(key="tag", match=models.MatchAny(any=["p", "li"]))])
        ]
    ))
)

# Score boost geographically closer points (as of 1.14.0)
geo_boosted = client.query_points(
    collection_name="{collection_name}",
    prefetch=models.Prefetch(
        query=[0.2, 0.8, ...],  # <-- dense vector
        limit=50
    ),
    query=models.FormulaQuery(
        formula=models.SumExpression(sum=[
            "$score",
            models.GaussDecayExpression(
                gauss_decay=models.DecayParamsExpression(
                    x=models.GeoDistance(
                        geo_distance=models.GeoDistanceParams(
                            origin=models.GeoPoint(
                                lat=52.504043,
                                lon=13.393236
                            ),  # Berlin
                            to="geo.location"
                        )
                    ),
                    scale=5000  # 5km
                )
            )
        ]),
        defaults={"geo.location": models.GeoPoint(lat=48.137154, lon=11.576124)}  # Munich
    )
)
