In [None]:
from azure.ai.projects import AIProjectClient
from azure.identity import DefaultAzureCredential
import qdrant_client.http.models as qmodels
from config_qdrant import *
from qdrant_client import QdrantClient
from pprint import pprint
from sentence_transformers import CrossEncoder

In [None]:
# Open the connection to the Qdrant instance. HOST and API_KEY are not defined
# in this notebook; presumably they are supplied by the config_qdrant star import.
qdrant_client = QdrantClient(url=HOST, api_key=API_KEY)

In [None]:
# Collection that will be searched, and the free-text question to search it with.
COLLECTION_NAME = "BEES"
user_question = "cute gray fuzzy bee"

In [None]:
# Azure AI project client, authenticated with DefaultAzureCredential.
project = AIProjectClient(endpoint=AZURE_OPENAI_ENDPOINT, credential=DefaultAzureCredential())

# OpenAI client obtained from the project, pinned to a fixed API version.
client = project.get_openai_client(api_version="2024-10-21")

# Deployment identifier passed as `model` to the chat completion calls below.
MODEL_NAME = AZURE_OPENAI_DEPLOYMENT_ID

### RAG without Reranking

In [None]:
# Embed the user question and keep the first embedding of the response as the
# vector used to query Qdrant.
query_vector = (
    client.embeddings
    .create(input=user_question, model="text-embedding-3-small")
    .data[0]
    .embedding
)

In [None]:
# Vector search: request the 5 best matches for the query vector, unfiltered.
search_result = qdrant_client.query_points(
    collection_name=COLLECTION_NAME,
    query=query_vector,
    query_filter=None,
    limit=5,
)

In [None]:
# Display the raw response returned by query_points for inspection.
search_result

In [None]:
# String that holds the retrieved passages handed to the chat model as sources.
context = ""

In [None]:
# Build the context from scratch every time this cell runs, so that executing it
# again does not append the same passages a second time and inflate the prompt.
# Each passage is prefixed with a single space.
context = "".join(
    " " + scored_point.payload["content"] for scored_point in search_result.points
)

In [None]:
# Length of the assembled context string, in characters.
len(context)

In [None]:
# System prompt restricting the model to the supplied sources.
SYSTEM_MESSAGE = """
You are a helpful assistant that answers questions about insects.
You must use the data set to answer the questions,
you should not provide any info that is not in the provided sources.
"""

# Ask the chat model, sending the question and the retrieved passages together
# as a single user turn.
response = client.chat.completions.create(
    messages=[
        dict(role="system", content=SYSTEM_MESSAGE),
        dict(role="user", content=f"{user_question}\nSources: {context}"),
    ],
    model=MODEL_NAME,
    temperature=0.3,
)

print(f"\nResponse from {MODEL_NAME} on : \n")
pprint(response.choices[0].message.content)

### RAG with Reranking

In [None]:
def rerank(query, retrieved_documents, model_name="cross-encoder/ms-marco-MiniLM-L-6-v2"):
    """
    Rerank retrieved documents against the query using a cross-encoder model.

    Args:
        query: The search text each document is scored against.
        retrieved_documents: Iterable of objects exposing ``payload["content"]``
            (in this notebook, the points returned by the Qdrant search).
        model_name: Name of the cross-encoder model to load. The default is the
            model this function has always used.

    Returns:
        A new list containing the same documents ordered from the highest to the
        lowest cross-encoder score. Documents with equal scores keep their
        incoming relative order. An empty input yields an empty list.
    """
    documents = list(retrieved_documents)
    if not documents:
        # Nothing to score, so skip loading the model altogether.
        return []

    encoder = CrossEncoder(model_name)
    scores = encoder.predict([(query, doc.payload["content"]) for doc in documents])

    # Sort on the score alone. Sorting the raw (score, document) tuples falls back
    # to comparing the documents themselves whenever two scores are equal, which
    # raises TypeError for objects that do not define an ordering.
    ranked = sorted(zip(scores, documents), key=lambda pair: pair[0], reverse=True)
    return [doc for _, doc in ranked]

In [None]:
# Reorder the points returned by the vector search by cross-encoder relevance.
reranked_results = rerank(
    query=user_question,
    retrieved_documents=search_result.points,
)

In [None]:
# Display the list returned by rerank() for inspection.
reranked_results

In [None]:
# Reassemble the context from the reranked points, each passage prefixed with a
# single space.
context = "".join(
    " " + scored_point.payload["content"] for scored_point in reranked_results
)

In [None]:
# Display the full context string that will be sent to the chat model.
context

In [None]:
# System prompt restricting the model to the supplied sources.
SYSTEM_MESSAGE = """
You are a helpful assistant that answers questions about insects.
You must use the data set to answer the questions,
you should not provide any info that is not in the provided sources.
"""

# Ask the chat model again with whatever `context` currently holds (the cell
# above rebuilds it from the reranked results).
response = client.chat.completions.create(
    messages=[
        dict(role="system", content=SYSTEM_MESSAGE),
        dict(role="user", content=f"{user_question}\nSources: {context}"),
    ],
    model=MODEL_NAME,
    temperature=0.3,
)

print(f"\nResponse from {MODEL_NAME} on : \n")
pprint(response.choices[0].message.content)