In [0]:
%run ./00_constants

In [0]:

# The * means: All parameters after * must be passed as keyword arguments.
def embed_texts(texts, *, single: "bool | None" = None):
    """
    Generate embeddings for a single string or a list of strings.

    Args:
        texts: str or list[str]. A bare string is sent as a one-item batch.
        single: Optional[bool], keyword-only. If True, returns a single
                embedding (the first one in the response).
                If False, returns a list of embeddings.
                If None, inferred from input type (True for str, False for list).

    Returns:
        If single=True or input is str -> list[float]
        If single=False or input is list[str] -> list[list[float]]
        An empty list input (with single False/None) returns [] without
        calling the embedding service.

    Raises:
        TypeError: if texts is neither a str nor a list.
        ValueError: if texts is an empty list and single=True, because there
            is no embedding to return.
    """
    # Normalize input
    if isinstance(texts, str):
        inputs = [texts]
        inferred_single = True
    elif isinstance(texts, list):
        inputs = texts
        inferred_single = False
    else:
        raise TypeError("texts must be a string or list of strings")

    if single is None:
        single = inferred_single

    # Nothing to embed: short-circuit instead of sending an empty batch to the
    # service and then indexing into an empty result.
    if not inputs:
        if single:
            raise ValueError("cannot return a single embedding for an empty list of texts")
        return []

    response = aoai.embeddings.create(
        model=EMBEDDING_DEPLOYMENT,
        input=inputs
    )

    embeddings = [d.embedding for d in response.data]

    if single:
        return embeddings[0]
    return embeddings

In [0]:
# cosine similarity function
import numpy as np

def cosine_similarity(a, b):
    """
    Cosine similarity between two vectors.

    Args:
        a: array-like of numbers.
        b: array-like of numbers, same length as `a`.

    Returns:
        dot(a, b) / (||a|| * ||b||). If either vector has zero norm the
        similarity is undefined; 0.0 is returned in that case instead of NaN
        so that callers can still sort results by score.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        # Avoid 0/0 -> NaN (plus a RuntimeWarning) for all-zero vectors.
        return 0.0
    return np.dot(a, b) / denom

In [0]:
# ---------- Option A: brute-force cosine similarity ----------

def retrieve_top_k_optionA(query_embedding, k=5, limit=2000):
    """
    Brute-force retrieval: score collected rows against the query and keep the best k.

    Reads at most `limit` rows from the table named by EMB_TABLE (no ordering
    is applied before the limit), collects them to the driver, computes the
    cosine similarity of each row's `embedding` with `query_embedding`, and
    returns the `k` highest-scoring rows.

    Args:
        query_embedding: the query vector passed to cosine_similarity.
        k: number of results to keep after sorting by score (descending).
        limit: maximum number of table rows to collect and score.

    Returns:
        list[dict] with keys chunk_id, doc_id, title, url, chunk_index,
        chunk_text, category, score.
    """
    wanted_columns = [
        "chunk_id",
        "doc_id",
        "title",
        "url",
        "chunk_index",
        "chunk_text",
        "category",
        "embedding",
    ]

    # Pull the candidate rows onto the driver
    candidates = spark.table(EMB_TABLE).select(*wanted_columns).limit(limit).collect()

    def _to_result(row):
        # Score first, then shape the row into the common result dict.
        similarity = cosine_similarity(query_embedding, row.embedding)
        return {
            "chunk_id": row.chunk_id,
            "doc_id": row.doc_id,
            "title": row.title,
            "url": row.url,
            "chunk_index": None if row.chunk_index is None else int(row.chunk_index),
            "chunk_text": row.chunk_text,
            "category": row.category,
            "score": float(similarity),
        }

    ranked = sorted(
        (_to_result(row) for row in candidates),
        key=lambda item: item["score"],
        reverse=True,
    )
    return ranked[:k]

# ---------- Option B: Databricks Vector Search ----------

def retrieve_top_k_optionB(query_embedding, k=5, filters=None,
                           endpoint_name="vs_azure_compute", index_name=None):
    """
    Retrieve the top-k chunks through a Databricks Vector Search index.

    Args:
        query_embedding: query vector passed as `query_vector`.
        k: number of results requested (`num_results`).
        filters: optional filter object forwarded unchanged to `similarity_search`.
        endpoint_name: Vector Search endpoint to use.
        index_name: fully qualified index name. Defaults to
            f"{CATALOG}.{SCHEMA}.azure_compute_docs_vs_index".

    Returns:
        list[dict] with keys chunk_id, doc_id, title, url, chunk_index,
        chunk_text, category, score. An empty list is returned when the
        response carries no `data_array`.
        `score` is taken from the value that follows the requested columns in
        each result row when such a value is present, otherwise None.
        NOTE(review): the trailing value is assumed to be the similarity score
        appended by Vector Search -- confirm against the SDK version in use.
    """
    from databricks.vector_search.client import VectorSearchClient

    if index_name is None:
        index_name = f"{CATALOG}.{SCHEMA}.azure_compute_docs_vs_index"

    # The embedding column itself is not requested.
    columns = [
        "chunk_id",
        "doc_id",
        "title",
        "url",
        "chunk_index",
        "chunk_text",
        "category",
    ]

    index = VectorSearchClient().get_index(endpoint_name=endpoint_name, index_name=index_name)

    vs_result = index.similarity_search(
        query_vector=query_embedding,
        columns=columns,
        num_results=k,
        filters=filters
    )

    # Tolerate a response without "result"/"data_array" (e.g. no matches)
    # instead of failing with KeyError.
    rows = (vs_result.get("result") or {}).get("data_array") or []

    n_cols = len(columns)
    normalized = []
    for r in rows:
        raw_index = r[4]
        raw_score = r[n_cols] if len(r) > n_cols else None
        normalized.append({
            "chunk_id": r[0],
            "doc_id": r[1],
            "title": r[2],
            "url": r[3],
            # Same coercion as option A so both options yield the same schema.
            "chunk_index": int(raw_index) if raw_index is not None else None,
            "chunk_text": r[5],
            "category": r[6],
            "score": float(raw_score) if raw_score is not None else None,
        })

    return normalized


# ---------- Wrapper: choose A or B, same output ----------
# Both will return this structure:
# {
#   "chunk_id": ...,
#   "doc_id": ...,
#   "title": ...,
#   "url": ...,
#   "chunk_index": ...,
#   "chunk_text": ...,
#   "category": ...,
#   "score": ...
# }

def retrieve_top_k(query_embedding, k=5, option="A", **kwargs) -> list[dict]:
    """
    Single entrypoint for retrieval; dispatches on `option`.

    Args:
        query_embedding: query vector handed to the selected backend.
        k: number of results to request.
        option: "A" for brute-force cosine similarity, "B" for vector search.
            Case and surrounding whitespace are ignored.
        **kwargs: forwarded unchanged to the selected backend.

    Returns:
        Whatever the selected backend returns (list[dict] with a shared schema).

    Raises:
        ValueError: if `option` is neither "A" nor "B".
    """
    mode = option.upper().strip()

    # Reject unknown modes up front.
    if mode not in ("A", "B"):
        raise ValueError("option must be 'A' or 'B'")

    backend = retrieve_top_k_optionA if mode == "A" else retrieve_top_k_optionB
    return backend(query_embedding, k=k, **kwargs)

In [0]:
def log_rag_event(event: dict):
    """
    Append one RAG query event to the Delta table
    `databricks_rag_demo.default.rag_query_logs`.

    Args:
        event: dict with keys question, top_k, retrieved_chunks, prompt,
            answer, embedding_deployment, chat_deployment.
            `retrieved_chunks` is a list of chunk records (or None). Dict
            records are reduced to the fields of the log schema (chunk_id,
            doc_id, title, url, chunk_index, category, score); other record
            types are passed to Spark unchanged.

    Returns:
        The generated query_id (a UUID4 string) of the logged row.

    Raises:
        KeyError: if a required key is missing from `event`.
    """
    # All names used below are imported here so the function does not depend
    # on imports performed by some other notebook cell.
    import uuid
    from datetime import datetime, timezone
    from pyspark.sql.types import (
        ArrayType,
        DoubleType,
        IntegerType,
        StringType,
        StructField,
        StructType,
        TimestampType,
    )

    chunk_schema = StructType([
        StructField("chunk_id", StringType(), True),
        StructField("doc_id", StringType(), True),
        StructField("title", StringType(), True),
        StructField("url", StringType(), True),
        StructField("chunk_index", IntegerType(), True),
        StructField("category", StringType(), True),
        StructField("score", DoubleType(), True),
    ])

    rag_log_schema = StructType([
        StructField("query_id", StringType(), True),
        StructField("question", StringType(), True),
        StructField("top_k", IntegerType(), True),
        StructField("retrieved_chunks", ArrayType(chunk_schema), True),
        StructField("prompt", StringType(), True),
        StructField("answer", StringType(), True),
        StructField("embedding_deployment", StringType(), True),
        StructField("chat_deployment", StringType(), True),
        StructField("created_at", TimestampType(), True),
    ])

    def _project_chunk(chunk):
        # Keep only the logged fields and coerce the numeric ones to the
        # Python types the schema expects (int / float); None stays None.
        if not isinstance(chunk, dict):
            return chunk
        raw_index = chunk.get("chunk_index")
        raw_score = chunk.get("score")
        return {
            "chunk_id": chunk.get("chunk_id"),
            "doc_id": chunk.get("doc_id"),
            "title": chunk.get("title"),
            "url": chunk.get("url"),
            "chunk_index": int(raw_index) if raw_index is not None else None,
            "category": chunk.get("category"),
            "score": float(raw_score) if raw_score is not None else None,
        }

    raw_chunks = event["retrieved_chunks"]
    chunks = None if raw_chunks is None else [_project_chunk(c) for c in raw_chunks]

    query_id = str(uuid.uuid4())

    # This produces a timezone-aware UTC timestamp.
    now = datetime.now(timezone.utc)

    row_data = [{
        "query_id": query_id,
        "question": str(event["question"]),
        "top_k": int(event["top_k"]),
        "retrieved_chunks": chunks,
        "prompt": str(event["prompt"]),
        "answer": str(event["answer"]),
        "embedding_deployment": str(event["embedding_deployment"]),
        "chat_deployment": str(event["chat_deployment"]),
        "created_at": now
    }]

    df = spark.createDataFrame(row_data, schema=rag_log_schema)

    (
        df.write
        .format("delta")
        .mode("append")
        .saveAsTable("databricks_rag_demo.default.rag_query_logs")
    )

    return query_id

In [0]:
# Prompt assembly

def build_prompt(question, contexts):
    """
    Assemble the answering prompt from a question and context excerpts.

    Args:
        question: the user question, interpolated as-is.
        contexts: iterable of context excerpts. A bare string is treated as a
            single excerpt (rather than being joined character by character).
            None entries are skipped and other entries are converted with
            str(). None or an empty iterable yields an empty context section.

    Returns:
        The prompt text, with excerpts separated by a blank line.
    """
    if isinstance(contexts, str):
        contexts = [contexts]

    joined_context = "\n\n".join(str(c) for c in (contexts or []) if c is not None)
    return f"""
You are a helpful assistant answering questions about Azure Compute.

Use the following documentation excerpts to answer the question.

Context:
{joined_context}

Question:
{question}

Answer:
"""

In [0]:
##### LLM judge

###### Judge prompt builder
import json

def build_judge_prompt(question: str, answer: str, retrieved_chunks: list[dict],
                       *, max_chunks: int = 6, max_chars: int = 1200) -> str:
    """
    Build the LLM-judge prompt for one question/answer pair and its sources.

    Args:
        question: the user question.
        answer: the answer produced by the RAG system.
        retrieved_chunks: list of dicts; the "url" and "chunk_text" keys are
            read from each. None is treated as an empty list.
        max_chunks: keyword-only; number of leading chunks included as sources.
        max_chars: keyword-only; each excerpt is cut to this many characters
            to keep the prompt compact.

    Returns:
        The prompt text. It asks for JSON with the keys retrieval_relevance,
        answer_relevance, faithfulness and notes.
    """
    # keep judge prompt compact: only short excerpts
    excerpts = []
    for i, c in enumerate((retrieved_chunks or [])[:max_chunks], start=1):
        chunk_text = (c.get("chunk_text") or "")[:max_chars]  # cap to avoid giant prompts
        url = c.get("url")
        excerpts.append(f"[{i}] URL: {url}\nEXCERPT:\n{chunk_text}")

    sources_block = "\n\n---\n\n".join(excerpts)

    return f"""
You are evaluating a Retrieval-Augmented Generation (RAG) system for Azure Compute documentation.

Score the system on a 1 - 5 scale (integers only):
- retrieval_relevance: are the retrieved excerpts relevant to the question?
- answer_relevance: does the answer address the question?
- faithfulness: is the answer supported by the provided excerpts (no hallucination)?

Return ONLY valid JSON with keys:
retrieval_relevance, answer_relevance, faithfulness, notes

Question:
{question}

Answer:
{answer}

Retrieved sources:
{sources_block}
"""

###### Judge function (Azure OpenAI chat)

def judge_rag(question: str, answer: str, retrieved_chunks: list[dict]) -> dict:
    """
    Ask the chat deployment to score a RAG answer and return normalized scores.

    Builds the judge prompt with build_judge_prompt, sends it as a single user
    message at temperature 0.0, and parses the reply as JSON.

    Args:
        question: the user question.
        answer: the answer produced by the RAG system.
        retrieved_chunks: the chunks that were retrieved for the question.

    Returns:
        dict with integer scores (each clamped to 1..5) under
        retrieval_relevance, answer_relevance and faithfulness, plus `notes`
        (stripped, at most 2000 characters). A score that is missing, null or
        not numeric falls back to 3.

    Raises:
        json.JSONDecodeError: if no JSON can be parsed from the reply.
        ValueError: if the parsed JSON is not an object.
    """
    prompt = build_judge_prompt(question, answer, retrieved_chunks)

    resp = aoai.chat.completions.create(
        model=CHAT_DEPLOYMENT,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.0
    )

    # Guard against a reply without text content (content may be None).
    text = (resp.choices[0].message.content or "").strip()

    # parse JSON robustly
    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        # fallback: try to extract JSON substring if model added extra text
        start = text.find("{")
        end = text.rfind("}")
        if start == -1 or end < start:
            # No brace-delimited span to try; surface the original parse error.
            raise
        data = json.loads(text[start:end+1])

    if not isinstance(data, dict):
        raise ValueError("judge response must be a JSON object")

    # enforce integers 1..5
    def clamp_int(x, default=3):
        try:
            # float() first so values such as "4.0" or 4.6 are accepted.
            x = int(float(x))
        except (TypeError, ValueError, OverflowError):
            return default
        return max(1, min(5, x))

    return {
        "retrieval_relevance": clamp_int(data.get("retrieval_relevance", 3)),
        "answer_relevance": clamp_int(data.get("answer_relevance", 3)),
        "faithfulness": clamp_int(data.get("faithfulness", 3)),
        # `or ""` keeps a JSON null from being stored as the text "None".
        "notes": str(data.get("notes") or "").strip()[:2000]
    }


In [0]:
### Write evaluation results to Delta

import uuid
from datetime import datetime, UTC
from pyspark.sql import Row
from pyspark.sql.types import *

def write_evaluation(query_id: str, question: str, answer: str, scores: dict, evaluator="llm_judge_v1"):
    """
    Append one evaluation record to the Delta table
    `databricks_rag_demo.default.rag_evaluations`.

    Args:
        query_id: id of the logged query this evaluation belongs to.
        question: the evaluated question (stored as str).
        answer: the evaluated answer (stored as str).
        scores: dict providing retrieval_relevance, answer_relevance and
            faithfulness (each converted with int()) and optionally notes.
        evaluator: label stored in the `evaluator` column.

    Returns:
        The generated evaluation_id (a UUID4 string).
    """
    new_eval_id = str(uuid.uuid4())
    written_at = datetime.now(UTC)

    # (column name, Spark type) pairs; every column is nullable.
    column_types = [
        ("evaluation_id", StringType),
        ("query_id", StringType),
        ("question", StringType),
        ("answer", StringType),
        ("retrieval_relevance", IntegerType),
        ("answer_relevance", IntegerType),
        ("faithfulness", IntegerType),
        ("evaluator", StringType),
        ("notes", StringType),
        ("created_at", TimestampType),
    ]
    eval_schema = StructType(
        [StructField(col_name, col_type(), True) for col_name, col_type in column_types]
    )

    # Scores are cast to plain ints so they match the IntegerType columns.
    record = Row(
        evaluation_id=new_eval_id,
        query_id=query_id,
        question=str(question),
        answer=str(answer),
        retrieval_relevance=int(scores["retrieval_relevance"]),
        answer_relevance=int(scores["answer_relevance"]),
        faithfulness=int(scores["faithfulness"]),
        evaluator=str(evaluator),
        notes=str(scores.get("notes", "")),
        created_at=written_at,
    )

    eval_df = spark.createDataFrame([record], schema=eval_schema)
    eval_df.write.format("delta").mode("append").saveAsTable(
        "databricks_rag_demo.default.rag_evaluations"
    )

    return new_eval_id