# LLM as a Judge

An LLM is used as a judge: it assigns each retrieved chunk a score that reflects how relevant the chunk is to a given query, so that the chunks can be ranked.

In [5]:
import chromadb
import json
import os

from llama_index.core import Settings
from llama_index.core import VectorStoreIndex
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.chroma import ChromaVectorStore

In [2]:
# Names of the environment variables this notebook relies on. Each one is
# checked right after it is defined so that a missing variable stops the
# notebook here, with a message naming the variable, instead of failing in
# a later cell.
OPEN_AI_KEY_NAME = 'OPENAI_API_KEY'
assert OPEN_AI_KEY_NAME in os.environ, (
    f"environment variable {OPEN_AI_KEY_NAME} is not set"
)

TAI_DATASET_ROOT_ENV_VAR = 'TAI_DATASET_ROOT'
assert TAI_DATASET_ROOT_ENV_VAR in os.environ, (
    f"environment variable {TAI_DATASET_ROOT_ENV_VAR} is not set"
)

In [3]:
# The dataset root must be present in the environment before the path to the
# vector database can be built from it.
assert TAI_DATASET_ROOT_ENV_VAR in os.environ
vectorDbPath = os.path.join(
    os.environ[TAI_DATASET_ROOT_ENV_VAR],
    'ai_tutor_knowledge_vectdb',
)
print(f'vectorDbPath: {vectorDbPath}')

# Open the Chroma database stored at that path and wrap its
# "ai_tutor_knowledge" collection as a LlamaIndex vector store.
db = chromadb.PersistentClient(path=vectorDbPath)
chroma_collection = db.get_collection("ai_tutor_knowledge")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Build the index on top of the existing vector store, then derive a query
# engine from it (configured with similarity_top_k=4).
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
query_engine = index.as_query_engine(similarity_top_k=4)

# Run one sample question through the engine and show the answer text.
res = query_engine.query("Explain how RAG works?")

print("top results:")
print("\t", res.response)
print("-_" * 20)

⚠️ It looks like you upgraded from a version below 0.5.6 and could benefit from vacuuming your database. Run chromadb utils vacuum --help for more information.


vectorDbPath: /home/minguzzi/repo/towards_ai_course/dataset/ai_tutor_knowledge_vectdb
top results:
	 RAG (Retrieval-Augmented Generation) works by combining retrieval-based methods with generative models to enhance the quality of generated text. It involves retrieving relevant information from a large dataset or knowledge source using retriever models and then incorporating this retrieved information into the generative model's text generation process. This approach aims to improve the coherence, relevance, and factual accuracy of the generated text by leveraging external knowledge during the generation process.
-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_


In [6]:
from pydantic import BaseModel

# Container for node ids and their scores, declared as a pydantic model.
# NOTE(review): the docstring and the bare `list` annotations are part of the
# pydantic model definition, so they are left untouched here; the element
# types of both lists are not constrained by these annotations.
class OrderedNodes(BaseModel):
  """A list of nodes with the ids and assigned scores."""
  # Identifiers of the scored nodes.
  node_id: list
  # Scores for the nodes; presumably score[i] belongs to node_id[i], but
  # nothing in this class enforces equal lengths -- TODO confirm.
  score: list

In [8]:
from llama_index.core.prompts import PromptTemplate

# Prompt used to ask the LLM judge for relevance scores. The template has two
# placeholders: {nodes_list} (the nodes' text and ids) and {user_query} (the
# query they are scored against).
# NOTE(review): the template text contains typos ("qurey", "0 an 1"). They are
# left as-is because this literal is the prompt text itself, whitespace
# included; fix them deliberately if desired.
prompt_tmpl = PromptTemplate(
    """
    You receive a qurey along with a list of nodes' text and their ids. Your task is to assign score
    to each node based on its contextually closeness to the given query. The final output is each
    node id along with its proximity score.
    Here is the list of nodes:
    {nodes_list}

    And the following is the query:
    {user_query}

    Score each of the nodes based on their text and their relevancy to the provided query.
    The score must be a decimal number between 0 an 1 so we can rank them."""
  )
# Bare expression: makes the notebook display the template object as the
# cell's output.
prompt_tmpl

PromptTemplate(metadata={'prompt_type': <PromptType.CUSTOM: 'custom'>}, template_vars=['nodes_list', 'user_query'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, template="\n    You receive a qurey along with a list of nodes' text and their ids. Your task is to assign score\n    to each node based on its contextually closeness to the given query. The final output is each\n    node id along with its proximity score.\n    Here is the list of nodes:\n    {nodes_list}\n\n    And the following is the query:\n    {user_query}\n\n    Score each of the nodes based on their text and their relevancy to the provided query.\n    The score must be a decimal number between 0 an 1 so we can rank them.")

In [None]:
from pydantic import BaseModel
from llama_index.llms.openai import OpenAI
from llama_index.core.prompts import PromptTemplate


def judger(nodes, query, model="gpt-4o-mini"):
  """Ask an OpenAI model to score how relevant each node is to a query.

  Every node is rendered as a numbered ``<NODEn>`` block holding its id and
  text, the query is wrapped in ``<QUERY>`` tags, and both are inserted into
  a scoring prompt. The model's answer is requested as structured output.

  Args:
    nodes: Iterable of objects exposing ``node_id`` and ``text`` attributes
      (presumably nodes returned by a LlamaIndex retriever -- TODO confirm
      against the callers).
    query: The user query the nodes are scored against.
    model: Name of the OpenAI model used as the judge. Defaults to
      "gpt-4o-mini", the value that used to be hard-coded.

  Returns:
    An ``OrderedNodes`` instance with a ``node_id`` list and a ``score``
    list, as produced by ``llm.structured_predict``. When ``nodes`` is empty
    both lists are empty and no request is sent to the model.
    NOTE(review): the two lists are presumably aligned by position, but
    nothing here enforces equal lengths.
  """

  # The model's output template. Its docstring and annotations belong to the
  # pydantic model definition, so they are kept unchanged.
  class OrderedNodes(BaseModel):
    """A node with the id and assigned score."""
    node_id: list
    score: list

  # Wrap each node in a <NODEn></NODEn> identifier; numbering starts at 1.
  node_blocks = [
    f"<NODE{pos}>\nNode ID: {item.node_id}\nText: {item.text}\n</NODE{pos}>\n"
    for pos, item in enumerate(nodes, start=1)
  ]

  # Nothing to score: return an empty result instead of sending a prompt with
  # an empty node list, which would invite the model to invent ids.
  if not node_blocks:
    return OrderedNodes(node_id=[], score=[])

  the_nodes = "".join(node_blocks)

  # Use a separate local so the caller-supplied `query` is not rebound.
  wrapped_query = f"<QUERY>\n{query}\n</QUERY>"

  # Define the prompt template (text kept verbatim, including its wording).
  prompt_tmpl = PromptTemplate(
    """
    You receive a qurey along with a list of nodes' text and their ids. Your task is to assign score
    to each node based on its contextually closeness to the given query. The final output is each
    node id along with its proximity score.
    Here is the list of nodes:
    {nodes_list}

    And the following is the query:
    {user_query}

    Score each of the nodes based on their text and their relevancy to the provided query.
    The score must be a decimal number between 0 an 1 so we can rank them."""
  )

  # Create the judge model and request an answer shaped like OrderedNodes.
  llm = OpenAI(model=model)
  ordered_nodes = llm.structured_predict(
    OrderedNodes, prompt_tmpl, nodes_list=the_nodes, user_query=wrapped_query
  )

  return ordered_nodes